1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "ggc.h"
26 #include "tree.h"
27 #include "target.h"
28 #include "basic-block.h"
29 #include "diagnostic.h"
30 #include "tree-flow.h"
31 #include "tree-dump.h"
32 #include "timevar.h"
33 #include "cfgloop.h"
34 #include "expr.h"
35 #include "optabs.h"
36 #include "params.h"
37 #include "tree-chrec.h"
38 #include "tree-data-ref.h"
39 #include "tree-scalar-evolution.h"
40 #include "tree-vectorizer.h"
41 #include "toplev.h"
42 #include "recog.h"
44 /* Main analysis functions. */
45 static loop_vec_info vect_analyze_loop_form (struct loop *);
46 static bool vect_analyze_data_refs (loop_vec_info);
47 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
48 static void vect_analyze_scalar_cycles (loop_vec_info);
49 static bool vect_analyze_data_ref_accesses (loop_vec_info);
50 static bool vect_analyze_data_ref_dependences (loop_vec_info);
51 static bool vect_analyze_data_refs_alignment (loop_vec_info);
52 static bool vect_compute_data_refs_alignment (loop_vec_info);
53 static bool vect_enhance_data_refs_alignment (loop_vec_info);
54 static bool vect_analyze_operations (loop_vec_info);
55 static bool vect_determine_vectorization_factor (loop_vec_info);
57 /* Utility functions for the analyses. */
58 static bool exist_non_indexing_operands_for_use_p (tree, tree);
59 static tree vect_get_loop_niters (struct loop *, tree *);
60 static bool vect_analyze_data_ref_dependence
61 (struct data_dependence_relation *, loop_vec_info);
62 static bool vect_compute_data_ref_alignment (struct data_reference *);
63 static bool vect_analyze_data_ref_access (struct data_reference *);
64 static bool vect_can_advance_ivs_p (loop_vec_info);
65 static void vect_update_misalignment_for_peel
66 (struct data_reference *, struct data_reference *, int npeel);
68 /* Function vect_determine_vectorization_factor
70 Determine the vectorization factor (VF). VF is the number of data elements
71 that are operated upon in parallel in a single iteration of the vectorized
72 loop. For example, when vectorizing a loop that operates on 4-byte elements,
73 on a target with a vector size (VS) of 16 bytes, the VF is set to 4, since 4
74 elements can fit in a single vector register.
76 We currently support vectorization of loops in which all types operated upon
77 are of the same size. Therefore this function currently sets VF according to
78 the size of the types operated upon, and fails if there are multiple sizes
79 in the loop.
81 VF is also the factor by which the loop iterations are strip-mined, e.g.:
82 original loop:
83 for (i=0; i<N; i++){
84 a[i] = b[i] + c[i];
87 vectorized loop:
88 for (i=0; i<N; i+=VF){
89 a[i:VF] = b[i:VF] + c[i:VF];
93 static bool
94 vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
96 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
97 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
98 int nbbs = loop->num_nodes;
99 block_stmt_iterator si;
100 unsigned int vectorization_factor = 0;
101 tree scalar_type;
102 tree phi;
103 tree vectype;
104 unsigned int nunits;
105 stmt_vec_info stmt_info;
106 int i;
108 if (vect_print_dump_info (REPORT_DETAILS))
109 fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");
111 for (i = 0; i < nbbs; i++)
113 basic_block bb = bbs[i];
115 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
117 stmt_info = vinfo_for_stmt (phi);
118 if (vect_print_dump_info (REPORT_DETAILS))
120 fprintf (vect_dump, "==> examining phi: ");
121 print_generic_expr (vect_dump, phi, TDF_SLIM);
124 gcc_assert (stmt_info);
126 if (STMT_VINFO_RELEVANT_P (stmt_info))
128 gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
129 scalar_type = TREE_TYPE (PHI_RESULT (phi));
131 if (vect_print_dump_info (REPORT_DETAILS))
133 fprintf (vect_dump, "get vectype for scalar type: ");
134 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
137 vectype = get_vectype_for_scalar_type (scalar_type);
138 if (!vectype)
140 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
142 fprintf (vect_dump,
143 "not vectorized: unsupported data-type ");
144 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
146 return false;
148 STMT_VINFO_VECTYPE (stmt_info) = vectype;
150 if (vect_print_dump_info (REPORT_DETAILS))
152 fprintf (vect_dump, "vectype: ");
153 print_generic_expr (vect_dump, vectype, TDF_SLIM);
156 nunits = TYPE_VECTOR_SUBPARTS (vectype);
157 if (vect_print_dump_info (REPORT_DETAILS))
158 fprintf (vect_dump, "nunits = %d", nunits);
160 if (!vectorization_factor
161 || (nunits > vectorization_factor))
162 vectorization_factor = nunits;
166 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
168 tree stmt = bsi_stmt (si);
169 stmt_info = vinfo_for_stmt (stmt);
171 if (vect_print_dump_info (REPORT_DETAILS))
173 fprintf (vect_dump, "==> examining statement: ");
174 print_generic_expr (vect_dump, stmt, TDF_SLIM);
177 gcc_assert (stmt_info);
179 /* Skip stmts which do not need to be vectorized. */
180 if (!STMT_VINFO_RELEVANT_P (stmt_info)
181 && !STMT_VINFO_LIVE_P (stmt_info))
183 if (vect_print_dump_info (REPORT_DETAILS))
184 fprintf (vect_dump, "skip.");
185 continue;
188 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
190 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
192 fprintf (vect_dump, "not vectorized: irregular stmt.");
193 print_generic_expr (vect_dump, stmt, TDF_SLIM);
195 return false;
198 if (!GIMPLE_STMT_P (stmt)
199 && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
201 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
203 fprintf (vect_dump, "not vectorized: vector stmt in loop:");
204 print_generic_expr (vect_dump, stmt, TDF_SLIM);
206 return false;
209 if (STMT_VINFO_VECTYPE (stmt_info))
211 /* The only case in which a vectype has already been set is for stmts
212 that contain a dataref, or for "pattern-stmts" (stmts generated
213 by the vectorizer to represent/replace a certain idiom). */
214 gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
215 || is_pattern_stmt_p (stmt_info));
216 vectype = STMT_VINFO_VECTYPE (stmt_info);
218 else
220 tree operation;
222 gcc_assert (! STMT_VINFO_DATA_REF (stmt_info)
223 && !is_pattern_stmt_p (stmt_info));
225 /* We generally set the vectype according to the type of the
226 result (lhs).
227 For stmts whose result-type is different from the type of the
228 arguments (e.g. demotion, promotion), vectype will be reset
229 appropriately (later). Note that we have to visit the smallest
230 datatype in this function, because that determines the VF.
231 If the smallest datatype in the loop is present only as the
232 rhs of a promotion operation - we'd miss it here.
233 Such a case, where a variable of this datatype does not appear
234 in the lhs anywhere in the loop, can only occur if it's an
235 invariant: e.g.: 'int_x = (int) short_inv', which we'd expect
236 to have been optimized away by invariant motion. However, we
237 cannot rely on invariant motion to always take invariants out
238 of the loop, and so in the case of promotion we also have to
239 check the rhs. */
240 scalar_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));
242 operation = GIMPLE_STMT_OPERAND (stmt, 1);
243 if (TREE_CODE (operation) == NOP_EXPR
244 || TREE_CODE (operation) == CONVERT_EXPR
245 || TREE_CODE (operation) == WIDEN_MULT_EXPR)
247 tree rhs_type = TREE_TYPE (TREE_OPERAND (operation, 0));
248 if (TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type)) < TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type)))
249 scalar_type = rhs_type;
252 if (vect_print_dump_info (REPORT_DETAILS))
254 fprintf (vect_dump, "get vectype for scalar type: ");
255 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
258 vectype = get_vectype_for_scalar_type (scalar_type);
259 if (!vectype)
261 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
263 fprintf (vect_dump,
264 "not vectorized: unsupported data-type ");
265 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
267 return false;
269 STMT_VINFO_VECTYPE (stmt_info) = vectype;
272 if (vect_print_dump_info (REPORT_DETAILS))
274 fprintf (vect_dump, "vectype: ");
275 print_generic_expr (vect_dump, vectype, TDF_SLIM);
278 nunits = TYPE_VECTOR_SUBPARTS (vectype);
279 if (vect_print_dump_info (REPORT_DETAILS))
280 fprintf (vect_dump, "nunits = %d", nunits);
282 if (!vectorization_factor
283 || (nunits > vectorization_factor))
284 vectorization_factor = nunits;
289 /* TODO: Analyze cost. Decide if worthwhile to vectorize. */
290 if (vect_print_dump_info (REPORT_DETAILS))
291 fprintf (vect_dump, "vectorization factor = %d", vectorization_factor);
292 if (vectorization_factor <= 1)
294 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
295 fprintf (vect_dump, "not vectorized: unsupported data-type");
296 return false;
298 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
300 return true;
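
/* Editorial sketch (not part of GCC): the arithmetic behind the VF choice
   above, with an assumed 16-byte vector size. Each scalar type contributes
   VS / sizeof (type) units per vector, and VF is the maximum nunits seen:

     #include <stdio.h>

     int
     main (void)
     {
       unsigned vs = 16;                  /* vector size in bytes (assumed) */
       unsigned type_sizes[] = { 4, 4 };  /* element sizes seen in the loop */
       unsigned vf = 0, i;
       for (i = 0; i < sizeof (type_sizes) / sizeof (*type_sizes); i++)
         {
           unsigned nunits = vs / type_sizes[i];  /* elements per vector */
           if (!vf || nunits > vf)
             vf = nunits;
         }
       printf ("VF = %u\n", vf);          /* prints: VF = 4 */
       return 0;
     }
*/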
304 /* SLP costs are calculated according to the SLP instance unrolling factor (i.e.,
305 the number of created vector stmts depends on the unrolling factor). However,
306 the actual number of vector stmts for every SLP node depends on VF which is
307 set later in vect_analyze_operations(). Hence, SLP costs should be updated.
308 In this function we assume that the inside costs calculated in
309 vect_model_xxx_cost are linear in ncopies. */
311 static void
312 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
314 unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
315 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
316 slp_instance instance;
318 if (vect_print_dump_info (REPORT_SLP))
319 fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");
321 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
322 /* We assume that costs are linear in ncopies. */
323 SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
324 / SLP_INSTANCE_UNROLLING_FACTOR (instance);
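
/* Editorial sketch (not part of GCC): the update above in isolation. If an
   SLP instance's inside-of-loop cost C was computed for its own unrolling
   factor UF, and the final VF is a multiple of UF, the corrected cost is
   C * (VF / UF), under the stated assumption that costs are linear in the
   number of copies:

     unsigned cost = 6, uf = 2, vf = 4;   /* assumed values */
     cost *= vf / uf;                     /* cost becomes 12 */
*/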
328 /* Function vect_analyze_operations.
330 Scan the loop stmts and make sure they are all vectorizable. */
332 static bool
333 vect_analyze_operations (loop_vec_info loop_vinfo)
335 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
336 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
337 int nbbs = loop->num_nodes;
338 block_stmt_iterator si;
339 unsigned int vectorization_factor = 0;
340 int i;
341 bool ok;
342 tree phi;
343 stmt_vec_info stmt_info;
344 bool need_to_vectorize = false;
345 int min_profitable_iters;
346 int min_scalar_loop_bound;
347 unsigned int th;
348 bool only_slp_in_loop = true;
350 if (vect_print_dump_info (REPORT_DETAILS))
351 fprintf (vect_dump, "=== vect_analyze_operations ===");
353 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
354 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
356 for (i = 0; i < nbbs; i++)
358 basic_block bb = bbs[i];
360 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
362 ok = true;
364 stmt_info = vinfo_for_stmt (phi);
365 if (vect_print_dump_info (REPORT_DETAILS))
367 fprintf (vect_dump, "examining phi: ");
368 print_generic_expr (vect_dump, phi, TDF_SLIM);
371 if (! is_loop_header_bb_p (bb))
373 /* inner-loop loop-closed exit phi in outer-loop vectorization
374 (i.e. a phi in the tail of the outer-loop).
375 FORNOW: we currently don't support the case that these phis
376 are not used in the outer-loop, because this case requires
377 us to actually do something here. */
378 if (!STMT_VINFO_RELEVANT_P (stmt_info)
379 || STMT_VINFO_LIVE_P (stmt_info))
381 if (vect_print_dump_info (REPORT_DETAILS))
382 fprintf (vect_dump,
383 "Unsupported loop-closed phi in outer-loop.");
384 return false;
386 continue;
389 gcc_assert (stmt_info);
391 if (STMT_VINFO_LIVE_P (stmt_info))
393 /* FORNOW: not yet supported. */
394 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
395 fprintf (vect_dump, "not vectorized: value used after loop.");
396 return false;
399 if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_loop
400 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
402 /* A scalar-dependence cycle that we don't support. */
403 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
404 fprintf (vect_dump, "not vectorized: scalar dependence cycle.");
405 return false;
408 if (STMT_VINFO_RELEVANT_P (stmt_info))
410 need_to_vectorize = true;
411 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
412 ok = vectorizable_induction (phi, NULL, NULL);
415 if (!ok)
417 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
419 fprintf (vect_dump,
420 "not vectorized: relevant phi not supported: ");
421 print_generic_expr (vect_dump, phi, TDF_SLIM);
423 return false;
427 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
429 tree stmt = bsi_stmt (si);
430 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
431 enum vect_def_type relevance = STMT_VINFO_RELEVANT (stmt_info);
433 if (vect_print_dump_info (REPORT_DETAILS))
435 fprintf (vect_dump, "==> examining statement: ");
436 print_generic_expr (vect_dump, stmt, TDF_SLIM);
439 gcc_assert (stmt_info);
441 /* Skip stmts which do not need to be vectorized.
442 This is expected to include:
443 - the COND_EXPR which is the loop exit condition
444 - any LABEL_EXPRs in the loop
445 - computations that are used only for array indexing or loop
446 control */
448 if (!STMT_VINFO_RELEVANT_P (stmt_info)
449 && !STMT_VINFO_LIVE_P (stmt_info))
451 if (vect_print_dump_info (REPORT_DETAILS))
452 fprintf (vect_dump, "irrelevant.");
453 continue;
456 switch (STMT_VINFO_DEF_TYPE (stmt_info))
458 case vect_loop_def:
459 break;
461 case vect_reduction_def:
462 gcc_assert (relevance == vect_used_in_outer
463 || relevance == vect_used_in_outer_by_reduction
464 || relevance == vect_unused_in_loop);
465 break;
467 case vect_induction_def:
468 case vect_constant_def:
469 case vect_invariant_def:
470 case vect_unknown_def_type:
471 default:
472 gcc_unreachable ();
475 if (STMT_VINFO_RELEVANT_P (stmt_info))
477 gcc_assert (GIMPLE_STMT_P (stmt)
478 || !VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))));
479 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
480 need_to_vectorize = true;
483 ok = (vectorizable_type_promotion (stmt, NULL, NULL)
484 || vectorizable_type_demotion (stmt, NULL, NULL)
485 || vectorizable_conversion (stmt, NULL, NULL, NULL)
486 || vectorizable_operation (stmt, NULL, NULL, NULL)
487 || vectorizable_assignment (stmt, NULL, NULL, NULL)
488 || vectorizable_load (stmt, NULL, NULL, NULL)
489 || vectorizable_call (stmt, NULL, NULL)
490 || vectorizable_store (stmt, NULL, NULL, NULL)
491 || vectorizable_condition (stmt, NULL, NULL)
492 || vectorizable_reduction (stmt, NULL, NULL));
494 /* Stmts that are (also) "live" (i.e., that are used outside the loop)
495 need extra handling, except for vectorizable reductions. */
496 if (STMT_VINFO_LIVE_P (stmt_info)
497 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
498 ok |= vectorizable_live_operation (stmt, NULL, NULL);
500 if (!ok)
502 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
504 fprintf (vect_dump, "not vectorized: stmt not supported: ");
505 print_generic_expr (vect_dump, stmt, TDF_SLIM);
507 return false;
510 if (!PURE_SLP_STMT (stmt_info))
512 /* STMT needs loop-based vectorization. */
513 only_slp_in_loop = false;
515 /* Groups of strided accesses whose size is not a power of 2 are
516 not yet vectorizable using loop-vectorization. Therefore, if
517 this stmt feeds non-SLP-able stmts (i.e., this stmt has to be
518 both SLPed and loop-based vectorized), the loop cannot be
519 vectorized. */
520 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
521 && exact_log2 (DR_GROUP_SIZE (vinfo_for_stmt (
522 DR_GROUP_FIRST_DR (stmt_info)))) == -1)
524 if (vect_print_dump_info (REPORT_DETAILS))
526 fprintf (vect_dump, "not vectorized: the size of group "
527 "of strided accesses is not a power of 2");
528 print_generic_expr (vect_dump, stmt, TDF_SLIM);
530 return false;
533 } /* stmts in bb */
534 } /* bbs */
536 /* All operations in the loop are either irrelevant (they deal with loop
537 control, or are dead), or are only used outside the loop and can be moved
538 out of the loop (e.g. invariants, inductions). The loop can be
539 optimized away by scalar optimizations. We're better off not
540 touching this loop. */
541 if (!need_to_vectorize)
543 if (vect_print_dump_info (REPORT_DETAILS))
544 fprintf (vect_dump,
545 "All the computation can be taken out of the loop.");
546 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
547 fprintf (vect_dump,
548 "not vectorized: redundant loop. no profit to vectorize.");
549 return false;
552 /* If all the stmts in the loop can be SLPed, we perform only SLP, and
553 vectorization factor of the loop is the unrolling factor required by the
554 SLP instances. If that unrolling factor is 1, we say that we perform
555 pure SLP on the loop; cross-iteration parallelism is not exploited. */
556 if (only_slp_in_loop)
557 vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
558 else
559 vectorization_factor = least_common_multiple (vectorization_factor,
560 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
562 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
564 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
565 && vect_print_dump_info (REPORT_DETAILS))
566 fprintf (vect_dump,
567 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
568 vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
570 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
571 && (LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor))
573 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
574 fprintf (vect_dump, "not vectorized: iteration count too small.");
575 if (vect_print_dump_info (REPORT_DETAILS))
576 fprintf (vect_dump,"not vectorized: iteration count smaller than "
577 "vectorization factor.");
578 return false;
581 /* Analyze cost. Decide if worthwhile to vectorize. */
583 /* Once VF is set, SLP costs should be updated since the number of created
584 vector stmts depends on VF. */
585 vect_update_slp_costs_according_to_vf (loop_vinfo);
587 min_profitable_iters = vect_estimate_min_profitable_iters (loop_vinfo);
588 LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo) = min_profitable_iters;
589 if (min_profitable_iters < 0)
591 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
592 fprintf (vect_dump, "not vectorized: vectorization not profitable.");
593 if (vect_print_dump_info (REPORT_DETAILS))
594 fprintf (vect_dump, "not vectorized: vector version will never be "
595 "profitable.");
596 return false;
599 min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
600 * vectorization_factor) - 1);
602 /* Use the cost model only if it is more conservative than the
603 user-specified threshold. */
605 th = (unsigned) min_scalar_loop_bound;
606 if (min_profitable_iters
607 && (!min_scalar_loop_bound
608 || min_profitable_iters > min_scalar_loop_bound))
609 th = (unsigned) min_profitable_iters;
611 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
612 && LOOP_VINFO_INT_NITERS (loop_vinfo) <= th)
614 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
615 fprintf (vect_dump, "not vectorized: vectorization not "
616 "profitable.");
617 if (vect_print_dump_info (REPORT_DETAILS))
618 fprintf (vect_dump, "not vectorized: iteration count smaller than "
619 "user specified loop bound parameter or minimum "
620 "profitable iterations (whichever is more conservative).");
621 return false;
624 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
625 || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
626 || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
628 if (vect_print_dump_info (REPORT_DETAILS))
629 fprintf (vect_dump, "epilog loop required.");
630 if (!vect_can_advance_ivs_p (loop_vinfo))
632 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
633 fprintf (vect_dump,
634 "not vectorized: can't create epilog loop 1.");
635 return false;
637 if (!slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
639 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
640 fprintf (vect_dump,
641 "not vectorized: can't create epilog loop 2.");
642 return false;
646 return true;
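
/* Editorial sketch (not part of GCC): the threshold logic above picks the
   more conservative (larger) of the user-specified scalar loop bound and
   the cost model's minimum profitable iteration count; the numbers below
   are assumed for illustration:

     int min_scalar_loop_bound = 3;    /* (param value * VF) - 1, assumed */
     int min_profitable_iters = 10;    /* from the cost model, assumed */
     unsigned th = (unsigned) min_scalar_loop_bound;
     if (min_profitable_iters
         && (!min_scalar_loop_bound
             || min_profitable_iters > min_scalar_loop_bound))
       th = (unsigned) min_profitable_iters;
     /* th == 10: a loop known to run at most 10 iterations is rejected.  */
*/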
650 /* Function exist_non_indexing_operands_for_use_p
652 USE is one of the uses attached to STMT. Check if USE is
653 used in STMT for anything other than indexing an array. */
655 static bool
656 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
658 tree operand;
659 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
661 /* USE corresponds to some operand in STMT. If there is no data
662 reference in STMT, then any operand that corresponds to USE
663 is not indexing an array. */
664 if (!STMT_VINFO_DATA_REF (stmt_info))
665 return true;
667 /* STMT has a data_ref. FORNOW this means that it is of one of
668 the following forms:
669 -1- ARRAY_REF = var
670 -2- var = ARRAY_REF
671 (This should have been verified in analyze_data_refs).
673 'var' in the second case corresponds to a def, not a use,
674 so USE cannot correspond to any operands that are not used
675 for array indexing.
677 Therefore, all we need to check is if STMT falls into the
678 first case, and whether var corresponds to USE. */
680 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) == SSA_NAME)
681 return false;
683 operand = GIMPLE_STMT_OPERAND (stmt, 1);
685 if (TREE_CODE (operand) != SSA_NAME)
686 return false;
688 if (operand == use)
689 return true;
691 return false;
695 /* Function vect_analyze_scalar_cycles_1.
697 Examine the cross-iteration def-use cycles of scalar variables
698 in LOOP. LOOP_VINFO represents the loop that is now being
699 considered for vectorization (can be LOOP, or an outer-loop
700 enclosing LOOP). */
702 static void
703 vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
705 tree phi;
706 basic_block bb = loop->header;
707 tree dummy;
708 VEC(tree,heap) *worklist = VEC_alloc (tree, heap, 64);
710 if (vect_print_dump_info (REPORT_DETAILS))
711 fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");
713 /* First - identify all inductions. */
714 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
716 tree access_fn = NULL;
717 tree def = PHI_RESULT (phi);
718 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
720 if (vect_print_dump_info (REPORT_DETAILS))
722 fprintf (vect_dump, "Analyze phi: ");
723 print_generic_expr (vect_dump, phi, TDF_SLIM);
726 /* Skip virtual phis. The data dependences that are associated with
727 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
728 if (!is_gimple_reg (SSA_NAME_VAR (def)))
729 continue;
731 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;
733 /* Analyze the evolution function. */
734 access_fn = analyze_scalar_evolution (loop, def);
735 if (access_fn && vect_print_dump_info (REPORT_DETAILS))
737 fprintf (vect_dump, "Access function of PHI: ");
738 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
741 if (!access_fn
742 || !vect_is_simple_iv_evolution (loop->num, access_fn, &dummy, &dummy))
744 VEC_safe_push (tree, heap, worklist, phi);
745 continue;
748 if (vect_print_dump_info (REPORT_DETAILS))
749 fprintf (vect_dump, "Detected induction.");
750 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
754 /* Second - identify all reductions. */
755 while (VEC_length (tree, worklist) > 0)
757 tree phi = VEC_pop (tree, worklist);
758 tree def = PHI_RESULT (phi);
759 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
760 tree reduc_stmt;
762 if (vect_print_dump_info (REPORT_DETAILS))
764 fprintf (vect_dump, "Analyze phi: ");
765 print_generic_expr (vect_dump, phi, TDF_SLIM);
768 gcc_assert (is_gimple_reg (SSA_NAME_VAR (def)));
769 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);
771 reduc_stmt = vect_is_simple_reduction (loop_vinfo, phi);
772 if (reduc_stmt)
774 if (vect_print_dump_info (REPORT_DETAILS))
775 fprintf (vect_dump, "Detected reduction.");
776 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
777 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
778 vect_reduction_def;
780 else
781 if (vect_print_dump_info (REPORT_DETAILS))
782 fprintf (vect_dump, "Unknown def-use cycle pattern.");
785 VEC_free (tree, heap, worklist);
786 return;
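
/* Editorial sketch (not part of GCC): the classification above is a
   two-pass worklist scheme. The arrays below stand in, hypothetically,
   for the answers of analyze_scalar_evolution and
   vect_is_simple_reduction on the two example phis:

     #include <stdio.h>

     enum cycle { UNKNOWN, INDUCTION, REDUCTION };

     int
     main (void)
     {
       /* i   = PHI <0, i+1>     -> simple induction
          sum = PHI <0, sum+x>   -> reduction (fails the IV test first) */
       int is_simple_iv[2] = { 1, 0 };   /* assumed scev answers */
       int is_reduction[2] = { 0, 1 };   /* assumed reduction answers */
       enum cycle kind[2] = { UNKNOWN, UNKNOWN };
       int worklist[2], n = 0, i;

       for (i = 0; i < 2; i++)           /* first pass: inductions */
         if (is_simple_iv[i])
           kind[i] = INDUCTION;
         else
           worklist[n++] = i;

       while (n > 0)                     /* second pass: reductions */
         {
           i = worklist[--n];
           if (is_reduction[i])
             kind[i] = REDUCTION;
         }

       printf ("%d %d\n", kind[0], kind[1]);   /* prints: 1 2 */
       return 0;
     }
*/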
790 /* Function vect_analyze_scalar_cycles.
792 Examine the cross-iteration def-use cycles of scalar variables, by
793 analyzing the loop-header PHIs of scalar variables; classify each
794 cycle as one of the following: invariant, induction, reduction, unknown.
795 We do that for the loop represented by LOOP_VINFO, and also for its
796 inner-loop, if it exists.
797 Examples for scalar cycles:
799 Example1: reduction:
801 loop1:
802 for (i=0; i<N; i++)
803 sum += a[i];
805 Example2: induction:
807 loop2:
808 for (i=0; i<N; i++)
809 a[i] = i; */
811 static void
812 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
814 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
816 vect_analyze_scalar_cycles_1 (loop_vinfo, loop);
818 /* When vectorizing an outer-loop, the inner-loop is executed sequentially.
819 Reductions in such an inner-loop therefore have different properties than
820 the reductions in the nest that gets vectorized:
821 1. When vectorized, they are executed in the same order as in the original
822 scalar loop, so we can't change the order of computation when
823 vectorizing them.
824 2. FIXME: Inner-loop reductions can be used in the inner-loop, so the
825 current checks are too strict. */
827 if (loop->inner)
828 vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
832 /* Function vect_insert_into_interleaving_chain.
834 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
836 static void
837 vect_insert_into_interleaving_chain (struct data_reference *dra,
838 struct data_reference *drb)
840 tree prev, next, next_init;
841 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
842 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
844 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
845 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
846 while (next)
848 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
849 if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
851 /* Insert here. */
852 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
853 DR_GROUP_NEXT_DR (stmtinfo_a) = next;
854 return;
856 prev = next;
857 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
860 /* We got to the end of the list. Insert here. */
861 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
862 DR_GROUP_NEXT_DR (stmtinfo_a) = NULL_TREE;
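
/* Editorial sketch (not part of GCC): the insertion above is a plain
   sorted-insert into a singly linked list keyed by INIT. A standalone
   analogue with a hypothetical node type; like the code above, it assumes
   the head already holds the smallest key:

     struct node { int init; struct node *next; };

     static void
     insert_sorted (struct node *head, struct node *n)
     {
       struct node *prev = head;
       while (prev->next && prev->next->init <= n->init)
         prev = prev->next;             /* find last node with key <= n */
       n->next = prev->next;            /* splice N in after PREV */
       prev->next = n;
     }
*/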
866 /* Function vect_update_interleaving_chain.
868 For two data-refs DRA and DRB that are part of a chain of interleaved data
869 accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
871 There are four possible cases:
872 1. New stmts - both DRA and DRB are not a part of any chain:
873 FIRST_DR = DRB
874 NEXT_DR (DRB) = DRA
875 2. DRB is a part of a chain and DRA is not:
876 no need to update FIRST_DR
877 no need to insert DRB
878 insert DRA according to init
879 3. DRA is a part of a chain and DRB is not:
880 if (init of FIRST_DR > init of DRB)
881 FIRST_DR = DRB
882 NEXT(FIRST_DR) = previous FIRST_DR
883 else
884 insert DRB according to its init
885 4. both DRA and DRB are in some interleaving chains:
886 choose the chain with the smallest init of FIRST_DR
887 insert the nodes of the second chain into the first one. */
889 static void
890 vect_update_interleaving_chain (struct data_reference *drb,
891 struct data_reference *dra)
893 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
894 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
895 tree next_init, init_dra_chain, init_drb_chain, first_a, first_b;
896 tree node, prev, next, node_init, first_stmt;
898 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
899 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
901 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
902 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
903 DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
904 return;
907 /* 2. DRB is a part of a chain and DRA is not. */
908 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
910 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
911 /* Insert DRA into the chain of DRB. */
912 vect_insert_into_interleaving_chain (dra, drb);
913 return;
916 /* 3. DRA is a part of a chain and DRB is not. */
917 if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
919 tree old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
920 tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
921 old_first_stmt)));
922 tree tmp;
924 if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
926 /* DRB's init is smaller than the init of the stmt previously marked
927 as the first stmt of the interleaving chain of DRA. Therefore, we
928 update FIRST_STMT and put DRB at the head of the list. */
929 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
930 DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;
932 /* Update all the stmts in the list to point to the new FIRST_STMT. */
933 tmp = old_first_stmt;
934 while (tmp)
936 DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
937 tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
940 else
942 /* Insert DRB in the list of DRA. */
943 vect_insert_into_interleaving_chain (drb, dra);
944 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
946 return;
949 /* 4. both DRA and DRB are in some interleaving chains. */
950 first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
951 first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
952 if (first_a == first_b)
953 return;
954 init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
955 init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));
957 if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
959 /* Insert the nodes of DRA chain into the DRB chain.
960 After inserting a node, continue from this node of the DRB chain (don't
961 start from the beginning). */
962 node = DR_GROUP_FIRST_DR (stmtinfo_a);
963 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
964 first_stmt = first_b;
966 else
968 /* Insert the nodes of DRB chain into the DRA chain.
969 After inserting a node, continue from this node of the DRA chain (don't
970 start from the beginning). */
971 node = DR_GROUP_FIRST_DR (stmtinfo_b);
972 prev = DR_GROUP_FIRST_DR (stmtinfo_a);
973 first_stmt = first_a;
976 while (node)
978 node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
979 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
980 while (next)
982 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
983 if (tree_int_cst_compare (next_init, node_init) > 0)
985 /* Insert here. */
986 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
987 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
988 prev = node;
989 break;
991 prev = next;
992 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
994 if (!next)
996 /* We got to the end of the list. Insert here. */
997 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
998 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL_TREE;
999 prev = node;
1001 DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
1002 node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
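
/* Editorial sketch (not part of GCC): case 4 above is a merge of two
   sorted lists. Reusing the hypothetical struct node from the sketch after
   vect_insert_into_interleaving_chain, and assuming DST_HEAD holds the
   smaller first key (the chain chosen above):

     static void
     merge_chains (struct node *dst_head, struct node *src)
     {
       struct node *prev = dst_head;
       while (src)
         {
           struct node *next_src = src->next;
           while (prev->next && prev->next->init <= src->init)
             prev = prev->next;
           src->next = prev->next;      /* splice SRC in after PREV */
           prev->next = src;
           prev = src;                  /* resume scan here, not at head */
           src = next_src;
         }
     }
*/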
1007 /* Function vect_equal_offsets.
1009 Check if OFFSET1 and OFFSET2 are identical expressions. */
1011 static bool
1012 vect_equal_offsets (tree offset1, tree offset2)
1014 bool res0, res1;
1016 STRIP_NOPS (offset1);
1017 STRIP_NOPS (offset2);
1019 if (offset1 == offset2)
1020 return true;
1022 if (TREE_CODE (offset1) != TREE_CODE (offset2)
1023 || !BINARY_CLASS_P (offset1)
1024 || !BINARY_CLASS_P (offset2))
1025 return false;
1027 res0 = vect_equal_offsets (TREE_OPERAND (offset1, 0),
1028 TREE_OPERAND (offset2, 0));
1029 res1 = vect_equal_offsets (TREE_OPERAND (offset1, 1),
1030 TREE_OPERAND (offset2, 1));
1032 return (res0 && res1);
1036 /* Function vect_check_interleaving.
1038 Check if DRA and DRB are part of an interleaving. If they are, insert
1039 DRA and DRB into an interleaving chain. */
1041 static void
1042 vect_check_interleaving (struct data_reference *dra,
1043 struct data_reference *drb)
1045 HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;
1047 /* Check that the data-refs have the same first location (except init) and
1048 that both are either stores or loads (not one load and one store). */
1049 if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
1050 && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
1051 || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
1052 || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
1053 != TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
1054 || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
1055 || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
1056 || DR_IS_READ (dra) != DR_IS_READ (drb))
1057 return;
1059 /* Check:
1060 1. data-refs are of the same type
1061 2. their steps are equal
1062 3. the step is at least the difference between the data-refs' inits */
1063 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
1064 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
1066 if (type_size_a != type_size_b
1067 || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb)))
1068 return;
1070 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
1071 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
1072 step = TREE_INT_CST_LOW (DR_STEP (dra));
1074 if (init_a > init_b)
1076 /* If init_a == init_b + the size of the type * k, we have an interleaving,
1077 and DRB is accessed before DRA. */
1078 diff_mod_size = (init_a - init_b) % type_size_a;
1080 if ((init_a - init_b) > step)
1081 return;
1083 if (diff_mod_size == 0)
1085 vect_update_interleaving_chain (drb, dra);
1086 if (vect_print_dump_info (REPORT_DR_DETAILS))
1088 fprintf (vect_dump, "Detected interleaving ");
1089 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1090 fprintf (vect_dump, " and ");
1091 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1093 return;
1096 else
1098 /* If init_b == init_a + the size of the type * k, we have an
1099 interleaving, and DRA is accessed before DRB. */
1100 diff_mod_size = (init_b - init_a) % type_size_a;
1102 if ((init_b - init_a) > step)
1103 return;
1105 if (diff_mod_size == 0)
1107 vect_update_interleaving_chain (dra, drb);
1108 if (vect_print_dump_info (REPORT_DR_DETAILS))
1110 fprintf (vect_dump, "Detected interleaving ");
1111 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1112 fprintf (vect_dump, " and ");
1113 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1115 return;
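
/* Editorial sketch (not part of GCC): the arithmetic above on concrete,
   assumed numbers. With 4-byte elements, DR_STEP == 8, and inits 4 and 0,
   the refs interleave as b[0], a[4], b[8], a[12], ... and DRB is accessed
   first:

     long init_a = 4, init_b = 0, step = 8, type_size = 4;
     if (init_a - init_b <= step
         && (init_a - init_b) % type_size == 0)
       ;   /* detected interleaving; DRB before DRA */
*/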
1121 /* Function vect_analyze_data_ref_dependence.
1123 Return TRUE if there (might) exist a dependence between a memory-reference
1124 DRA and a memory-reference DRB. */
1126 static bool
1127 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
1128 loop_vec_info loop_vinfo)
1130 unsigned int i;
1131 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1132 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1133 struct data_reference *dra = DDR_A (ddr);
1134 struct data_reference *drb = DDR_B (ddr);
1135 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
1136 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
1137 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
1138 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
1139 lambda_vector dist_v;
1140 unsigned int loop_depth;
1142 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
1144 /* Independent data accesses. */
1145 vect_check_interleaving (dra, drb);
1146 return false;
1149 if ((DR_IS_READ (dra) && DR_IS_READ (drb)) || dra == drb)
1150 return false;
1152 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1154 if (vect_print_dump_info (REPORT_DR_DETAILS))
1156 fprintf (vect_dump,
1157 "versioning for alias required: can't determine dependence between ");
1158 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1159 fprintf (vect_dump, " and ");
1160 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1162 return true;
1165 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1167 if (vect_print_dump_info (REPORT_DR_DETAILS))
1169 fprintf (vect_dump, "versioning for alias required: bad dist vector for ");
1170 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1171 fprintf (vect_dump, " and ");
1172 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1174 return true;
1177 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
1178 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
1180 int dist = dist_v[loop_depth];
1182 if (vect_print_dump_info (REPORT_DR_DETAILS))
1183 fprintf (vect_dump, "dependence distance = %d.", dist);
1185 /* Same loop iteration. */
1186 if (dist % vectorization_factor == 0 && dra_size == drb_size)
1188 /* Two references with distance zero have the same alignment. */
1189 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
1190 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
1191 if (vect_print_dump_info (REPORT_ALIGNMENT))
1192 fprintf (vect_dump, "accesses have the same alignment.");
1193 if (vect_print_dump_info (REPORT_DR_DETAILS))
1195 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
1196 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1197 fprintf (vect_dump, " and ");
1198 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1201 /* For interleaving, mark that there is a read-write dependency if
1202 necessary. We checked above that one of the data-refs is a store. */
1203 if (DR_IS_READ (dra))
1204 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
1205 else
1207 if (DR_IS_READ (drb))
1208 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
1211 continue;
1214 if (abs (dist) >= vectorization_factor)
1216 /* In this case the dependence distance does not create a dependence,
1217 as far as vectorization is concerned. */
1218 if (vect_print_dump_info (REPORT_DR_DETAILS))
1219 fprintf (vect_dump, "dependence distance >= VF.");
1220 continue;
1223 if (vect_print_dump_info (REPORT_DR_DETAILS))
1225 fprintf (vect_dump,
1226 "versioning for alias required: possible dependence "
1227 "between data-refs ");
1228 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1229 fprintf (vect_dump, " and ");
1230 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1233 return true;
1236 return false;
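
/* Editorial sketch (not part of GCC): how a dependence distance interacts
   with the VF in the loop above, with VF == 4 assumed. dist % VF == 0
   means the two refs fall in the same vector iteration and share
   alignment; |dist| >= VF means vectorization cannot violate the
   dependence; anything in between forces a run-time alias check:

     int dist = 2, vf = 4;               /* assumed values */
     if (dist % vf == 0)
       ;   /* same-alignment pair; dependence preserved */
     else if (dist >= vf || -dist >= vf)
       ;   /* distance >= VF: no hazard for vectorization */
     else
       ;   /* this example: mark for run-time aliasing test */
*/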
1239 /* Return TRUE if DDR_NEW is already found in MAY_ALIAS_DDRS list. */
1241 static bool
1242 vect_is_duplicate_ddr (VEC (ddr_p, heap) * may_alias_ddrs, ddr_p ddr_new)
1244 unsigned i;
1245 ddr_p ddr;
1247 for (i = 0; VEC_iterate (ddr_p, may_alias_ddrs, i, ddr); i++)
1249 tree dref_A_i, dref_B_i, dref_A_j, dref_B_j;
1251 dref_A_i = DR_REF (DDR_A (ddr));
1252 dref_B_i = DR_REF (DDR_B (ddr));
1253 dref_A_j = DR_REF (DDR_A (ddr_new));
1254 dref_B_j = DR_REF (DDR_B (ddr_new));
1256 if ((operand_equal_p (dref_A_i, dref_A_j, 0)
1257 && operand_equal_p (dref_B_i, dref_B_j, 0))
1258 || (operand_equal_p (dref_A_i, dref_B_j, 0)
1259 && operand_equal_p (dref_B_i, dref_A_j, 0)))
1261 if (vect_print_dump_info (REPORT_DR_DETAILS))
1263 fprintf (vect_dump, "found same pair of data references ");
1264 print_generic_expr (vect_dump, dref_A_i, TDF_SLIM);
1265 fprintf (vect_dump, " and ");
1266 print_generic_expr (vect_dump, dref_B_i, TDF_SLIM);
1268 return true;
1271 return false;
1274 /* Save DDR in the LOOP_VINFO list of ddrs that may alias and need to be
1275 tested at run-time. Returns false if the number of run-time checks
1276 inserted by the vectorizer is greater than the maximum defined by
1277 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS. */
1278 static bool
1279 vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
1281 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1283 if (vect_print_dump_info (REPORT_DR_DETAILS))
1285 fprintf (vect_dump, "mark for run-time aliasing test between ");
1286 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
1287 fprintf (vect_dump, " and ");
1288 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
1291 /* FORNOW: We don't support versioning with outer-loop vectorization. */
1292 if (loop->inner)
1294 if (vect_print_dump_info (REPORT_DR_DETAILS))
1295 fprintf (vect_dump, "versioning not yet supported for outer-loops.");
1296 return false;
1299 /* Do not add duplicate ddrs to the list. */
1300 if (vect_is_duplicate_ddr (LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr))
1301 return true;
1303 if (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo))
1304 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
1306 if (vect_print_dump_info (REPORT_DR_DETAILS))
1308 fprintf (vect_dump,
1309 "disable versioning for alias - max number of generated "
1310 "checks exceeded.");
1313 VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);
1315 return false;
1317 VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
1318 return true;
1321 /* Function vect_analyze_data_ref_dependences.
1323 Examine all the data references in the loop, and make sure there do not
1324 exist any data dependences between them. */
1326 static bool
1327 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
1329 unsigned int i;
1330 VEC (ddr_p, heap) * ddrs = LOOP_VINFO_DDRS (loop_vinfo);
1331 struct data_dependence_relation *ddr;
1333 if (vect_print_dump_info (REPORT_DETAILS))
1334 fprintf (vect_dump, "=== vect_analyze_data_ref_dependences ===");
1336 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
1337 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo))
1339 /* Add to list of ddrs that need to be tested at run-time. */
1340 if (!vect_mark_for_runtime_alias_test (ddr, loop_vinfo))
1341 return false;
1344 return true;
1348 /* Function vect_compute_data_ref_alignment
1350 Compute the misalignment of the data reference DR.
1352 Output:
1353 1. If during the misalignment computation it is found that the data reference
1354 cannot be vectorized then false is returned.
1355 2. DR_MISALIGNMENT (DR) is defined.
1357 FOR NOW: No analysis is actually performed. Misalignment is calculated
1358 only for trivial cases. TODO. */
1360 static bool
1361 vect_compute_data_ref_alignment (struct data_reference *dr)
1363 tree stmt = DR_STMT (dr);
1364 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1365 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1366 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1367 tree ref = DR_REF (dr);
1368 tree vectype;
1369 tree base, base_addr;
1370 bool base_aligned;
1371 tree misalign;
1372 tree aligned_to, alignment;
1374 if (vect_print_dump_info (REPORT_DETAILS))
1375 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
1377 /* Initialize misalignment to unknown. */
1378 SET_DR_MISALIGNMENT (dr, -1);
1380 misalign = DR_INIT (dr);
1381 aligned_to = DR_ALIGNED_TO (dr);
1382 base_addr = DR_BASE_ADDRESS (dr);
1384 /* In case the dataref is in an inner-loop of the loop that is being
1385 vectorized (LOOP), we use the base and misalignment information
1386 relative to the outer-loop (LOOP). This is ok only if the misalignment
1387 stays the same throughout the execution of the inner-loop, which is why
1388 we have to check that the stride of the dataref in the inner-loop is a
1389 multiple of the vector size. */
1390 if (nested_in_vect_loop_p (loop, stmt))
1392 tree step = DR_STEP (dr);
1393 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
1395 if (dr_step % UNITS_PER_SIMD_WORD == 0)
1397 if (vect_print_dump_info (REPORT_ALIGNMENT))
1398 fprintf (vect_dump, "inner step divides the vector-size.");
1399 misalign = STMT_VINFO_DR_INIT (stmt_info);
1400 aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
1401 base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
1403 else
1405 if (vect_print_dump_info (REPORT_ALIGNMENT))
1406 fprintf (vect_dump, "inner step doesn't divide the vector-size.");
1407 misalign = NULL_TREE;
1411 base = build_fold_indirect_ref (base_addr);
1412 vectype = STMT_VINFO_VECTYPE (stmt_info);
1413 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
1415 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
1416 || !misalign)
1418 if (vect_print_dump_info (REPORT_ALIGNMENT))
1420 fprintf (vect_dump, "Unknown alignment for access: ");
1421 print_generic_expr (vect_dump, base, TDF_SLIM);
1423 return true;
1426 if ((DECL_P (base)
1427 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
1428 alignment) >= 0)
1429 || (TREE_CODE (base_addr) == SSA_NAME
1430 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
1431 TREE_TYPE (base_addr)))),
1432 alignment) >= 0))
1433 base_aligned = true;
1434 else
1435 base_aligned = false;
1437 if (!base_aligned)
1439 /* Do not change the alignment of global variables if
1440 flag_section_anchors is enabled. */
1441 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
1442 || (TREE_STATIC (base) && flag_section_anchors))
1444 if (vect_print_dump_info (REPORT_DETAILS))
1446 fprintf (vect_dump, "can't force alignment of ref: ");
1447 print_generic_expr (vect_dump, ref, TDF_SLIM);
1449 return true;
1452 /* Force the alignment of the decl.
1453 NOTE: This is the only change to the code we make during
1454 the analysis phase, before deciding to vectorize the loop. */
1455 if (vect_print_dump_info (REPORT_DETAILS))
1456 fprintf (vect_dump, "force alignment");
1457 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
1458 DECL_USER_ALIGN (base) = 1;
1461 /* At this point we assume that the base is aligned. */
1462 gcc_assert (base_aligned
1463 || (TREE_CODE (base) == VAR_DECL
1464 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
1466 /* Modulo alignment. */
1467 misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);
1469 if (!host_integerp (misalign, 1))
1471 /* Negative or overflowed misalignment value. */
1472 if (vect_print_dump_info (REPORT_DETAILS))
1473 fprintf (vect_dump, "unexpected misalign value");
1474 return false;
1477 SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));
1479 if (vect_print_dump_info (REPORT_DETAILS))
1481 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
1482 print_generic_expr (vect_dump, ref, TDF_SLIM);
1485 return true;
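
/* Editorial sketch (not part of GCC): the final step above is a plain
   modulo. For an assumed 16-byte vector alignment and a dataref whose
   known offset from an aligned base (DR_INIT) is 20 bytes:

     int init = 20, vec_align = 16;
     int misalign = init % vec_align;    /* misalign == 4 bytes */
*/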
1489 /* Function vect_compute_data_refs_alignment
1491 Compute the misalignment of data references in the loop.
1492 Return FALSE if a data reference is found that cannot be vectorized. */
1494 static bool
1495 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
1497 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1498 struct data_reference *dr;
1499 unsigned int i;
1501 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1502 if (!vect_compute_data_ref_alignment (dr))
1503 return false;
1505 return true;
1509 /* Function vect_update_misalignment_for_peel
1511 DR - the data reference whose misalignment is to be adjusted.
1512 DR_PEEL - the data reference whose misalignment is being made
1513 zero in the vector loop by the peel.
1514 NPEEL - the number of iterations in the peel loop if the misalignment
1515 of DR_PEEL is known at compile time. */
1517 static void
1518 vect_update_misalignment_for_peel (struct data_reference *dr,
1519 struct data_reference *dr_peel, int npeel)
1521 unsigned int i;
1522 VEC(dr_p,heap) *same_align_drs;
1523 struct data_reference *current_dr;
1524 int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
1525 int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
1526 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
1527 stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
1529 /* For interleaved data accesses the step in the loop must be multiplied by
1530 the size of the interleaving group. */
1531 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1532 dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
1533 if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info))
1534 dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);
1536 /* It can be assumed that the data refs with the same alignment as dr_peel
1537 are aligned in the vector loop. */
1538 same_align_drs
1539 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
1540 for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++)
1542 if (current_dr != dr)
1543 continue;
1544 gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
1545 DR_MISALIGNMENT (dr_peel) / dr_peel_size);
1546 SET_DR_MISALIGNMENT (dr, 0);
1547 return;
1550 if (known_alignment_for_access_p (dr)
1551 && known_alignment_for_access_p (dr_peel))
1553 int misal = DR_MISALIGNMENT (dr);
1554 misal += npeel * dr_size;
1555 misal %= UNITS_PER_SIMD_WORD;
1556 SET_DR_MISALIGNMENT (dr, misal);
1557 return;
1560 if (vect_print_dump_info (REPORT_DETAILS))
1561 fprintf (vect_dump, "Setting misalignment to -1.");
1562 SET_DR_MISALIGNMENT (dr, -1);
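
/* Editorial sketch (not part of GCC): the known-alignment branch above.
   Peeling NPEEL scalar iterations advances each dataref by
   NPEEL * dr_size bytes, so the recorded misalignment is bumped and
   reduced modulo the vector size (16 bytes assumed here for
   UNITS_PER_SIMD_WORD):

     int misal = 8, npeel = 2, dr_size = 4, simd_size = 16;
     misal = (misal + npeel * dr_size) % simd_size;   /* misal == 0 */
*/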
1566 /* Function vect_verify_datarefs_alignment
1568 Return TRUE if all data references in the loop can be
1569 handled with respect to alignment. */
1571 static bool
1572 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo)
1574 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1575 struct data_reference *dr;
1576 enum dr_alignment_support supportable_dr_alignment;
1577 unsigned int i;
1579 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1581 tree stmt = DR_STMT (dr);
1582 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1584 /* For interleaving, only the alignment of the first access matters. */
1585 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1586 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1587 continue;
1589 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1590 if (!supportable_dr_alignment)
1592 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1594 if (DR_IS_READ (dr))
1595 fprintf (vect_dump,
1596 "not vectorized: unsupported unaligned load.");
1597 else
1598 fprintf (vect_dump,
1599 "not vectorized: unsupported unaligned store.");
1601 return false;
1603 if (supportable_dr_alignment != dr_aligned
1604 && vect_print_dump_info (REPORT_ALIGNMENT))
1605 fprintf (vect_dump, "Vectorizing an unaligned access.");
1607 return true;
1611 /* Function vector_alignment_reachable_p
1613 Return true if vector alignment for DR is reachable by peeling
1614 a few loop iterations. Return false otherwise. */
1616 static bool
1617 vector_alignment_reachable_p (struct data_reference *dr)
1619 tree stmt = DR_STMT (dr);
1620 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1621 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1623 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1625 /* For interleaved accesses we peel only if the number of iterations in
1626 the prolog loop (VF - misalignment) is a multiple of the
1627 number of interleaved accesses. */
1628 int elem_size, mis_in_elements;
1629 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1631 /* FORNOW: handle only known alignment. */
1632 if (!known_alignment_for_access_p (dr))
1633 return false;
1635 elem_size = UNITS_PER_SIMD_WORD / nelements;
1636 mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
1638 if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
1639 return false;
1642 /* If the misalignment is known at compile time then allow peeling
1643 only if natural alignment is reachable through peeling. */
1644 if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
1646 HOST_WIDE_INT elmsize =
1647 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1648 if (vect_print_dump_info (REPORT_DETAILS))
1650 fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
1651 fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
1653 if (DR_MISALIGNMENT (dr) % elmsize)
1655 if (vect_print_dump_info (REPORT_DETAILS))
1656 fprintf (vect_dump, "data size does not divide the misalignment.\n");
1657 return false;
1661 if (!known_alignment_for_access_p (dr))
1663 tree type = (TREE_TYPE (DR_REF (dr)));
1664 tree ba = DR_BASE_OBJECT (dr);
1665 bool is_packed = false;
1667 if (ba)
1668 is_packed = contains_packed_reference (ba);
1670 if (vect_print_dump_info (REPORT_DETAILS))
1671 fprintf (vect_dump, "Unknown misalignment, is_packed = %d", is_packed);
1672 return targetm.vectorize.vector_alignment_reachable (type, is_packed);
1678 return true;
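
/* Editorial sketch (not part of GCC): the interleaved-access test above on
   assumed numbers. With 4 elements per vector, a misalignment of 1 element
   and a group of 3 interleaved accesses, the prolog would peel
   4 - 1 == 3 iterations, a multiple of the group size, so alignment is
   reachable:

     int nelements = 4, mis_in_elements = 1, group_size = 3;
     if ((nelements - mis_in_elements) % group_size == 0)
       ;   /* peeling can reach vector alignment */
*/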
1681 /* Function vect_enhance_data_refs_alignment
1683 This pass will use loop versioning and loop peeling in order to enhance
1684 the alignment of data references in the loop.
1686 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1687 original loop is to be vectorized; any other loops that are created by
1688 the transformations performed in this pass are not supposed to be
1689 vectorized. This restriction will be relaxed.
1691 This pass will require a cost model to guide it in deciding whether to apply
1692 peeling or versioning, or a combination of the two. For example, the scheme
1693 that Intel uses when given a loop with several memory accesses is as follows:
1694 choose one memory access ('p') whose alignment you want to force by doing
1695 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1696 other accesses are not necessarily aligned, or (2) use loop versioning to
1697 generate one loop in which all accesses are aligned, and another loop in
1698 which only 'p' is necessarily aligned.
1700 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1701 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
1702 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1704 Devising a cost model is the most critical aspect of this work. It will
1705 guide us on which access to peel for, whether to use loop versioning, how
1706 many versions to create, etc. The cost model will probably consist of
1707 generic considerations as well as target specific considerations (on
1708 powerpc for example, misaligned stores are more painful than misaligned
1709 loads).
1711 Here are the general steps involved in alignment enhancements:
1713 -- original loop, before alignment analysis:
1714 for (i=0; i<N; i++){
1715 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1716 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1719 -- After vect_compute_data_refs_alignment:
1720 for (i=0; i<N; i++){
1721 x = q[i]; # DR_MISALIGNMENT(q) = 3
1722 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1725 -- Possibility 1: we do loop versioning:
1726 if (p is aligned) {
1727 for (i=0; i<N; i++){ # loop 1A
1728 x = q[i]; # DR_MISALIGNMENT(q) = 3
1729 p[i] = y; # DR_MISALIGNMENT(p) = 0
1732 else {
1733 for (i=0; i<N; i++){ # loop 1B
1734 x = q[i]; # DR_MISALIGNMENT(q) = 3
1735 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1739 -- Possibility 2: we do loop peeling:
1740 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1741 x = q[i];
1742 p[i] = y;
1744 for (i = 3; i < N; i++){ # loop 2A
1745 x = q[i]; # DR_MISALIGNMENT(q) = 0
1746 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1749 -- Possibility 3: combination of loop peeling and versioning:
1750 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1751 x = q[i];
1752 p[i] = y;
1754 if (p is aligned) {
1755 for (i = 3; i<N; i++){ # loop 3A
1756 x = q[i]; # DR_MISALIGNMENT(q) = 0
1757 p[i] = y; # DR_MISALIGNMENT(p) = 0
1760 else {
1761 for (i = 3; i<N; i++){ # loop 3B
1762 x = q[i]; # DR_MISALIGNMENT(q) = 0
1763 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1767 These loops are later passed to loop_transform to be vectorized. The
1768 vectorizer will use the alignment information to guide the transformation
1769 (whether to generate regular loads/stores, or with special handling for
1770 misalignment). */
1772 static bool
1773 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1775 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1776 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1777 enum dr_alignment_support supportable_dr_alignment;
1778 struct data_reference *dr0 = NULL;
1779 struct data_reference *dr;
1780 unsigned int i;
1781 bool do_peeling = false;
1782 bool do_versioning = false;
1783 bool stat;
1784 tree stmt;
1785 stmt_vec_info stmt_info;
1786 int vect_versioning_for_alias_required;
1788 if (vect_print_dump_info (REPORT_DETAILS))
1789 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1791 /* While cost model enhancements are expected in the future, the high level
1792 view of the code at this time is as follows:
1794 A) If there is a misaligned write then see if peeling to align this write
1795 can make all data references satisfy vect_supportable_dr_alignment.
1796 If so, update data structures as needed and return true. Note that
1797 at this time vect_supportable_dr_alignment is known to return false
1798 for a misaligned write.
1800 B) If peeling wasn't possible and there is a data reference with an
1801 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1802 then see if loop versioning checks can be used to make all data
1803 references satisfy vect_supportable_dr_alignment. If so, update
1804 data structures as needed and return true.
1806 C) If neither peeling nor versioning were successful then return false if
1807 any data reference does not satisfy vect_supportable_dr_alignment.
1809 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1811 Note that Possibility 3 above (which is peeling and versioning together) is not
1812 being done at this time. */
1814 /* (1) Peeling to force alignment. */
1816 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1817 Considerations:
1818 + How many accesses will become aligned due to the peeling
1819 - How many accesses will become unaligned due to the peeling,
1820 and the cost of misaligned accesses.
1821 - The cost of peeling (the extra runtime checks, the increase
1822 in code size).
1824 The scheme we use FORNOW: peel to force the alignment of the first
1825 misaligned store in the loop.
1826 Rationale: misaligned stores are not yet supported.
1828 TODO: Use a cost model. */
1830 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1832 stmt = DR_STMT (dr);
1833 stmt_info = vinfo_for_stmt (stmt);
1835 /* For interleaving, only the alignment of the first access
1836 matters. */
1837 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1838 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1839 continue;
1841 if (!DR_IS_READ (dr) && !aligned_access_p (dr))
1843 do_peeling = vector_alignment_reachable_p (dr);
1844 if (do_peeling)
1845 dr0 = dr;
1846 if (!do_peeling && vect_print_dump_info (REPORT_DETAILS))
1847 fprintf (vect_dump, "vector alignment may not be reachable");
1848 break;
1852 vect_versioning_for_alias_required =
1853 (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)) > 0);
1855 /* Temporarily, if versioning for alias is required, we disable peeling
1856 until we support peeling and versioning. Often peeling for alignment
1857 will require peeling for loop-bound, which in turn requires that we
1858 know how to adjust the loop ivs after the loop. */
1859 if (vect_versioning_for_alias_required
1860 || !vect_can_advance_ivs_p (loop_vinfo)
1861 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1862 do_peeling = false;
1864 if (do_peeling)
1866 int mis;
1867 int npeel = 0;
1868 tree stmt = DR_STMT (dr0);
1869 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1870 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1871 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1873 if (known_alignment_for_access_p (dr0))
1875 /* Since it's known at compile time, compute the number of iterations
1876 in the peeled loop (the peeling factor) for use in updating
1877 DR_MISALIGNMENT values. The peeling factor is the vectorization
1878 factor minus the misalignment as an element count. */
1879 mis = DR_MISALIGNMENT (dr0);
1880 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1881 npeel = nelements - mis;
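/* A worked example (illustrative numbers, not from the original source):
   with a 16-byte V4SI vector (NELEMENTS == 4) and 4-byte elements, a DR0
   with DR_MISALIGNMENT == 8 gives MIS = 8/4 = 2 elements, so
   NPEEL = 4 - 2 = 2; peeling two scalar iterations advances DR0 by
   8 bytes to a 16-byte boundary. */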
1883 /* For interleaved data access, every iteration accesses all the
1884 members of the group; therefore we divide the number of iterations
1885 by the group size. */
1886 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1887 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1888 npeel /= DR_GROUP_SIZE (stmt_info);
1890 if (vect_print_dump_info (REPORT_DETAILS))
1891 fprintf (vect_dump, "Try peeling by %d", npeel);
1894 /* Ensure that all data refs can be vectorized after the peel. */
1895 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1897 int save_misalignment;
1899 if (dr == dr0)
1900 continue;
1902 stmt = DR_STMT (dr);
1903 stmt_info = vinfo_for_stmt (stmt);
1904 /* For interleaving, only the alignment of the first access
1905 matters. */
1906 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1907 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1908 continue;
1910 save_misalignment = DR_MISALIGNMENT (dr);
1911 vect_update_misalignment_for_peel (dr, dr0, npeel);
1912 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1913 SET_DR_MISALIGNMENT (dr, save_misalignment);
1915 if (!supportable_dr_alignment)
1917 do_peeling = false;
1918 break;
1922 if (do_peeling)
1924 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1925 If the misalignment of DR_i is identical to that of dr0 then set
1926 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1927 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1928 by the peeling factor times the element size of DR_i (MOD the
1929 vectorization factor times the size). Otherwise, the
1930 misalignment of DR_i must be set to unknown. */
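/* For example (illustrative numbers): with a vectorization factor of 4,
   4-byte elements and NPEEL == 2, a DR_i whose known misalignment is
   12 bytes becomes (12 + 2*4) % (4*4) = 4 bytes after the peel; a DR_i
   with unknown misalignment stays unknown. */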
1931 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1932 if (dr != dr0)
1933 vect_update_misalignment_for_peel (dr, dr0, npeel);
1935 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1936 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1937 SET_DR_MISALIGNMENT (dr0, 0);
1938 if (vect_print_dump_info (REPORT_ALIGNMENT))
1939 fprintf (vect_dump, "Alignment of access forced using peeling.");
1941 if (vect_print_dump_info (REPORT_DETAILS))
1942 fprintf (vect_dump, "Peeling for alignment will be applied.");
1944 stat = vect_verify_datarefs_alignment (loop_vinfo);
1945 gcc_assert (stat);
1946 return stat;
1951 /* (2) Versioning to force alignment. */
1953 /* Try versioning if:
1954 1) flag_tree_vect_loop_version is TRUE
1955 2) optimize_size is FALSE
1956 3) there is at least one unsupported misaligned data ref with an unknown
1957 misalignment, and
1958 4) all misaligned data refs with a known misalignment are supported, and
1959 5) the number of runtime alignment checks is within reason. */
1961 do_versioning =
1962 flag_tree_vect_loop_version
1963 && (!optimize_size)
1964 && (!loop->inner); /* FORNOW */
1966 if (do_versioning)
1968 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1970 stmt = DR_STMT (dr);
1971 stmt_info = vinfo_for_stmt (stmt);
1973 /* For interleaving, only the alignment of the first access
1974 matters. */
1975 if (aligned_access_p (dr)
1976 || (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1977 && DR_GROUP_FIRST_DR (stmt_info) != stmt))
1978 continue;
1980 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1982 if (!supportable_dr_alignment)
1984 tree stmt;
1985 int mask;
1986 tree vectype;
1988 if (known_alignment_for_access_p (dr)
1989 || VEC_length (tree,
1990 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1991 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
1993 do_versioning = false;
1994 break;
1997 stmt = DR_STMT (dr);
1998 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1999 gcc_assert (vectype);
2001 /* The rightmost bits of an aligned address must be zeros.
2002 Construct the mask needed for this test. For example,
2003 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
2004 mask must be 15 = 0xf. */
2005 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
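/* Conceptually, the runtime test generated from this mask is (a sketch,
   with hypothetical addresses p and q of the may-misalign refs):

   if (((p | q) & mask) == 0)
     ... aligned (vectorized) loop version ...
   else
     ... unaligned (scalar) loop version ...

   i.e. the low bits of all potentially misaligned addresses are OR-ed
   together and tested against the mask. */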
2007 /* FORNOW: use the same mask to test all potentially unaligned
2008 references in the loop. The vectorizer currently supports
2009 a single vector size, see the reference to
2010 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
2011 vectorization factor is computed. */
2012 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
2013 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
2014 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
2015 VEC_safe_push (tree, heap,
2016 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
2017 DR_STMT (dr));
2021 /* Versioning requires at least one misaligned data reference. */
2022 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)) == 0)
2023 do_versioning = false;
2024 else if (!do_versioning)
2025 VEC_truncate (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
2028 if (do_versioning)
2030 VEC(tree,heap) *may_misalign_stmts
2031 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2032 tree stmt;
2034 /* It can now be assumed that the data references in the statements
2035 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
2036 of the loop being vectorized. */
2037 for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, stmt); i++)
2039 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2040 dr = STMT_VINFO_DATA_REF (stmt_info);
2041 SET_DR_MISALIGNMENT (dr, 0);
2042 if (vect_print_dump_info (REPORT_ALIGNMENT))
2043 fprintf (vect_dump, "Alignment of access forced using versioning.");
2046 if (vect_print_dump_info (REPORT_DETAILS))
2047 fprintf (vect_dump, "Versioning for alignment will be applied.");
2049 /* Peeling and versioning can't be done together at this time. */
2050 gcc_assert (! (do_peeling && do_versioning));
2052 stat = vect_verify_datarefs_alignment (loop_vinfo);
2053 gcc_assert (stat);
2054 return stat;
2057 /* This point is reached if neither peeling nor versioning is being done. */
2058 gcc_assert (! (do_peeling || do_versioning));
2060 stat = vect_verify_datarefs_alignment (loop_vinfo);
2061 return stat;
2065 /* Function vect_analyze_data_refs_alignment
2067 Analyze the alignment of the data-references in the loop.
2068 Return FALSE if a data reference is found that cannot be vectorized. */
2070 static bool
2071 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
2073 if (vect_print_dump_info (REPORT_DETAILS))
2074 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
2076 if (!vect_compute_data_refs_alignment (loop_vinfo))
2078 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2079 fprintf (vect_dump,
2080 "not vectorized: can't calculate alignment for data ref.");
2081 return false;
2084 return true;
2088 /* Analyze groups of strided accesses: check that DR belongs to a group of
2089 strided accesses of legal size, step, etc. Detect gaps, single element
2090 interleaving, and other special cases. Set strided access info.
2091 Collect groups of strided stores for further use in SLP analysis. */
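/* For example (an illustrative interleaved group, not from this source):

   for (i = 0; i < N; i++){
     a[2*i] = x; # DR_STEP = 8 bytes, type size = 4 bytes
     a[2*i+1] = y; # => STRIDE (group size) = 8/4 = 2
   }

   The two stores form a single interleaving group of size 2 with no
   gaps. */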
2093 static bool
2094 vect_analyze_group_access (struct data_reference *dr)
2096 tree step = DR_STEP (dr);
2097 tree scalar_type = TREE_TYPE (DR_REF (dr));
2098 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
2099 tree stmt = DR_STMT (dr);
2100 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2101 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2102 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2103 HOST_WIDE_INT stride;
2104 bool slp_impossible = false;
2106 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
2107 interleaving group (including gaps). */
2108 stride = dr_step / type_size;
2110 /* A non-consecutive access is possible only if it is part of an interleaving group. */
2111 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
2113 /* Check if this DR is part of an interleaving group of which it is the
2114 single element accessed in the loop. */
2116 /* Gaps are supported only for loads. STEP must be a multiple of the type
2117 size. The size of the group must be a power of 2. */
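/* For example (illustrative): a lone load 'x = a[2*i]' has an 8-byte
   step with a 4-byte type, so STRIDE == 2; it is recorded as a single
   element interleaving group of size 2 (one element read, one
   skipped). */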
2118 if (DR_IS_READ (dr)
2119 && (dr_step % type_size) == 0
2120 && stride > 0
2121 && exact_log2 (stride) != -1)
2123 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
2124 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2125 if (vect_print_dump_info (REPORT_DR_DETAILS))
2127 fprintf (vect_dump, "Detected single element interleaving %d ",
2128 DR_GROUP_SIZE (vinfo_for_stmt (stmt)));
2129 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2130 fprintf (vect_dump, " step ");
2131 print_generic_expr (vect_dump, step, TDF_SLIM);
2133 return true;
2135 if (vect_print_dump_info (REPORT_DETAILS))
2136 fprintf (vect_dump, "not consecutive access");
2137 return false;
2140 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
2142 /* First stmt in the interleaving chain. Check the chain. */
2143 tree next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
2144 struct data_reference *data_ref = dr;
2145 unsigned int count = 1;
2146 tree next_step;
2147 tree prev_init = DR_INIT (data_ref);
2148 tree prev = stmt;
2149 HOST_WIDE_INT diff, count_in_bytes;
2151 while (next)
2153 /* Skip identical data-refs. In case two or more stmts share a data-ref
2154 (supported only for loads), we vectorize only the first stmt, and
2155 the rest get their vectorized loads from the first one. */
2156 if (!tree_int_cst_compare (DR_INIT (data_ref),
2157 DR_INIT (STMT_VINFO_DATA_REF (
2158 vinfo_for_stmt (next)))))
2160 if (!DR_IS_READ (data_ref))
2162 if (vect_print_dump_info (REPORT_DETAILS))
2163 fprintf (vect_dump, "Two store stmts share the same dr.");
2164 return false;
2167 /* Check that there are no load-store dependences for these loads,
2168 to prevent a load-store-load sequence to the same location. */
2169 if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
2170 || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
2172 if (vect_print_dump_info (REPORT_DETAILS))
2173 fprintf (vect_dump,
2174 "READ_WRITE dependence in interleaving.");
2175 return false;
2178 /* For loads, reuse the same data-ref load. */
2179 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
2181 prev = next;
2182 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2183 continue;
2185 prev = next;
2187 /* Check that all the accesses have the same STEP. */
2188 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
2189 if (tree_int_cst_compare (step, next_step))
2191 if (vect_print_dump_info (REPORT_DETAILS))
2192 fprintf (vect_dump, "not consecutive access in interleaving");
2193 return false;
2196 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
2197 /* Check that the distance between two accesses is equal to the type
2198 size. Otherwise, we have gaps. */
2199 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2200 - TREE_INT_CST_LOW (prev_init)) / type_size;
2201 if (diff != 1)
2203 /* FORNOW: SLP of accesses with gaps is not supported. */
2204 slp_impossible = true;
2205 if (!DR_IS_READ (data_ref))
2207 if (vect_print_dump_info (REPORT_DETAILS))
2208 fprintf (vect_dump, "interleaved store with gaps");
2209 return false;
2213 /* Store the gap from the previous member of the group. If there is no
2214 gap in the access, DR_GROUP_GAP is always 1. */
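/* For example (illustrative): for the loads 'x = a[3*i]' and
   'y = a[3*i+2]' in one group, DIFF is 2, so DR_GROUP_GAP of the second
   load is 2 (element a[3*i+1] is a gap) and SLP of the group is ruled
   out. */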
2215 DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
2217 prev_init = DR_INIT (data_ref);
2218 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2219 /* Count the number of data-refs in the chain. */
2220 count++;
2223 /* COUNT is the number of accesses found; we multiply it by the size of
2224 the type to get COUNT_IN_BYTES. */
2225 count_in_bytes = type_size * count;
2227 /* Check that the size of the interleaving is not greater than STEP. */
2228 if (dr_step < count_in_bytes)
2230 if (vect_print_dump_info (REPORT_DETAILS))
2232 fprintf (vect_dump, "interleaving size is greater than step for ");
2233 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2235 return false;
2238 /* Check that the size of the interleaving is equal to STEP for stores,
2239 i.e., that there are no gaps. */
2240 if (!DR_IS_READ (dr) && dr_step != count_in_bytes)
2242 if (vect_print_dump_info (REPORT_DETAILS))
2243 fprintf (vect_dump, "interleaved store with gaps");
2244 return false;
2247 /* Check that STEP is a multiple of type size. */
2248 if ((dr_step % type_size) != 0)
2250 if (vect_print_dump_info (REPORT_DETAILS))
2252 fprintf (vect_dump, "step is not a multiple of type size: step ");
2253 print_generic_expr (vect_dump, step, TDF_SLIM);
2254 fprintf (vect_dump, " size ");
2255 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
2256 TDF_SLIM);
2258 return false;
2261 /* FORNOW: we handle only interleaving that is a power of 2.
2263 We don't fail here if it may still be possible to vectorize the
2263 group using SLP. If not, the size of the group will be checked in
2264 vect_analyze_operations, and the vectorization will fail. */
2265 if (exact_log2 (stride) == -1)
2267 if (vect_print_dump_info (REPORT_DETAILS))
2268 fprintf (vect_dump, "interleaving is not a power of 2");
2270 if (slp_impossible)
2271 return false;
2273 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2274 if (vect_print_dump_info (REPORT_DETAILS))
2275 fprintf (vect_dump, "Detected interleaving of size %d", (int)stride);
2277 /* SLP: create an SLP data structure for every interleaving group of
2278 stores for further analysis in vect_analyze_slp. */
2279 if (!DR_IS_READ (dr) && !slp_impossible)
2280 VEC_safe_push (tree, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo), stmt);
2283 return true;
2287 /* Analyze the access pattern of the data-reference DR.
2288 In case of a non-consecutive access, call vect_analyze_group_access() to
2289 analyze groups of strided accesses. */
2291 static bool
2292 vect_analyze_data_ref_access (struct data_reference *dr)
2294 tree step = DR_STEP (dr);
2295 tree scalar_type = TREE_TYPE (DR_REF (dr));
2296 tree stmt = DR_STMT (dr);
2297 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2298 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2299 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2300 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2302 if (!step)
2304 if (vect_print_dump_info (REPORT_DETAILS))
2305 fprintf (vect_dump, "bad data-ref access");
2306 return false;
2309 /* Don't allow invariant accesses. */
2310 if (dr_step == 0)
2311 return false;
2313 if (nested_in_vect_loop_p (loop, stmt))
2315 /* For the rest of the analysis we use the outer-loop step. */
2316 step = STMT_VINFO_DR_STEP (stmt_info);
2317 dr_step = TREE_INT_CST_LOW (step);
2319 if (dr_step == 0)
2321 if (vect_print_dump_info (REPORT_ALIGNMENT))
2322 fprintf (vect_dump, "zero step in outer loop.");
2323 if (DR_IS_READ (dr))
2324 return true;
2325 else
2326 return false;
2330 /* Consecutive? */
2331 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
2333 /* Mark that it is not interleaving. */
2334 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL_TREE;
2335 return true;
2338 if (nested_in_vect_loop_p (loop, stmt))
2340 if (vect_print_dump_info (REPORT_ALIGNMENT))
2341 fprintf (vect_dump, "strided access in outer loop.");
2342 return false;
2345 /* Not a consecutive access - check whether it is part of an interleaving group. */
2346 return vect_analyze_group_access (dr);
2350 /* Function vect_analyze_data_ref_accesses.
2352 Analyze the access pattern of all the data references in the loop.
2354 FORNOW: the only access pattern that is considered vectorizable is a
2355 simple step 1 (consecutive) access.
2357 FORNOW: handle only arrays and pointer accesses. */
2359 static bool
2360 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
2362 unsigned int i;
2363 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2364 struct data_reference *dr;
2366 if (vect_print_dump_info (REPORT_DETAILS))
2367 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
2369 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
2370 if (!vect_analyze_data_ref_access (dr))
2372 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2373 fprintf (vect_dump, "not vectorized: complicated access pattern.");
2374 return false;
2377 return true;
2381 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
2383 void
2384 vect_free_slp_tree (slp_tree node)
2386 if (!node)
2387 return;
2389 if (SLP_TREE_LEFT (node))
2390 vect_free_slp_tree (SLP_TREE_LEFT (node));
2392 if (SLP_TREE_RIGHT (node))
2393 vect_free_slp_tree (SLP_TREE_RIGHT (node));
2395 VEC_free (tree, heap, SLP_TREE_SCALAR_STMTS (node));
2397 if (SLP_TREE_VEC_STMTS (node))
2398 VEC_free (tree, heap, SLP_TREE_VEC_STMTS (node));
2400 free (node);
2404 /* Get the defs for the RHS (collect them in DEF_STMTS0/1), check that they are
2405 of a legal type and that they match the defs of the first stmt of the SLP
2406 group (stored in FIRST_STMT_...). */
2408 static bool
2409 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, slp_tree slp_node,
2410 tree rhs, VEC (tree, heap) **def_stmts0,
2411 VEC (tree, heap) **def_stmts1,
2412 enum vect_def_type *first_stmt_dt0,
2413 enum vect_def_type *first_stmt_dt1,
2414 tree *first_stmt_def0_type,
2415 tree *first_stmt_def1_type,
2416 tree *first_stmt_const_oprnd,
2417 int ncopies_for_cost)
2419 tree oprnd;
2420 enum operation_type op_type = TREE_OPERAND_LENGTH (rhs);
2421 unsigned int i, number_of_oprnds = op_type;
2422 tree def, def_stmt;
2423 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2424 stmt_vec_info stmt_info =
2425 vinfo_for_stmt (VEC_index (tree, SLP_TREE_SCALAR_STMTS (slp_node), 0));
2427 /* Store. */
2428 if (!op_type)
2429 number_of_oprnds = 1;
2430 else
2431 gcc_assert (op_type == unary_op || op_type == binary_op);
2433 for (i = 0; i < number_of_oprnds; i++)
2435 if (op_type)
2436 oprnd = TREE_OPERAND (rhs, i);
2437 else
2438 oprnd = rhs;
2440 if (!vect_is_simple_use (oprnd, loop_vinfo, &def_stmt, &def, &dt[i])
2441 || (!def_stmt && dt[i] != vect_constant_def))
2443 if (vect_print_dump_info (REPORT_SLP))
2445 fprintf (vect_dump, "Build SLP failed: can't find def for ");
2446 print_generic_expr (vect_dump, oprnd, TDF_SLIM);
2449 return false;
2452 if (!*first_stmt_dt0)
2454 /* op0 of the first stmt of the group - store its info. */
2455 *first_stmt_dt0 = dt[i];
2456 if (def)
2457 *first_stmt_def0_type = TREE_TYPE (def);
2458 else
2459 *first_stmt_const_oprnd = oprnd;
2461 /* Analyze costs (for the first stmt of the group only). */
2462 if (op_type)
2463 /* Not a memory operation (we don't call this function for loads). */
2464 vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
2465 else
2466 /* Store. */
2467 vect_model_store_cost (stmt_info, ncopies_for_cost, dt[0], slp_node);
2470 else
2472 if (!*first_stmt_dt1 && i == 1)
2474 /* op1 of the first stmt of the group - store its info. */
2475 *first_stmt_dt1 = dt[i];
2476 if (def)
2477 *first_stmt_def1_type = TREE_TYPE (def);
2478 else
2480 /* We assume that the stmt contains only one constant
2481 operand. We fail otherwise, to be on the safe side. */
2482 if (*first_stmt_const_oprnd)
2484 if (vect_print_dump_info (REPORT_SLP))
2485 fprintf (vect_dump, "Build SLP failed: two constant "
2486 "oprnds in stmt");
2487 return false;
2489 *first_stmt_const_oprnd = oprnd;
2492 else
2494 /* Not the first stmt of the group; check that the def-stmt/s match
2495 the def-stmt/s of the first stmt. */
2496 if ((i == 0
2497 && (*first_stmt_dt0 != dt[i]
2498 || (*first_stmt_def0_type && def
2499 && *first_stmt_def0_type != TREE_TYPE (def))))
2500 || (i == 1
2501 && (*first_stmt_dt1 != dt[i]
2502 || (*first_stmt_def1_type && def
2503 && *first_stmt_def1_type != TREE_TYPE (def))))
2504 || (!def
2505 && TREE_TYPE (*first_stmt_const_oprnd)
2506 != TREE_TYPE (oprnd)))
2508 if (vect_print_dump_info (REPORT_SLP))
2509 fprintf (vect_dump, "Build SLP failed: different types ");
2511 return false;
2516 /* Check the types of the definitions. */
2517 switch (dt[i])
2519 case vect_constant_def:
2520 case vect_invariant_def:
2521 break;
2523 case vect_loop_def:
2524 if (i == 0)
2525 VEC_safe_push (tree, heap, *def_stmts0, def_stmt);
2526 else
2527 VEC_safe_push (tree, heap, *def_stmts1, def_stmt);
2528 break;
2530 default:
2531 /* FORNOW: Not supported. */
2532 if (vect_print_dump_info (REPORT_SLP))
2534 fprintf (vect_dump, "Build SLP failed: illegal type of def ");
2535 print_generic_expr (vect_dump, def, TDF_SLIM);
2538 return false;
2542 return true;
2546 /* Recursively build an SLP tree starting from NODE.
2547 Fail (and return FALSE) if def-stmts are not isomorphic, require data
2548 permutation or are of unsupported types of operation. Otherwise, return
2549 TRUE.
2550 SLP_IMPOSSIBLE is set to TRUE if it is impossible to SLP anything in the
2551 loop, for example in the case of multiple types (for now). */
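/* For example (an illustrative group of two isomorphic stores):

   a[2*i] = b[2*i] + x;
   a[2*i+1] = b[2*i+1] + x;

   builds a root node holding the two stores, with a left child node
   holding the two loads from 'b' (where the recursion stops); a right
   child would be built for the defs of 'x' only if they were loop
   defs. */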
2553 static bool
2554 vect_build_slp_tree (loop_vec_info loop_vinfo, slp_tree *node,
2555 unsigned int group_size, bool *slp_impossible,
2556 int *inside_cost, int *outside_cost,
2557 int ncopies_for_cost)
2559 VEC (tree, heap) *def_stmts0 = VEC_alloc (tree, heap, group_size);
2560 VEC (tree, heap) *def_stmts1 = VEC_alloc (tree, heap, group_size);
2561 unsigned int i;
2562 VEC (tree, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
2563 tree stmt = VEC_index (tree, stmts, 0);
2564 enum vect_def_type first_stmt_dt0 = 0, first_stmt_dt1 = 0;
2565 enum tree_code first_stmt_code = 0;
2566 tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE;
2567 tree lhs, rhs, prev_stmt = NULL_TREE;
2568 bool stop_recursion = false, need_same_oprnds = false;
2569 tree vectype, scalar_type, first_op1 = NULL_TREE;
2570 unsigned int vectorization_factor = 0, ncopies;
2571 optab optab;
2572 int icode;
2573 enum machine_mode optab_op2_mode;
2574 enum machine_mode vec_mode;
2575 tree first_stmt_const_oprnd = NULL_TREE;
2576 struct data_reference *first_dr;
2578 /* For every stmt in NODE find its def stmt/s. */
2579 for (i = 0; VEC_iterate (tree, stmts, i, stmt); i++)
2581 if (vect_print_dump_info (REPORT_SLP))
2583 fprintf (vect_dump, "Build SLP for ");
2584 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2587 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
2589 if (vect_print_dump_info (REPORT_SLP))
2591 fprintf (vect_dump, "Build SLP failed: not MODIFY_STMT ");
2592 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2595 return false;
2598 scalar_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));
2599 vectype = get_vectype_for_scalar_type (scalar_type);
2600 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
2601 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2602 ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
2603 if (ncopies > 1)
2605 /* FORNOW. */
2606 if (vect_print_dump_info (REPORT_SLP))
2607 fprintf (vect_dump, "SLP failed - multiple types ");
2609 *slp_impossible = true;
2610 return false;
2613 lhs = GIMPLE_STMT_OPERAND (stmt, 0);
2614 rhs = GIMPLE_STMT_OPERAND (stmt, 1);
2616 /* Check the operation. */
2617 if (i == 0)
2619 first_stmt_code = TREE_CODE (rhs);
2621 /* Shift arguments should be equal in all the packed stmts for a
2622 vector shift with scalar shift operand. */
2623 if (TREE_CODE (rhs) == LSHIFT_EXPR || TREE_CODE (rhs) == RSHIFT_EXPR)
2625 vec_mode = TYPE_MODE (vectype);
2626 optab = optab_for_tree_code (TREE_CODE (rhs), vectype);
2627 if (!optab)
2629 if (vect_print_dump_info (REPORT_SLP))
2630 fprintf (vect_dump, "Build SLP failed: no optab.");
2631 return false;
2633 icode = (int) optab->handlers[(int) vec_mode].insn_code;
2634 optab_op2_mode = insn_data[icode].operand[2].mode;
2635 if (!VECTOR_MODE_P (optab_op2_mode))
2637 need_same_oprnds = true;
2638 first_op1 = TREE_OPERAND (rhs, 1);
2642 else
2644 if (first_stmt_code != TREE_CODE (rhs))
2646 if (vect_print_dump_info (REPORT_SLP))
2648 fprintf (vect_dump,
2649 "Build SLP failed: different operation in stmt ");
2650 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2653 return false;
2656 if (need_same_oprnds
2657 && !operand_equal_p (first_op1, TREE_OPERAND (rhs, 1), 0))
2659 if (vect_print_dump_info (REPORT_SLP))
2661 fprintf (vect_dump,
2662 "Build SLP failed: different shift arguments in ");
2663 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2666 return false;
2670 /* Strided store or load. */
2671 if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
2673 if (REFERENCE_CLASS_P (lhs))
2675 /* Store. */
2676 if (!vect_get_and_check_slp_defs (loop_vinfo, *node, rhs,
2677 &def_stmts0, &def_stmts1,
2678 &first_stmt_dt0,
2679 &first_stmt_dt1,
2680 &first_stmt_def0_type,
2681 &first_stmt_def1_type,
2682 &first_stmt_const_oprnd,
2683 ncopies_for_cost))
2684 return false;
2686 else
2688 /* Load. */
2689 if (i == 0)
2691 /* The first stmt of the SLP group should be the first load of
2692 the interleaving chain if data permutation is not
2693 allowed. */
2694 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != stmt)
2696 /* FORNOW: data permutations are not supported. */
2697 if (vect_print_dump_info (REPORT_SLP))
2699 fprintf (vect_dump, "Build SLP failed: strided "
2700 " loads need permutation ");
2701 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2704 return false;
2707 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
2708 if (vect_supportable_dr_alignment (first_dr)
2709 == dr_unaligned_unsupported)
2711 if (vect_print_dump_info (REPORT_SLP))
2713 fprintf (vect_dump, "Build SLP failed: unsupported "
2714 " unaligned load ");
2715 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2718 return false;
2721 /* Analyze costs (for the first stmt in the group). */
2722 vect_model_load_cost (vinfo_for_stmt (stmt),
2723 ncopies_for_cost, *node);
2725 else
2727 if (DR_GROUP_NEXT_DR (vinfo_for_stmt (prev_stmt)) != stmt)
2729 /* FORNOW: data permutations are not supported. */
2730 if (vect_print_dump_info (REPORT_SLP))
2732 fprintf (vect_dump, "Build SLP failed: strided "
2733 " loads need permutation ");
2734 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2736 return false;
2740 prev_stmt = stmt;
2742 /* We stop growing the tree when we reach a group of loads. */
2743 stop_recursion = true;
2744 continue;
2746 } /* Strided access. */
2747 else
2749 if (REFERENCE_CLASS_P (rhs))
2751 /* Not a strided load. */
2752 if (vect_print_dump_info (REPORT_SLP))
2754 fprintf (vect_dump, "Build SLP failed: not strided load ");
2755 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2758 /* FORNOW: non-strided loads are not supported. */
2759 return false;
2762 /* Not a memory operation. */
2763 if (!BINARY_CLASS_P (rhs) && !UNARY_CLASS_P (rhs))
2765 if (vect_print_dump_info (REPORT_SLP))
2767 fprintf (vect_dump, "Build SLP failed: operation");
2768 fprintf (vect_dump, " unsupported ");
2769 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2772 return false;
2775 /* Find the def-stmts. */
2776 if (!vect_get_and_check_slp_defs (loop_vinfo, *node, rhs, &def_stmts0,
2777 &def_stmts1, &first_stmt_dt0,
2778 &first_stmt_dt1,
2779 &first_stmt_def0_type,
2780 &first_stmt_def1_type,
2781 &first_stmt_const_oprnd,
2782 ncopies_for_cost))
2783 return false;
2787 /* Add the costs of the node to the overall instance costs. */
2788 *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
2789 *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);
2791 /* Strided loads were reached - stop the recursion. */
2792 if (stop_recursion)
2793 return true;
2795 /* Create SLP_TREE nodes for the definition node/s. */
2796 if (first_stmt_dt0 == vect_loop_def)
2798 slp_tree left_node = XNEW (struct _slp_tree);
2799 SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0;
2800 SLP_TREE_VEC_STMTS (left_node) = NULL;
2801 SLP_TREE_LEFT (left_node) = NULL;
2802 SLP_TREE_RIGHT (left_node) = NULL;
2803 SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
2804 SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
2805 if (!vect_build_slp_tree (loop_vinfo, &left_node, group_size,
2806 slp_impossible, inside_cost, outside_cost,
2807 ncopies_for_cost))
2808 return false;
2810 SLP_TREE_LEFT (*node) = left_node;
2813 if (first_stmt_dt1 == vect_loop_def)
2815 slp_tree right_node = XNEW (struct _slp_tree);
2816 SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1;
2817 SLP_TREE_VEC_STMTS (right_node) = NULL;
2818 SLP_TREE_LEFT (right_node) = NULL;
2819 SLP_TREE_RIGHT (right_node) = NULL;
2820 SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
2821 SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
2822 if (!vect_build_slp_tree (loop_vinfo, &right_node, group_size,
2823 slp_impossible, inside_cost, outside_cost,
2824 ncopies_for_cost))
2825 return false;
2827 SLP_TREE_RIGHT (*node) = right_node;
2830 return true;
2834 static void
2835 vect_print_slp_tree (slp_tree node)
2837 int i;
2838 tree stmt;
2840 if (!node)
2841 return;
2843 fprintf (vect_dump, "node ");
2844 for (i = 0; VEC_iterate (tree, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
2846 fprintf (vect_dump, "\n\tstmt %d ", i);
2847 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2849 fprintf (vect_dump, "\n");
2851 vect_print_slp_tree (SLP_TREE_LEFT (node));
2852 vect_print_slp_tree (SLP_TREE_RIGHT (node));
2856 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
2857 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
2858 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
2859 stmts in NODE are to be marked. */
2861 static void
2862 vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
2864 int i;
2865 tree stmt;
2867 if (!node)
2868 return;
2870 for (i = 0; VEC_iterate (tree, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
2871 if (j < 0 || i == j)
2872 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
2874 vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j);
2875 vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j);
2879 /* Analyze an SLP instance starting from a group of strided stores. Call
2880 vect_build_slp_tree to build a tree of packed stmts if possible.
2881 Return FALSE if it's impossible to SLP any stmt in the loop. */
2883 static bool
2884 vect_analyze_slp_instance (loop_vec_info loop_vinfo, tree stmt)
2886 slp_instance new_instance;
2887 slp_tree node = XNEW (struct _slp_tree);
2888 unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
2889 unsigned int unrolling_factor = 1, nunits;
2890 tree vectype, scalar_type, next;
2891 unsigned int vectorization_factor = 0, ncopies;
2892 bool slp_impossible = false;
2893 int inside_cost = 0, outside_cost = 0, ncopies_for_cost;
2895 /* FORNOW: multiple types are not supported. */
2896 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))));
2897 vectype = get_vectype_for_scalar_type (scalar_type);
2898 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2899 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2900 ncopies = vectorization_factor / nunits;
2901 if (ncopies > 1)
2903 if (vect_print_dump_info (REPORT_SLP))
2904 fprintf (vect_dump, "SLP failed - multiple types ");
2906 return false;
2909 /* Create a node (a root of the SLP tree) for the packed strided stores. */
2910 SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (tree, heap, group_size);
2911 next = stmt;
2912 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
2913 while (next)
2915 VEC_safe_push (tree, heap, SLP_TREE_SCALAR_STMTS (node), next);
2916 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2919 SLP_TREE_VEC_STMTS (node) = NULL;
2920 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
2921 SLP_TREE_LEFT (node) = NULL;
2922 SLP_TREE_RIGHT (node) = NULL;
2923 SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
2924 SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;
2926 /* Calculate the unrolling factor. */
2927 unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
2929 /* Calculate the number of vector stmts to create based on the unrolling
2930 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
2931 GROUP_SIZE / NUNITS otherwise). */
2932 ncopies_for_cost = unrolling_factor * group_size / nunits;
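/* For example (illustrative numbers): with NUNITS == 4 and
   GROUP_SIZE == 2, UNROLLING_FACTOR = lcm (4, 2) / 2 = 2 (two group
   instances fill one vector), and NCOPIES_FOR_COST = 2 * 2 / 4 = 1
   vector stmt per node. */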
2934 /* Build the tree for the SLP instance. */
2935 if (vect_build_slp_tree (loop_vinfo, &node, group_size, &slp_impossible,
2936 &inside_cost, &outside_cost, ncopies_for_cost))
2938 /* Create a new SLP instance. */
2939 new_instance = XNEW (struct _slp_instance);
2940 SLP_INSTANCE_TREE (new_instance) = node;
2941 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
2942 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
2943 SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
2944 SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
2945 VEC_safe_push (slp_instance, heap, LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
2946 new_instance);
2947 if (vect_print_dump_info (REPORT_SLP))
2948 vect_print_slp_tree (node);
2950 return true;
2953 /* Failed to SLP. */
2954 /* Free the allocated memory. */
2955 vect_free_slp_tree (node);
2957 if (slp_impossible)
2958 return false;
2960 /* SLP failed for this instance, but it is still possible to SLP other stmts
2961 in the loop. */
2962 return true;
2966 /* Check if there are stmts in the loop that can be vectorized using SLP.
2967 Build SLP trees of packed scalar stmts if SLP is possible. */
2969 static bool
2970 vect_analyze_slp (loop_vec_info loop_vinfo)
2972 unsigned int i;
2973 VEC (tree, heap) *strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
2974 tree store;
2976 if (vect_print_dump_info (REPORT_SLP))
2977 fprintf (vect_dump, "=== vect_analyze_slp ===");
2979 for (i = 0; VEC_iterate (tree, strided_stores, i, store); i++)
2980 if (!vect_analyze_slp_instance (loop_vinfo, store))
2982 /* SLP failed. No instance can be SLPed in the loop. */
2983 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2984 fprintf (vect_dump, "SLP failed.");
2986 return false;
2989 return true;
2993 /* For each possible SLP instance decide whether to SLP it and calculate the
2994 overall unrolling factor needed to SLP the loop. */
2996 static void
2997 vect_make_slp_decision (loop_vec_info loop_vinfo)
2999 unsigned int i, unrolling_factor = 1;
3000 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3001 slp_instance instance;
3002 int decided_to_slp = 0;
3004 if (vect_print_dump_info (REPORT_SLP))
3005 fprintf (vect_dump, "=== vect_make_slp_decision ===");
3007 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
3009 /* FORNOW: SLP if you can. */
3010 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
3011 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
3013 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
3014 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
3015 loop-based vectorization. Such stmts will be marked as HYBRID. */
3016 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
3017 decided_to_slp++;
3020 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
3022 if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
3023 fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
3024 decided_to_slp, unrolling_factor);
3028 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
3029 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
3031 static void
3032 vect_detect_hybrid_slp_stmts (slp_tree node)
3034 int i;
3035 tree stmt;
3036 imm_use_iterator imm_iter;
3037 tree use_stmt;
3039 if (!node)
3040 return;
3042 for (i = 0; VEC_iterate (tree, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
3043 if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
3044 && TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) == SSA_NAME)
3045 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, GIMPLE_STMT_OPERAND (stmt, 0))
3046 if (vinfo_for_stmt (use_stmt)
3047 && !STMT_SLP_TYPE (vinfo_for_stmt (use_stmt)))
3048 vect_mark_slp_stmts (node, hybrid, i);
3050 vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
3051 vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node));
3055 /* Find stmts that must be both vectorized and SLPed. */
3057 static void
3058 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
3060 unsigned int i;
3061 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3062 slp_instance instance;
3064 if (vect_print_dump_info (REPORT_SLP))
3065 fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");
3067 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
3068 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
3072 /* Function vect_analyze_data_refs.
3074 Find all the data references in the loop.
3076 The general structure of the analysis of data refs in the vectorizer is as
3077 follows:
3078 1- vect_analyze_data_refs(loop): call compute_data_dependences_for_loop to
3079 find and analyze all data-refs in the loop and their dependences.
3080 2- vect_analyze_dependences(): apply dependence testing using ddrs.
3081 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
3082 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
3086 static bool
3087 vect_analyze_data_refs (loop_vec_info loop_vinfo)
3089 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3090 unsigned int i;
3091 VEC (data_reference_p, heap) *datarefs;
3092 struct data_reference *dr;
3093 tree scalar_type;
3095 if (vect_print_dump_info (REPORT_DETAILS))
3096 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
3098 compute_data_dependences_for_loop (loop, true,
3099 &LOOP_VINFO_DATAREFS (loop_vinfo),
3100 &LOOP_VINFO_DDRS (loop_vinfo));
3102 /* Go through the data-refs and check that the analysis succeeded. Update the
3103 pointer from the stmt_vec_info struct to the DR and vectype. */
3104 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
3106 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
3108 tree stmt;
3109 stmt_vec_info stmt_info;
3110 basic_block bb;
3111 tree base, offset, init;
3113 if (!dr || !DR_REF (dr))
3115 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3116 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
3117 return false;
3120 stmt = DR_STMT (dr);
3121 stmt_info = vinfo_for_stmt (stmt);
3123 /* Check that analysis of the data-ref succeeded. */
3124 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
3125 || !DR_STEP (dr))
3127 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3129 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
3130 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3132 return false;
3135 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
3137 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3138 fprintf (vect_dump, "not vectorized: base addr of dr is a "
3139 "constant");
3140 return false;
3143 if (!DR_SYMBOL_TAG (dr))
3145 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3147 fprintf (vect_dump, "not vectorized: no memory tag for ");
3148 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
3150 return false;
3153 base = unshare_expr (DR_BASE_ADDRESS (dr));
3154 offset = unshare_expr (DR_OFFSET (dr));
3155 init = unshare_expr (DR_INIT (dr));
3157 /* Update DR field in stmt_vec_info struct. */
3158 bb = bb_for_stmt (stmt);
3160 /* If the dataref is in an inner-loop of the loop that is considered
3161 for vectorization, we also want to analyze the access relative to
3162 the outer-loop (DR contains information only relative to the
3163 inner-most enclosing loop). We do that by building a reference to the
3164 first location accessed by the inner-loop, and analyzing it relative
3165 to the outer-loop. */
3166 if (nested_in_vect_loop_p (loop, stmt))
3168 tree outer_step, outer_base, outer_init;
3169 HOST_WIDE_INT pbitsize, pbitpos;
3170 tree poffset;
3171 enum machine_mode pmode;
3172 int punsignedp, pvolatilep;
3173 affine_iv base_iv, offset_iv;
3174 tree dinit;
3176 /* Build a reference to the first location accessed by the
3177 inner-loop: *(BASE+INIT). (The first location is actually
3178 BASE+INIT+OFFSET, but we add OFFSET separately later.) */
3179 tree inner_base = build_fold_indirect_ref
3180 (fold_build2 (PLUS_EXPR, TREE_TYPE (base), base, init));
3182 if (vect_print_dump_info (REPORT_DETAILS))
3184 fprintf (dump_file, "analyze in outer-loop: ");
3185 print_generic_expr (dump_file, inner_base, TDF_SLIM);
3188 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
3189 &poffset, &pmode, &punsignedp, &pvolatilep, false);
3190 gcc_assert (outer_base != NULL_TREE);
3192 if (pbitpos % BITS_PER_UNIT != 0)
3194 if (vect_print_dump_info (REPORT_DETAILS))
3195 fprintf (dump_file, "failed: bit offset alignment.\n");
3196 return false;
3199 outer_base = build_fold_addr_expr (outer_base);
3200 if (!simple_iv (loop, stmt, outer_base, &base_iv, false))
3202 if (vect_print_dump_info (REPORT_DETAILS))
3203 fprintf (dump_file, "failed: evolution of base is not affine.\n");
3204 return false;
3207 if (offset)
3209 if (poffset)
3210 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, poffset);
3211 else
3212 poffset = offset;
3215 if (!poffset)
3217 offset_iv.base = ssize_int (0);
3218 offset_iv.step = ssize_int (0);
3220 else if (!simple_iv (loop, stmt, poffset, &offset_iv, false))
3222 if (vect_print_dump_info (REPORT_DETAILS))
3223 fprintf (dump_file, "evolution of offset is not affine.\n");
3224 return false;
3227 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
3228 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
3229 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
3230 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
3231 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
3233 outer_step = size_binop (PLUS_EXPR,
3234 fold_convert (ssizetype, base_iv.step),
3235 fold_convert (ssizetype, offset_iv.step));
3237 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
3238 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
3239 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
3240 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
3241 STMT_VINFO_DR_OFFSET (stmt_info) =
3242 fold_convert (ssizetype, offset_iv.base);
3243 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
3244 size_int (highest_pow2_factor (offset_iv.base));
3246 if (dump_file && (dump_flags & TDF_DETAILS))
3248 fprintf (dump_file, "\touter base_address: ");
3249 print_generic_expr (dump_file, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM);
3250 fprintf (dump_file, "\n\touter offset from base address: ");
3251 print_generic_expr (dump_file, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM);
3252 fprintf (dump_file, "\n\touter constant offset from base address: ");
3253 print_generic_expr (dump_file, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM);
3254 fprintf (dump_file, "\n\touter step: ");
3255 print_generic_expr (dump_file, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM);
3256 fprintf (dump_file, "\n\touter aligned to: ");
3257 print_generic_expr (dump_file, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM);
3261 if (STMT_VINFO_DATA_REF (stmt_info))
3263 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3265 fprintf (vect_dump,
3266 "not vectorized: more than one data ref in stmt: ");
3267 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3269 return false;
3271 STMT_VINFO_DATA_REF (stmt_info) = dr;
3273 /* Set vectype for STMT. */
3274 scalar_type = TREE_TYPE (DR_REF (dr));
3275 STMT_VINFO_VECTYPE (stmt_info) =
3276 get_vectype_for_scalar_type (scalar_type);
3277 if (!STMT_VINFO_VECTYPE (stmt_info))
3279 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3281 fprintf (vect_dump,
3282 "not vectorized: no vectype for stmt: ");
3283 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3284 fprintf (vect_dump, " scalar_type: ");
3285 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
3287 return false;
3291 return true;
3295 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
3297 /* Function vect_mark_relevant.
3299 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
3301 static void
3302 vect_mark_relevant (VEC(tree,heap) **worklist, tree stmt,
3303 enum vect_relevant relevant, bool live_p)
3305 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3306 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
3307 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
3309 if (vect_print_dump_info (REPORT_DETAILS))
3310 fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
3312 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
3314 tree pattern_stmt;
3316 /* This is the last stmt in a sequence that was detected as a
3317 pattern that can potentially be vectorized. Don't mark the stmt
3318 as relevant/live because it's not going to be vectorized.
3319 Instead mark the pattern-stmt that replaces it. */
3321 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3323 if (vect_print_dump_info (REPORT_DETAILS))
3324 fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
3325 stmt_info = vinfo_for_stmt (pattern_stmt);
3326 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
3327 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
3328 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
3329 stmt = pattern_stmt;
3332 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
3333 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
3334 STMT_VINFO_RELEVANT (stmt_info) = relevant;
3336 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
3337 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
3339 if (vect_print_dump_info (REPORT_DETAILS))
3340 fprintf (vect_dump, "already marked relevant/live.");
3341 return;
3344 VEC_safe_push (tree, heap, *worklist, stmt);
3348 /* Function vect_stmt_relevant_p.
3350 Return true if STMT in loop that is represented by LOOP_VINFO is
3351 "relevant for vectorization".
3353 A stmt is considered "relevant for vectorization" if:
3354 - it has uses outside the loop.
3355 - it has vdefs (it alters memory).
3356 - it is a control stmt in the loop (other than the exit condition).
3358 CHECKME: what other side effects would the vectorizer allow? */
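/* For example (illustrative): in
   for (i=0; i<N; i++){ s += a[i]; p[i] = x; }
   the store to p[i] is relevant because it has a vdef (it alters
   memory), and 's' is live if its final value is used after the
   loop. */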
3360 static bool
3361 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo,
3362 enum vect_relevant *relevant, bool *live_p)
3364 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3365 ssa_op_iter op_iter;
3366 imm_use_iterator imm_iter;
3367 use_operand_p use_p;
3368 def_operand_p def_p;
3370 *relevant = vect_unused_in_loop;
3371 *live_p = false;
3373 /* cond stmt other than loop exit cond. */
3374 if (is_ctrl_stmt (stmt)
3375 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt)) != loop_exit_ctrl_vec_info_type)
3376 *relevant = vect_used_in_loop;
3378 /* changing memory. */
3379 if (TREE_CODE (stmt) != PHI_NODE)
3380 if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS))
3382 if (vect_print_dump_info (REPORT_DETAILS))
3383 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
3384 *relevant = vect_used_in_loop;
3387 /* uses outside the loop. */
3388 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
3390 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
3392 basic_block bb = bb_for_stmt (USE_STMT (use_p));
3393 if (!flow_bb_inside_loop_p (loop, bb))
3395 if (vect_print_dump_info (REPORT_DETAILS))
3396 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
3398 /* We expect all such uses to be in the loop exit phis
3399 (because of loop-closed SSA form). */
3400 gcc_assert (TREE_CODE (USE_STMT (use_p)) == PHI_NODE);
3401 gcc_assert (bb == single_exit (loop)->dest);
3403 *live_p = true;
3408 return (*live_p || *relevant);
3413 Function process_use.
3415 Inputs:
3416 - a USE in STMT in a loop represented by LOOP_VINFO
3417 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
3418 that defined USE. This is done by calling vect_mark_relevant and passing it
3419 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
3421 Outputs:
3422 Generally, LIVE_P and RELEVANT are used to define the liveness and
3423 relevance info of the DEF_STMT of this USE:
3424 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
3425 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
3426 Exceptions:
3427 - case 1: If USE is used only for address computations (e.g. array indexing),
3428 which does not need to be directly vectorized, then the liveness/relevance
3429 of the respective DEF_STMT is left unchanged.
3430 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
3431 skip DEF_STMT because it has already been processed.
3432 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
3433 be modified accordingly.
3435 Return true if everything is as expected. Return false otherwise. */
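/* For example (illustrative, for case 1 above): in 'x = a[i]' the use of
   'i' feeds only the address computation, so the stmt defining 'i' is
   not marked relevant/live on account of this use. */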
3437 static bool
3438 process_use (tree stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
3439 enum vect_relevant relevant, VEC(tree,heap) **worklist)
3441 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3442 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3443 stmt_vec_info dstmt_vinfo;
3444 basic_block bb, def_bb;
3445 tree def, def_stmt;
3446 enum vect_def_type dt;
3448 /* case 1: we are only interested in uses that need to be vectorized. Uses
3449 that are used for address computation are not considered relevant. */
3450 if (!exist_non_indexing_operands_for_use_p (use, stmt))
3451 return true;
3453 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt))
3455 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3456 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
3457 return false;
3460 if (!def_stmt || IS_EMPTY_STMT (def_stmt))
3461 return true;
3463 def_bb = bb_for_stmt (def_stmt);
3464 if (!flow_bb_inside_loop_p (loop, def_bb))
3466 if (vect_print_dump_info (REPORT_DETAILS))
3467 fprintf (vect_dump, "def_stmt is out of loop.");
3468 return true;
3471 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
3472 DEF_STMT must have already been processed, because this should be the
3473 only way that STMT, which is a reduction-phi, was put in the worklist,
3474 as there should be no other uses for DEF_STMT in the loop. So we just
3475 check that everything is as expected, and we are done. */
3476 dstmt_vinfo = vinfo_for_stmt (def_stmt);
3477 bb = bb_for_stmt (stmt);
3478 if (TREE_CODE (stmt) == PHI_NODE
3479 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
3480 && TREE_CODE (def_stmt) != PHI_NODE
3481 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
3482 && bb->loop_father == def_bb->loop_father)
3484 if (vect_print_dump_info (REPORT_DETAILS))
3485 fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
3486 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
3487 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
3488 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
3489 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
3490 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_loop);
3491 return true;
3494 /* case 3a: outer-loop stmt defining an inner-loop stmt:
3495 outer-loop-header-bb:
3496 d = def_stmt
3497 inner-loop:
3498 stmt # use (d)
3499 outer-loop-tail-bb:
3500 ... */
3501 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
3503 if (vect_print_dump_info (REPORT_DETAILS))
3504 fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");
3505 switch (relevant)
3507 case vect_unused_in_loop:
3508 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ?
3509 vect_used_by_reduction : vect_unused_in_loop;
3510 break;
3511 case vect_used_in_outer_by_reduction:
3512 relevant = vect_used_by_reduction;
3513 break;
3514 case vect_used_in_outer:
3515 relevant = vect_used_in_loop;
3516 break;
3517 case vect_used_by_reduction:
3518 case vect_used_in_loop:
3519 break;
3521 default:
3522 gcc_unreachable ();
3526 /* case 3b: inner-loop stmt defining an outer-loop stmt:
3527 outer-loop-header-bb:
3529 inner-loop:
3530 d = def_stmt
3531 outer-loop-tail-bb:
3532 stmt # use (d) */
3533 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
3535 if (vect_print_dump_info (REPORT_DETAILS))
3536 fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");
3537 switch (relevant)
3539 case vect_unused_in_loop:
3540 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ?
3541 vect_used_in_outer_by_reduction : vect_unused_in_loop;
3542 break;
3544 case vect_used_in_outer_by_reduction:
3545 case vect_used_in_outer:
3546 break;
3548 case vect_used_by_reduction:
3549 relevant = vect_used_in_outer_by_reduction;
3550 break;
3552 case vect_used_in_loop:
3553 relevant = vect_used_in_outer;
3554 break;
3556 default:
3557 gcc_unreachable ();
3561 vect_mark_relevant (worklist, def_stmt, relevant, live_p);
3562 return true;
3566 /* Function vect_mark_stmts_to_be_vectorized.
3568 Not all stmts in the loop need to be vectorized. For example:
3570 for i...
3571 for j...
3572 1. T0 = i + j
3573 2. T1 = a[T0]
3575 3. j = j + 1
3577 Stmts 1 and 3 do not need to be vectorized, because loop control and
3578 addressing of vectorized data-refs are handled differently.
3580 This pass detects such stmts. */

static bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  VEC(tree,heap) *worklist;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  tree stmt;
  stmt_ann_t ann;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  tree phi;
  bool live_p;
  enum vect_relevant relevant;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");

  worklist = VEC_alloc (tree, heap, 64);

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "init: phi relevant? ");
              print_generic_expr (vect_dump, phi, TDF_SLIM);
            }

          if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, phi, relevant, live_p);
        }
      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
        {
          stmt = bsi_stmt (si);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "init: stmt relevant? ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
            }

          if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, stmt, relevant, live_p);
        }
    }

  /* 2. Process worklist.  */
  while (VEC_length (tree, worklist) > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = VEC_pop (tree, worklist);
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "worklist: examine stmt: ");
          print_generic_expr (vect_dump, stmt, TDF_SLIM);
        }

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines
         it (DEF_STMT) as relevant/irrelevant and live/dead according to the
         liveness and relevance properties of STMT.  */
      ann = stmt_ann (stmt);
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);

      /* Generally, the liveness and relevance properties of STMT are
         propagated as-is to the DEF_STMTs of its USEs:
           live_p   <-- STMT_VINFO_LIVE_P (STMT_VINFO)
           relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)

         One exception is when STMT has been identified as defining a
         reduction variable; in this case we set the liveness/relevance
         as follows:
           live_p = false
           relevant = vect_used_by_reduction
         This is because we distinguish between two kinds of relevant stmts:
         those that are used by a reduction computation, and those that are
         (also) used by a regular computation.  This allows us later on to
         identify stmts that are used solely by a reduction, and therefore
         the order of the results that they produce does not have to be kept.

         Reduction phis are expected to be used by a reduction stmt, or by
         a stmt in an outer loop; other reduction stmts are expected to be
         in the loop, and possibly used by a stmt in an outer loop.
         Here are the expected values of "relevant" for reduction phis/stmts:

         relevance:                          phi     stmt
         vect_unused_in_loop                         ok
         vect_used_in_outer_by_reduction     ok      ok
         vect_used_in_outer                  ok      ok
         vect_used_by_reduction              ok
         vect_used_in_loop                                  */
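
      /* For illustration only (example not in the original sources): in

           s_1 = PHI <s_init, s_2>      <- reduction phi
           ...
           s_2 = s_1 + a[i];            <- reduction stmt

         the intermediate values of s are used only by the phi and by the
         final result of the reduction, so the uses of the reduction stmt
         are marked vect_used_by_reduction and live_p is cleared.  */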

      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
        {
          enum vect_relevant tmp_relevant = relevant;
          switch (tmp_relevant)
            {
            case vect_unused_in_loop:
              gcc_assert (TREE_CODE (stmt) != PHI_NODE);
              relevant = vect_used_by_reduction;
              break;

            case vect_used_in_outer_by_reduction:
            case vect_used_in_outer:
              gcc_assert (TREE_CODE (stmt) != WIDEN_SUM_EXPR
                          && TREE_CODE (stmt) != DOT_PROD_EXPR);
              break;

            case vect_used_by_reduction:
              if (TREE_CODE (stmt) == PHI_NODE)
                break;
              /* fall through */
            case vect_used_in_loop:
            default:
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "unsupported use of reduction.");
              VEC_free (tree, heap, worklist);
              return false;
            }
          live_p = false;
        }

      FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
        {
          tree op = USE_FROM_PTR (use_p);
          if (!process_use (stmt, op, loop_vinfo, live_p, relevant, &worklist))
            {
              VEC_free (tree, heap, worklist);
              return false;
            }
        }
    } /* while worklist */

  VEC_free (tree, heap, worklist);
  return true;
}

/* Function vect_can_advance_ivs_p

   In case the number of iterations that LOOP iterates is unknown at compile
   time, an epilog loop will be generated, and the loop induction variables
   (IVs) will be "advanced" to the value they are supposed to take just before
   the epilog loop.  Here we check that the access function of the loop IVs
   and the expression that represents the loop bound are simple enough.
   These restrictions will be relaxed in the future.  */
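
/* For illustration only (example not in the original sources): for the phi
   defining a simple IV such as `i' in

     for (i = 0; i < n; i++)
       ...

   the access function computed below is the linear chrec {0, +, 1}_1.  Its
   evolution part in the loop (the step, 1) is not itself a chrec, so this
   IV can be advanced for the epilog loop.  */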

static bool
vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block bb = loop->header;
  tree phi;

  /* Analyze phi functions of the loop header.  */

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_can_advance_ivs_p:");

  for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
    {
      tree access_fn = NULL;
      tree evolution_part;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Analyze phi: ");
          print_generic_expr (vect_dump, phi, TDF_SLIM);
        }

      /* Skip virtual phis.  The data dependences that are associated with
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */

      if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "virtual phi. skip.");
          continue;
        }

      /* Skip reduction phis.  */

      if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "reduc phi. skip.");
          continue;
        }

      /* Analyze the evolution function.  */

      access_fn = instantiate_parameters
        (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));

      if (!access_fn)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "No Access function.");
          return false;
        }

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Access function of PHI: ");
          print_generic_expr (vect_dump, access_fn, TDF_SLIM);
        }

      evolution_part = evolution_part_in_loop_num (access_fn, loop->num);

      if (evolution_part == NULL_TREE)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "No evolution.");
          return false;
        }

      /* FORNOW: We do not transform initial conditions of IVs
         whose evolution functions are a polynomial of degree >= 2.  */

      if (tree_is_chrec (evolution_part))
        return false;
    }

  return true;
}

/* Function vect_get_loop_niters.

   Determine how many iterations the loop executes.
   If an expression that represents the number of iterations
   can be constructed, place it in NUMBER_OF_ITERATIONS.
   Return the loop exit condition.  */
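
/* For illustration only (example not in the original sources): for a loop
   running i = 0 .. n-1, the expression placed in NUMBER_OF_ITERATIONS is
   `n' (the number of times the exit condition of the do-while form is
   executed).  If the loop is not countable, NUMBER_OF_ITERATIONS is left
   untouched and the caller gives up.  */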

static tree
vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
{
  tree niters;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== get_loop_niters ===");

  niters = number_of_exit_cond_executions (loop);

  if (niters != NULL_TREE
      && niters != chrec_dont_know)
    {
      *number_of_iterations = niters;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "==> get_loop_niters:");
          print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
        }
    }

  return get_loop_exit_condition (loop);
}

/* Function vect_analyze_loop_1.

   Apply a set of analyses on LOOP, and create a loop_vec_info struct
   for it.  The different analyses will record information in the
   loop_vec_info struct.  This is a subset of the analyses applied in
   vect_analyze_loop, to be applied on an inner-loop nested in the loop
   that is now considered for (outer-loop) vectorization.  */

static loop_vec_info
vect_analyze_loop_1 (struct loop *loop)
{
  loop_vec_info loop_vinfo;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "===== analyze_loop_nest_1 =====");

  /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.).  */

  loop_vinfo = vect_analyze_loop_form (loop);
  if (!loop_vinfo)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad inner-loop form.");
      return NULL;
    }

  return loop_vinfo;
}

/* Function vect_analyze_loop_form.

   Verify that certain CFG restrictions hold, including:
   - the loop has a pre-header
   - the loop has a single entry and exit
   - the loop exit condition is simple enough, and the number of iterations
     can be analyzed (a countable loop).  */

static loop_vec_info
vect_analyze_loop_form (struct loop *loop)
{
  loop_vec_info loop_vinfo;
  tree loop_cond;
  tree number_of_iterations = NULL;
  loop_vec_info inner_loop_vinfo = NULL;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_loop_form ===");

  /* Different restrictions apply when we are considering an inner-most loop,
     vs. an outer (nested) loop.
     (FORNOW.  May want to relax some of these restrictions in the future).  */

  if (!loop->inner)
    {
      /* Inner-most loop.  We currently require that the number of BBs is
         exactly 2 (the header and latch).  Vectorizable inner-most loops
         look like this:

                        (pre-header)
                             |
                           header <--------+
                            | |            |
                            | +--> latch --+
                            |
                        (exit-bb)  */

      if (loop->num_nodes != 2)
        {
          if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
            fprintf (vect_dump, "not vectorized: too many BBs in loop.");
          return NULL;
        }

      if (empty_block_p (loop->header))
        {
          if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
            fprintf (vect_dump, "not vectorized: empty loop.");
          return NULL;
        }
    }
  else
    {
      struct loop *innerloop = loop->inner;
      edge backedge, entryedge;

      /* Nested loop.  We currently require that the loop is doubly-nested,
         contains a single inner loop, and the number of BBs is exactly 5.
         Vectorizable outer-loops look like this:

                        (pre-header)
                             |
                           header <---+
                             |        |
                        inner-loop    |
                             |        |
                           tail ------+
                             |
                        (exit-bb)

         The inner-loop has the properties expected of inner-most loops
         as described above.  */
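
      /* For illustration only (example not in the original sources): a
         source nest of the shape

           for (i = 0; i < n; i++)      <- outer loop considered here
             for (j = 0; j < m; j++)
               a[i][j] = i + j;

         yields the 5-BB structure above: the outer header, the two BBs of
         the inner loop (header and latch), the outer tail, and the outer
         latch.  */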

      if ((loop->inner)->inner || (loop->inner)->next)
        {
          if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
            fprintf (vect_dump, "not vectorized: multiple nested loops.");
          return NULL;
        }

      /* Analyze the inner-loop.  */
      inner_loop_vinfo = vect_analyze_loop_1 (loop->inner);
      if (!inner_loop_vinfo)
        {
          if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
            fprintf (vect_dump, "not vectorized: Bad inner loop.");
          return NULL;
        }

      if (!expr_invariant_in_loop_p (loop,
                                     LOOP_VINFO_NITERS (inner_loop_vinfo)))
        {
          if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
            fprintf (vect_dump,
                     "not vectorized: inner-loop count not invariant.");
          destroy_loop_vec_info (inner_loop_vinfo, true);
          return NULL;
        }

      if (loop->num_nodes != 5)
        {
          if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
            fprintf (vect_dump, "not vectorized: too many BBs in loop.");
          destroy_loop_vec_info (inner_loop_vinfo, true);
          return NULL;
        }

      gcc_assert (EDGE_COUNT (innerloop->header->preds) == 2);
      backedge = EDGE_PRED (innerloop->header, 1);
      entryedge = EDGE_PRED (innerloop->header, 0);
      if (EDGE_PRED (innerloop->header, 0)->src == innerloop->latch)
        {
          backedge = EDGE_PRED (innerloop->header, 0);
          entryedge = EDGE_PRED (innerloop->header, 1);
        }

      if (entryedge->src != loop->header
          || !single_exit (innerloop)
          || single_exit (innerloop)->dest != EDGE_PRED (loop->latch, 0)->src)
        {
          if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
            fprintf (vect_dump, "not vectorized: unsupported outerloop form.");
          destroy_loop_vec_info (inner_loop_vinfo, true);
          return NULL;
        }

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Considering outer-loop vectorization.");
    }

  if (!single_exit (loop)
      || EDGE_COUNT (loop->header->preds) != 2)
    {
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
        {
          if (!single_exit (loop))
            fprintf (vect_dump, "not vectorized: multiple exits.");
          else if (EDGE_COUNT (loop->header->preds) != 2)
            fprintf (vect_dump, "not vectorized: too many incoming edges.");
        }
      if (inner_loop_vinfo)
        destroy_loop_vec_info (inner_loop_vinfo, true);
      return NULL;
    }

  /* We assume that the loop exit condition is at the end of the loop, i.e.,
     that the loop is represented as a do-while (with a proper if-guard
     before the loop if needed), where the loop header contains all the
     executable statements, and the latch is empty.  */
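
  /* For illustration only (example not in the original sources): a source
     loop `while (i < n) { ...; i++; }' is expected to reach this point as

       if (i < n)
         do { ...; i++; } while (i < n);

     so that the header holds the body and the exit test, and the latch
     carries no statements.  */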
  if (!empty_block_p (loop->latch)
      || phi_nodes (loop->latch))
    {
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
        fprintf (vect_dump, "not vectorized: unexpected loop form.");
      if (inner_loop_vinfo)
        destroy_loop_vec_info (inner_loop_vinfo, true);
      return NULL;
    }

  /* Make sure there exists a single-predecessor exit bb:  */
  if (!single_pred_p (single_exit (loop)->dest))
    {
      edge e = single_exit (loop);
      if (!(e->flags & EDGE_ABNORMAL))
        {
          split_loop_exit_edge (e);
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "split exit edge.");
        }
      else
        {
          if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
            fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
          if (inner_loop_vinfo)
            destroy_loop_vec_info (inner_loop_vinfo, true);
          return NULL;
        }
    }

  loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
  if (!loop_cond)
    {
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
        fprintf (vect_dump, "not vectorized: complicated exit condition.");
      if (inner_loop_vinfo)
        destroy_loop_vec_info (inner_loop_vinfo, true);
      return NULL;
    }

  if (!number_of_iterations)
    {
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
        fprintf (vect_dump,
                 "not vectorized: number of iterations cannot be computed.");
      if (inner_loop_vinfo)
        destroy_loop_vec_info (inner_loop_vinfo, true);
      return NULL;
    }

  if (chrec_contains_undetermined (number_of_iterations))
    {
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
        fprintf (vect_dump, "Infinite number of iterations.");
      if (inner_loop_vinfo)
        destroy_loop_vec_info (inner_loop_vinfo, true);
      return NULL;
    }

  if (!NITERS_KNOWN_P (number_of_iterations))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Symbolic number of iterations is ");
          print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
        }
    }
  else if (TREE_INT_CST_LOW (number_of_iterations) == 0)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "not vectorized: number of iterations = 0.");
      if (inner_loop_vinfo)
        destroy_loop_vec_info (inner_loop_vinfo, false);
      return NULL;
    }

  loop_vinfo = new_loop_vec_info (loop);
  LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;

  STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type;

  /* CHECKME: May want to keep it around in the future.  */
  if (inner_loop_vinfo)
    destroy_loop_vec_info (inner_loop_vinfo, false);

  gcc_assert (!loop->aux);
  loop->aux = loop_vinfo;
  return loop_vinfo;
}

/* Function vect_analyze_loop.

   Apply a set of analyses on LOOP, and create a loop_vec_info struct
   for it.  The different analyses will record information in the
   loop_vec_info struct.  */

loop_vec_info
vect_analyze_loop (struct loop *loop)
{
  bool ok;
  loop_vec_info loop_vinfo;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "===== analyze_loop_nest =====");

  if (loop_outer (loop)
      && loop_vec_info_for_loop (loop_outer (loop))
      && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop))))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "outer-loop already vectorized.");
      return NULL;
    }

  /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.).  */

  loop_vinfo = vect_analyze_loop_form (loop);
  if (!loop_vinfo)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad loop form.");
      return NULL;
    }

  /* Find all data references in the loop (which correspond to vdefs/vuses)
     and analyze their evolution in the loop.

     FORNOW: Handle only simple array references whose alignment can be
     forced, and aligned pointer-references.  */
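
  /* For illustration only (example not in the original sources): references
     such as a[i] and b[i] in

       for (i = 0; i < n; i++)
         a[i] = b[i] + c[i];

     qualify, since the compiler can raise the alignment of the arrays;
     more complicated references may cause the analysis below to fail.  */
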
  ok = vect_analyze_data_refs (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad data references.");
      destroy_loop_vec_info (loop_vinfo, true);
      return NULL;
    }

  /* Classify all cross-iteration scalar data-flow cycles.
     Cross-iteration cycles caused by virtual phis are analyzed separately.  */

  vect_analyze_scalar_cycles (loop_vinfo);

  vect_pattern_recog (loop_vinfo);

  /* Data-flow analysis to detect stmts that do not need to be vectorized.  */

  ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "unexpected pattern.");
      destroy_loop_vec_info (loop_vinfo, true);
      return NULL;
    }

  /* Analyze the alignment of the data-refs in the loop.
     Fail if a data reference is found that cannot be vectorized.  */

  ok = vect_analyze_data_refs_alignment (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad data alignment.");
      destroy_loop_vec_info (loop_vinfo, true);
      return NULL;
    }

  ok = vect_determine_vectorization_factor (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "can't determine vectorization factor.");
      destroy_loop_vec_info (loop_vinfo, true);
      return NULL;
    }

  /* Analyze data dependences between the data-refs in the loop.
     FORNOW: fail at the first data dependence that we encounter.  */

  ok = vect_analyze_data_ref_dependences (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad data dependence.");
      destroy_loop_vec_info (loop_vinfo, true);
      return NULL;
    }

  /* Analyze the access patterns of the data-refs in the loop (consecutive,
     complex, etc.).  FORNOW: Only handle consecutive access pattern.  */
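
  /* For illustration only (example not in the original sources): a[i]
     advances by one element per iteration and is a consecutive access,
     whereas an indirect access such as a[b[i]] has no such evolution and
     fails this analysis.  */
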
  ok = vect_analyze_data_ref_accesses (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad data access.");
      destroy_loop_vec_info (loop_vinfo, true);
      return NULL;
    }

  /* Check the SLP opportunities in the loop, analyze and build SLP trees.  */
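
  /* For illustration only (example not in the original sources): a group of
     isomorphic stmts such as

       out[2*i]   = a[i] + x;
       out[2*i+1] = b[i] + y;

     can be combined into an SLP instance and vectorized as straight-line
     code, rather than by combining copies across iterations.  */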
  ok = vect_analyze_slp (loop_vinfo);
  if (ok)
    {
      /* Decide which possible SLP instances to SLP.  */
      vect_make_slp_decision (loop_vinfo);

      /* Find stmts that need to be both vectorized and SLPed.  */
      vect_detect_hybrid_slp (loop_vinfo);
    }

  /* This pass will decide on using loop versioning and/or loop peeling in
     order to enhance the alignment of data references in the loop.  */
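
  /* For illustration only (example not in the original sources): if a data
     reference is misaligned by one element and the vectorization factor is
     4, peeling 3 scalar iterations before the vector loop makes the access
     aligned; alternatively, the loop can be versioned under a runtime
     alignment check.  */
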
  ok = vect_enhance_data_refs_alignment (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad data alignment.");
      destroy_loop_vec_info (loop_vinfo, true);
      return NULL;
    }

  /* Scan all the operations in the loop and make sure they are
     vectorizable.  */

  ok = vect_analyze_operations (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad operation or unsupported loop bound.");
      destroy_loop_vec_info (loop_vinfo, true);
      return NULL;
    }

  LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;

  return loop_vinfo;
}