gcc/tree-vect-data-refs.c

   1 /* Data References Analysis and Manipulation Utilities for Vectorization.
   2    Copyright (C) 2003-2018 Free Software Foundation, Inc.
   3    Contributed by Dorit Naishlos <dorit@il.ibm.com>
   4    and Ira Rosen <irar@il.ibm.com>
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify it under
   9 the terms of the GNU General Public License as published by the Free
  10 Software Foundation; either version 3, or (at your option) any later
  11 version.
  12
  13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16 for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING3.  If not see
  20 <http://www.gnu.org/licenses/>.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "backend.h"
  26 #include "target.h"
  27 #include "rtl.h"
  28 #include "tree.h"
  29 #include "gimple.h"
  30 #include "predict.h"
  31 #include "memmodel.h"
  32 #include "tm_p.h"
  33 #include "ssa.h"
  34 #include "optabs-tree.h"
  35 #include "cgraph.h"
  36 #include "dumpfile.h"
  37 #include "alias.h"
  38 #include "fold-const.h"
  39 #include "stor-layout.h"
  40 #include "tree-eh.h"
  41 #include "gimplify.h"
  42 #include "gimple-iterator.h"
  43 #include "gimplify-me.h"
  44 #include "tree-ssa-loop-ivopts.h"
  45 #include "tree-ssa-loop-manip.h"
  46 #include "tree-ssa-loop.h"
  47 #include "cfgloop.h"
  48 #include "tree-scalar-evolution.h"
  49 #include "tree-vectorizer.h"
  50 #include "expr.h"
  51 #include "builtins.h"
  52 #include "params.h"
  53 #include "tree-cfg.h"
  54 #include "tree-hash-traits.h"
  55 #include "vec-perm-indices.h"
  56 #include "internal-fn.h"
  57
  58 /* Return true if load- or store-lanes optab OPTAB is implemented for
  59    COUNT vectors of type VECTYPE.  NAME is the name of OPTAB.  */
  60
  61 static bool
  62 vect_lanes_optab_supported_p (const char *name, convert_optab optab,
  63                               tree vectype, unsigned HOST_WIDE_INT count)
  64 {
  65   machine_mode mode, array_mode;
  66   bool limit_p;
  67
  68   mode = TYPE_MODE (vectype);
  69   if (!targetm.array_mode (mode, count).exists (&array_mode))
  70     {
  71       poly_uint64 bits = count * GET_MODE_BITSIZE (mode);
  72       limit_p = !targetm.array_mode_supported_p (mode, count);
  73       if (!int_mode_for_size (bits, limit_p).exists (&array_mode))
  74         {
  75           if (dump_enabled_p ())
  76             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
  77                              "no array mode for %s["
  78                              HOST_WIDE_INT_PRINT_DEC "]\n",
  79                              GET_MODE_NAME (mode), count);
  80           return false;
  81         }
  82     }
  83
  84   if (convert_optab_handler (optab, array_mode, mode) == CODE_FOR_nothing)
  85     {
  86       if (dump_enabled_p ())
  87         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
  88                          "cannot use %s<%s><%s>\n", name,
  89                          GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
  90       return false;
  91     }
  92
  93   if (dump_enabled_p ())
  94     dump_printf_loc (MSG_NOTE, vect_location,
  95                      "can use %s<%s><%s>\n", name, GET_MODE_NAME (array_mode),
  96                      GET_MODE_NAME (mode));
  97
  98   return true;
  99 }
 100
 101
 102 /* Return the smallest scalar part of STMT.
 103    This is used to determine the vectype of the stmt.  We generally set the
 104    vectype according to the type of the result (lhs).  For stmts whose
 105    result-type is different than the type of the arguments (e.g., demotion,
 106    promotion), vectype will be reset appropriately (later).  Note that we have
 107    to visit the smallest datatype in this function, because that determines the
 108    VF.  If the smallest datatype in the loop is present only as the rhs of a
 109    promotion operation - we'd miss it.
 110    Such a case, where a variable of this datatype does not appear in the lhs
 111    anywhere in the loop, can only occur if it's an invariant: e.g.:
 112    'int_x = (int) short_inv', which we'd expect to have been optimized away by
 113    invariant motion.  However, we cannot rely on invariant motion to always
 114    take invariants out of the loop, and so in the case of promotion we also
 115    have to check the rhs.
 116    LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
 117    types.  */
 118
 119 tree
 120 vect_get_smallest_scalar_type (gimple *stmt, HOST_WIDE_INT *lhs_size_unit,
 121                                HOST_WIDE_INT *rhs_size_unit)
 122 {
 123   tree scalar_type = gimple_expr_type (stmt);
 124   HOST_WIDE_INT lhs, rhs;
 125
 126   /* During the analysis phase, this function is called on arbitrary
 127      statements that might not have scalar results.  */
 128   if (!tree_fits_uhwi_p (TYPE_SIZE_UNIT (scalar_type)))
 129     return scalar_type;
 130
 131   lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
 132
 133   if (is_gimple_assign (stmt)
 134       && (gimple_assign_cast_p (stmt)
 135           || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
 136           || gimple_assign_rhs_code (stmt) == WIDEN_LSHIFT_EXPR
 137           || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
 138     {
 139       tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
 140
 141       rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
 142       if (rhs < lhs)
 143         scalar_type = rhs_type;
 144     }
 145
 146   *lhs_size_unit = lhs;
 147   *rhs_size_unit = rhs;
 148   return scalar_type;
 149 }
 150
 151
 152 /* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
 153    tested at run-time.  Return TRUE if DDR was successfully inserted.
 154    Return false if versioning is not supported.  */
 155
 156 static bool
 157 vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
 158 {
 159   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
 160
 161   if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
 162     return false;
 163
 164   if (!runtime_alias_check_p (ddr, loop,
 165                               optimize_loop_nest_for_speed_p (loop)))
 166     return false;
 167
 168   LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).safe_push (ddr);
 169   return true;
 170 }
 171
 172
 173 /* A subroutine of vect_analyze_data_ref_dependence.  Handle
 174    DDR_COULD_BE_INDEPENDENT_P ddr DDR that has a known set of dependence
 175    distances.  These distances are conservatively correct but they don't
 176    reflect a guaranteed dependence.
 177
 178    Return true if this function does all the work necessary to avoid
 179    an alias or false if the caller should use the dependence distances
 180    to limit the vectorization factor in the usual way.  LOOP_DEPTH is
 181    the depth of the loop described by LOOP_VINFO and the other arguments
 182    are as for vect_analyze_data_ref_dependence.  */
 183
 184 static bool
 185 vect_analyze_possibly_independent_ddr (data_dependence_relation *ddr,
 186                                        loop_vec_info loop_vinfo,
 187                                        int loop_depth, unsigned int *max_vf)
 188 {
 189   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
 190   lambda_vector dist_v;
 191   unsigned int i;
 192   FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
 193     {
 194       int dist = dist_v[loop_depth];
 195       if (dist != 0 && !(dist > 0 && DDR_REVERSED_P (ddr)))
 196         {
 197           /* If the user asserted safelen >= DIST consecutive iterations
 198              can be executed concurrently, assume independence.
 199
 200              ??? An alternative would be to add the alias check even
 201              in this case, and vectorize the fallback loop with the
 202              maximum VF set to safelen.  However, if the user has
 203              explicitly given a length, it's less likely that that
 204              would be a win.  */
 205           if (loop->safelen >= 2 && abs_hwi (dist) <= loop->safelen)
 206             {
 207               if ((unsigned int) loop->safelen < *max_vf)
 208                 *max_vf = loop->safelen;
 209               LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = false;
 210               continue;
 211             }
 212
 213           /* For dependence distances of 2 or more, we have the option
 214              of limiting VF or checking for an alias at runtime.
 215              Prefer to check at runtime if we can, to avoid limiting
 216              the VF unnecessarily when the bases are in fact independent.
 217
 218              Note that the alias checks will be removed if the VF ends up
 219              being small enough.  */
 220           return vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
 221         }
 222     }
 223   return true;
 224 }
 225
 226
 227 /* Function vect_analyze_data_ref_dependence.
 228
 229    Return TRUE if there (might) exist a dependence between a memory-reference
 230    DRA and a memory-reference DRB.  When versioning for alias may check a
 231    dependence at run-time, return FALSE.  Adjust *MAX_VF according to
 232    the data dependence.  */
 233
 234 static bool
 235 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
 236                                   loop_vec_info loop_vinfo,
 237                                   unsigned int *max_vf)
 238 {
 239   unsigned int i;
 240   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
 241   struct data_reference *dra = DDR_A (ddr);
 242   struct data_reference *drb = DDR_B (ddr);
 243   stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
 244   stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
 245   lambda_vector dist_v;
 246   unsigned int loop_depth;
 247
 248   /* In loop analysis all data references should be vectorizable.  */
 249   if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
 250       || !STMT_VINFO_VECTORIZABLE (stmtinfo_b))
 251     gcc_unreachable ();
 252
 253   /* Independent data accesses.  */
 254   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
 255     return false;
 256
 257   if (dra == drb
 258       || (DR_IS_READ (dra) && DR_IS_READ (drb)))
 259     return false;
 260
 261   /* We do not have to consider dependences between accesses that belong
 262      to the same group.  */
 263   if (GROUP_FIRST_ELEMENT (stmtinfo_a)
 264       && GROUP_FIRST_ELEMENT (stmtinfo_a) == GROUP_FIRST_ELEMENT (stmtinfo_b))
 265     return false;
 266
 267   /* Even if we have an anti-dependence then, as the vectorized loop covers at
 268      least two scalar iterations, there is always also a true dependence.
 269      As the vectorizer does not re-order loads and stores we can ignore
 270      the anti-dependence if TBAA can disambiguate both DRs similar to the
 271      case with known negative distance anti-dependences (positive
 272      distance anti-dependences would violate TBAA constraints).  */
 273   if (((DR_IS_READ (dra) && DR_IS_WRITE (drb))
 274        || (DR_IS_WRITE (dra) && DR_IS_READ (drb)))
 275       && !alias_sets_conflict_p (get_alias_set (DR_REF (dra)),
 276                                  get_alias_set (DR_REF (drb))))
 277     return false;
 278
 279   /* Unknown data dependence.  */
 280   if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
 281     {
 282       /* If user asserted safelen consecutive iterations can be
 283          executed concurrently, assume independence.  */
 284       if (loop->safelen >= 2)
 285         {
 286           if ((unsigned int) loop->safelen < *max_vf)
 287             *max_vf = loop->safelen;
 288           LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = false;
 289           return false;
 290         }
 291
 292       if (STMT_VINFO_GATHER_SCATTER_P (stmtinfo_a)
 293           || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_b))
 294         {
 295           if (dump_enabled_p ())
 296             {
 297               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 298                                "versioning for alias not supported for: "
 299                                "can't determine dependence between ");
 300               dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
 301                                  DR_REF (dra));
 302               dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
 303               dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
 304                                  DR_REF (drb));
 305               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
 306             }
 307           return true;
 308         }
 309
 310       if (dump_enabled_p ())
 311         {
 312           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 313                            "versioning for alias required: "
 314                            "can't determine dependence between ");
 315           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
 316                              DR_REF (dra));
 317           dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
 318           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
 319                              DR_REF (drb));
 320           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
 321         }
 322
 323       /* Add to list of ddrs that need to be tested at run-time.  */
 324       return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
 325     }
 326
 327   /* Known data dependence.  */
 328   if (DDR_NUM_DIST_VECTS (ddr) == 0)
 329     {
 330       /* If user asserted safelen consecutive iterations can be
 331          executed concurrently, assume independence.  */
 332       if (loop->safelen >= 2)
 333         {
 334           if ((unsigned int) loop->safelen < *max_vf)
 335             *max_vf = loop->safelen;
 336           LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = false;
 337           return false;
 338         }
 339
 340       if (STMT_VINFO_GATHER_SCATTER_P (stmtinfo_a)
 341           || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_b))
 342         {
 343           if (dump_enabled_p ())
 344             {
 345               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 346                                "versioning for alias not supported for: "
 347                                "bad dist vector for ");
 348               dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
 349                                  DR_REF (dra));
 350               dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
 351               dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
 352                                  DR_REF (drb));
 353               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
 354             }
 355           return true;
 356         }
 357
 358       if (dump_enabled_p ())
 359         {
 360           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 361                            "versioning for alias required: "
 362                            "bad dist vector for ");
 363           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (dra));
 364           dump_printf (MSG_MISSED_OPTIMIZATION,  " and ");
 365           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (drb));
 366           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
 367         }
 368       /* Add to list of ddrs that need to be tested at run-time.  */
 369       return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
 370     }
 371
 372   loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
 373
 374   if (DDR_COULD_BE_INDEPENDENT_P (ddr)
 375       && vect_analyze_possibly_independent_ddr (ddr, loop_vinfo,
 376                                                 loop_depth, max_vf))
 377     return false;
 378
 379   FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
 380     {
 381       int dist = dist_v[loop_depth];
 382
 383       if (dump_enabled_p ())
 384         dump_printf_loc (MSG_NOTE, vect_location,
 385                          "dependence distance  = %d.\n", dist);
 386
 387       if (dist == 0)
 388         {
 389           if (dump_enabled_p ())
 390             {
 391               dump_printf_loc (MSG_NOTE, vect_location,
 392                                "dependence distance == 0 between ");
 393               dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
 394               dump_printf (MSG_NOTE, " and ");
 395               dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
 396               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
 397             }
 398
 399           /* When we perform grouped accesses and perform implicit CSE
 400              by detecting equal accesses and doing disambiguation with
 401              runtime alias tests like for
 402                 .. = a[i];
 403                 .. = a[i+1];
 404                 a[i] = ..;
 405                 a[i+1] = ..;
 406                 *p = ..;
 407                 .. = a[i];
 408                 .. = a[i+1];
 409              where we will end up loading { a[i], a[i+1] } once, make
 410              sure that inserting group loads before the first load and
 411              stores after the last store will do the right thing.
 412              Similar for groups like
 413                 a[i] = ...;
 414                 ... = a[i];
 415                 a[i+1] = ...;
 416              where loads from the group interleave with the store.  */
 417           if (STMT_VINFO_GROUPED_ACCESS (stmtinfo_a)
 418               || STMT_VINFO_GROUPED_ACCESS (stmtinfo_b))
 419             {
 420               gimple *earlier_stmt;
 421               earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
 422               if (DR_IS_WRITE
 423                     (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
 424                 {
 425                   if (dump_enabled_p ())
 426                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 427                                      "READ_WRITE dependence in interleaving."
 428                                      "\n");
 429                   return true;
 430                 }
 431             }
 432
 433           continue;
 434         }
 435
 436       if (dist > 0 && DDR_REVERSED_P (ddr))
 437         {
 438           /* If DDR_REVERSED_P the order of the data-refs in DDR was
 439              reversed (to make distance vector positive), and the actual
 440              distance is negative.  */
 441           if (dump_enabled_p ())
 442             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 443                              "dependence distance negative.\n");
 444           /* Record a negative dependence distance to later limit the
 445              amount of stmt copying / unrolling we can perform.
 446              Only need to handle read-after-write dependence.  */
 447           if (DR_IS_READ (drb)
 448               && (STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) == 0
 449                   || STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) > (unsigned)dist))
 450             STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) = dist;
 451           continue;
 452         }
 453
 454       unsigned int abs_dist = abs (dist);
 455       if (abs_dist >= 2 && abs_dist < *max_vf)
 456         {
 457           /* The dependence distance requires reduction of the maximal
 458              vectorization factor.  */
 459           *max_vf = abs (dist);
 460           if (dump_enabled_p ())
 461             dump_printf_loc (MSG_NOTE, vect_location,
 462                              "adjusting maximal vectorization factor to %i\n",
 463                              *max_vf);
 464         }
 465
 466       if (abs_dist >= *max_vf)
 467         {
 468           /* Dependence distance does not create dependence, as far as
 469              vectorization is concerned, in this case.  */
 470           if (dump_enabled_p ())
 471             dump_printf_loc (MSG_NOTE, vect_location,
 472                              "dependence distance >= VF.\n");
 473           continue;
 474         }
 475
 476       if (dump_enabled_p ())
 477         {
 478           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 479                        "not vectorized, possible dependence "
 480                        "between data-refs ");
 481           dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
 482           dump_printf (MSG_NOTE,  " and ");
 483           dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
 484           dump_printf (MSG_NOTE,  "\n");
 485         }
 486
 487       return true;
 488     }
 489
 490   return false;
 491 }
 492
 493 /* Function vect_analyze_data_ref_dependences.
 494
 495    Examine all the data references in the loop, and make sure there do not
 496    exist any data dependences between them.  Set *MAX_VF according to
 497    the maximum vectorization factor the data dependences allow.  */
 498
 499 bool
 500 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
 501                                    unsigned int *max_vf)
 502 {
 503   unsigned int i;
 504   struct data_dependence_relation *ddr;
 505
 506   if (dump_enabled_p ())
 507     dump_printf_loc (MSG_NOTE, vect_location,
 508                      "=== vect_analyze_data_ref_dependences ===\n");
 509
 510   LOOP_VINFO_DDRS (loop_vinfo)
 511     .create (LOOP_VINFO_DATAREFS (loop_vinfo).length ()
 512              * LOOP_VINFO_DATAREFS (loop_vinfo).length ());
 513   LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = true;
 514   /* We need read-read dependences to compute STMT_VINFO_SAME_ALIGN_REFS.  */
 515   if (!compute_all_dependences (LOOP_VINFO_DATAREFS (loop_vinfo),
 516                                 &LOOP_VINFO_DDRS (loop_vinfo),
 517                                 LOOP_VINFO_LOOP_NEST (loop_vinfo), true))
 518     return false;
 519
 520   /* For epilogues we either have no aliases or alias versioning
 521      was applied to original loop.  Therefore we may just get max_vf
 522      using VF of original loop.  */
 523   if (LOOP_VINFO_EPILOGUE_P (loop_vinfo))
 524     *max_vf = LOOP_VINFO_ORIG_MAX_VECT_FACTOR (loop_vinfo);
 525   else
 526     FOR_EACH_VEC_ELT (LOOP_VINFO_DDRS (loop_vinfo), i, ddr)
 527       if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf))
 528         return false;
 529
 530   return true;
 531 }
 532
 533
 534 /* Function vect_slp_analyze_data_ref_dependence.
 535
 536    Return TRUE if there (might) exist a dependence between a memory-reference
 537    DRA and a memory-reference DRB.  When versioning for alias may check a
 538    dependence at run-time, return FALSE.  Adjust *MAX_VF according to
 539    the data dependence.  */
 540
 541 static bool
 542 vect_slp_analyze_data_ref_dependence (struct data_dependence_relation *ddr)
 543 {
 544   struct data_reference *dra = DDR_A (ddr);
 545   struct data_reference *drb = DDR_B (ddr);
 546
 547   /* We need to check dependences of statements marked as unvectorizable
 548      as well, they still can prohibit vectorization.  */
 549
 550   /* Independent data accesses.  */
 551   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
 552     return false;
 553
 554   if (dra == drb)
 555     return false;
 556
 557   /* Read-read is OK.  */
 558   if (DR_IS_READ (dra) && DR_IS_READ (drb))
 559     return false;
 560
 561   /* If dra and drb are part of the same interleaving chain consider
 562      them independent.  */
 563   if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (DR_STMT (dra)))
 564       && (GROUP_FIRST_ELEMENT (vinfo_for_stmt (DR_STMT (dra)))
 565           == GROUP_FIRST_ELEMENT (vinfo_for_stmt (DR_STMT (drb)))))
 566     return false;
 567
 568   /* Unknown data dependence.  */
 569   if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
 570     {
 571       if  (dump_enabled_p ())
 572         {
 573           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 574                            "can't determine dependence between ");
 575           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (dra));
 576           dump_printf (MSG_MISSED_OPTIMIZATION,  " and ");
 577           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (drb));
 578           dump_printf (MSG_MISSED_OPTIMIZATION,  "\n");
 579         }
 580     }
 581   else if (dump_enabled_p ())
 582     {
 583       dump_printf_loc (MSG_NOTE, vect_location,
 584                        "determined dependence between ");
 585       dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
 586       dump_printf (MSG_NOTE, " and ");
 587       dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
 588       dump_printf (MSG_NOTE,  "\n");
 589     }
 590
 591   return true;
 592 }
 593
 594
 595 /* Analyze dependences involved in the transform of SLP NODE.  STORES
 596    contain the vector of scalar stores of this instance if we are
 597    disambiguating the loads.  */
 598
 599 static bool
 600 vect_slp_analyze_node_dependences (slp_instance instance, slp_tree node,
 601                                    vec<gimple *> stores, gimple *last_store)
 602 {
 603   /* This walks over all stmts involved in the SLP load/store done
 604      in NODE verifying we can sink them up to the last stmt in the
 605      group.  */
 606   gimple *last_access = vect_find_last_scalar_stmt_in_slp (node);
 607   for (unsigned k = 0; k < SLP_INSTANCE_GROUP_SIZE (instance); ++k)
 608     {
 609       gimple *access = SLP_TREE_SCALAR_STMTS (node)[k];
 610       if (access == last_access)
 611         continue;
 612       data_reference *dr_a = STMT_VINFO_DATA_REF (vinfo_for_stmt (access));
 613       for (gimple_stmt_iterator gsi = gsi_for_stmt (access);
 614            gsi_stmt (gsi) != last_access; gsi_next (&gsi))
 615         {
 616           gimple *stmt = gsi_stmt (gsi);
 617           if (! gimple_vuse (stmt)
 618               || (DR_IS_READ (dr_a) && ! gimple_vdef (stmt)))
 619             continue;
 620
 621           /* If we couldn't record a (single) data reference for this
 622              stmt we have to give up.  */
 623           /* ???  Here and below if dependence analysis fails we can resort
 624              to the alias oracle which can handle more kinds of stmts.  */
 625           data_reference *dr_b = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
 626           if (!dr_b)
 627             return false;
 628
 629           bool dependent = false;
 630           /* If we run into a store of this same instance (we've just
 631              marked those) then delay dependence checking until we run
 632              into the last store because this is where it will have
 633              been sunk to (and we verify if we can do that as well).  */
 634           if (gimple_visited_p (stmt))
 635             {
 636               if (stmt != last_store)
 637                 continue;
 638               unsigned i;
 639               gimple *store;
 640               FOR_EACH_VEC_ELT (stores, i, store)
 641                 {
 642                   data_reference *store_dr
 643                     = STMT_VINFO_DATA_REF (vinfo_for_stmt (store));
 644                   ddr_p ddr = initialize_data_dependence_relation
 645                                 (dr_a, store_dr, vNULL);
 646                   dependent = vect_slp_analyze_data_ref_dependence (ddr);
 647                   free_dependence_relation (ddr);
 648                   if (dependent)
 649                     break;
 650                 }
 651             }
 652           else
 653             {
 654               ddr_p ddr = initialize_data_dependence_relation (dr_a,
 655                                                                dr_b, vNULL);
 656               dependent = vect_slp_analyze_data_ref_dependence (ddr);
 657               free_dependence_relation (ddr);
 658             }
 659           if (dependent)
 660             return false;
 661         }
 662     }
 663   return true;
 664 }
 665
 666
 667 /* Function vect_analyze_data_ref_dependences.
 668
 669    Examine all the data references in the basic-block, and make sure there
 670    do not exist any data dependences between them.  Set *MAX_VF according to
 671    the maximum vectorization factor the data dependences allow.  */
 672
 673 bool
 674 vect_slp_analyze_instance_dependence (slp_instance instance)
 675 {
 676   if (dump_enabled_p ())
 677     dump_printf_loc (MSG_NOTE, vect_location,
 678                      "=== vect_slp_analyze_instance_dependence ===\n");
 679
 680   /* The stores of this instance are at the root of the SLP tree.  */
 681   slp_tree store = SLP_INSTANCE_TREE (instance);
 682   if (! STMT_VINFO_DATA_REF (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (store)[0])))
 683     store = NULL;
 684
 685   /* Verify we can sink stores to the vectorized stmt insert location.  */
 686   gimple *last_store = NULL;
 687   if (store)
 688     {
 689       if (! vect_slp_analyze_node_dependences (instance, store, vNULL, NULL))
 690         return false;
 691
 692       /* Mark stores in this instance and remember the last one.  */
 693       last_store = vect_find_last_scalar_stmt_in_slp (store);
 694       for (unsigned k = 0; k < SLP_INSTANCE_GROUP_SIZE (instance); ++k)
 695         gimple_set_visited (SLP_TREE_SCALAR_STMTS (store)[k], true);
 696     }
 697
 698   bool res = true;
 699
 700   /* Verify we can sink loads to the vectorized stmt insert location,
 701      special-casing stores of this instance.  */
 702   slp_tree load;
 703   unsigned int i;
 704   FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load)
 705     if (! vect_slp_analyze_node_dependences (instance, load,
 706                                              store
 707                                              ? SLP_TREE_SCALAR_STMTS (store)
 708                                              : vNULL, last_store))
 709       {
 710         res = false;
 711         break;
 712       }
 713
 714   /* Unset the visited flag.  */
 715   if (store)
 716     for (unsigned k = 0; k < SLP_INSTANCE_GROUP_SIZE (instance); ++k)
 717       gimple_set_visited (SLP_TREE_SCALAR_STMTS (store)[k], false);
 718
 719   return res;
 720 }
 721
 722 /* Record in VINFO the base alignment guarantee given by DRB.  STMT is
 723    the statement that contains DRB, which is useful for recording in the
 724    dump file.  */
 725
 726 static void
 727 vect_record_base_alignment (vec_info *vinfo, gimple *stmt,
 728                             innermost_loop_behavior *drb)
 729 {
 730   bool existed;
 731   innermost_loop_behavior *&entry
 732     = vinfo->base_alignments.get_or_insert (drb->base_address, &existed);
 733   if (!existed || entry->base_alignment < drb->base_alignment)
 734     {
 735       entry = drb;
 736       if (dump_enabled_p ())
 737         {
 738           dump_printf_loc (MSG_NOTE, vect_location,
 739                            "recording new base alignment for ");
 740           dump_generic_expr (MSG_NOTE, TDF_SLIM, drb->base_address);
 741           dump_printf (MSG_NOTE, "\n");
 742           dump_printf_loc (MSG_NOTE, vect_location,
 743                            "  alignment:    %d\n", drb->base_alignment);
 744           dump_printf_loc (MSG_NOTE, vect_location,
 745                            "  misalignment: %d\n", drb->base_misalignment);
 746           dump_printf_loc (MSG_NOTE, vect_location,
 747                            "  based on:     ");
 748           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
 749         }
 750     }
 751 }
 752
 753 /* If the region we're going to vectorize is reached, all unconditional
 754    data references occur at least once.  We can therefore pool the base
 755    alignment guarantees from each unconditional reference.  Do this by
 756    going through all the data references in VINFO and checking whether
 757    the containing statement makes the reference unconditionally.  If so,
 758    record the alignment of the base address in VINFO so that it can be
 759    used for all other references with the same base.  */
 760
 761 void
 762 vect_record_base_alignments (vec_info *vinfo)
 763 {
 764   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
 765   struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
 766   data_reference *dr;
 767   unsigned int i;
 768   FOR_EACH_VEC_ELT (vinfo->datarefs, i, dr)
 769     if (!DR_IS_CONDITIONAL_IN_STMT (dr))
 770       {
 771         gimple *stmt = DR_STMT (dr);
 772         vect_record_base_alignment (vinfo, stmt, &DR_INNERMOST (dr));
 773
 774         /* If DR is nested in the loop that is being vectorized, we can also
 775            record the alignment of the base wrt the outer loop.  */
 776         if (loop && nested_in_vect_loop_p (loop, stmt))
 777           {
 778             stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
 779             vect_record_base_alignment
 780               (vinfo, stmt, &STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info));
 781           }
 782       }
 783 }
 784
 785 /* Return the target alignment for the vectorized form of DR.  */
 786
 787 static unsigned int
 788 vect_calculate_target_alignment (struct data_reference *dr)
 789 {
 790   gimple *stmt = DR_STMT (dr);
 791   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
 792   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
 793   return targetm.vectorize.preferred_vector_alignment (vectype);
 794 }
 795
 796 /* Function vect_compute_data_ref_alignment
 797
 798    Compute the misalignment of the data reference DR.
 799
 800    Output:
 801    1. If during the misalignment computation it is found that the data reference
 802       cannot be vectorized then false is returned.
 803    2. DR_MISALIGNMENT (DR) is defined.
 804
 805    FOR NOW: No analysis is actually performed. Misalignment is calculated
 806    only for trivial cases. TODO.  */
 807
 808 bool
 809 vect_compute_data_ref_alignment (struct data_reference *dr)
 810 {
 811   gimple *stmt = DR_STMT (dr);
 812   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
 813   vec_base_alignments *base_alignments = &stmt_info->vinfo->base_alignments;
 814   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
 815   struct loop *loop = NULL;
 816   tree ref = DR_REF (dr);
 817   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
 818
 819   if (dump_enabled_p ())
 820     dump_printf_loc (MSG_NOTE, vect_location,
 821                      "vect_compute_data_ref_alignment:\n");
 822
 823   if (loop_vinfo)
 824     loop = LOOP_VINFO_LOOP (loop_vinfo);
 825
 826   /* Initialize misalignment to unknown.  */
 827   SET_DR_MISALIGNMENT (dr, DR_MISALIGNMENT_UNKNOWN);
 828
 829   innermost_loop_behavior *drb = vect_dr_behavior (dr);
 830   bool step_preserves_misalignment_p;
 831
 832   unsigned HOST_WIDE_INT vector_alignment
 833     = vect_calculate_target_alignment (dr) / BITS_PER_UNIT;
 834   DR_TARGET_ALIGNMENT (dr) = vector_alignment;
 835
 836   /* No step for BB vectorization.  */
 837   if (!loop)
 838     {
 839       gcc_assert (integer_zerop (drb->step));
 840       step_preserves_misalignment_p = true;
 841     }
 842
 843   /* In case the dataref is in an inner-loop of the loop that is being
 844      vectorized (LOOP), we use the base and misalignment information
 845      relative to the outer-loop (LOOP).  This is ok only if the misalignment
 846      stays the same throughout the execution of the inner-loop, which is why
 847      we have to check that the stride of the dataref in the inner-loop evenly
 848      divides by the vector alignment.  */
 849   else if (nested_in_vect_loop_p (loop, stmt))
 850     {
 851       step_preserves_misalignment_p
 852         = (DR_STEP_ALIGNMENT (dr) % vector_alignment) == 0;
 853
 854       if (dump_enabled_p ())
 855         {
 856           if (step_preserves_misalignment_p)
 857             dump_printf_loc (MSG_NOTE, vect_location,
 858                              "inner step divides the vector alignment.\n");
 859           else
 860             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 861                              "inner step doesn't divide the vector"
 862                              " alignment.\n");
 863         }
 864     }
 865
 866   /* Similarly we can only use base and misalignment information relative to
 867      an innermost loop if the misalignment stays the same throughout the
 868      execution of the loop.  As above, this is the case if the stride of
 869      the dataref evenly divides by the alignment.  */
 870   else
 871     {
 872       poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
 873       step_preserves_misalignment_p
 874         = multiple_p (DR_STEP_ALIGNMENT (dr) * vf, vector_alignment);
 875
 876       if (!step_preserves_misalignment_p && dump_enabled_p ())
 877         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 878                          "step doesn't divide the vector alignment.\n");
 879     }
 880
 881   unsigned int base_alignment = drb->base_alignment;
 882   unsigned int base_misalignment = drb->base_misalignment;
 883
 884   /* Calculate the maximum of the pooled base address alignment and the
 885      alignment that we can compute for DR itself.  */
 886   innermost_loop_behavior **entry = base_alignments->get (drb->base_address);
 887   if (entry && base_alignment < (*entry)->base_alignment)
 888     {
 889       base_alignment = (*entry)->base_alignment;
 890       base_misalignment = (*entry)->base_misalignment;
 891     }
 892
 893   if (drb->offset_alignment < vector_alignment
 894       || !step_preserves_misalignment_p
 895       /* We need to know whether the step wrt the vectorized loop is
 896          negative when computing the starting misalignment below.  */
 897       || TREE_CODE (drb->step) != INTEGER_CST)
 898     {
 899       if (dump_enabled_p ())
 900         {
 901           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 902                            "Unknown alignment for access: ");
 903           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, ref);
 904           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
 905         }
 906       return true;
 907     }
 908
 909   if (base_alignment < vector_alignment)
 910     {
 911       tree base = drb->base_address;
 912       if (TREE_CODE (base) == ADDR_EXPR)
 913         base = TREE_OPERAND (base, 0);
 914       if (!vect_can_force_dr_alignment_p (base,
 915                                           vector_alignment * BITS_PER_UNIT))
 916         {
 917           if (dump_enabled_p ())
 918             {
 919               dump_printf_loc (MSG_NOTE, vect_location,
 920                                "can't force alignment of ref: ");
 921               dump_generic_expr (MSG_NOTE, TDF_SLIM, ref);
 922               dump_printf (MSG_NOTE, "\n");
 923             }
 924           return true;
 925         }
 926
 927       /* Force the alignment of the decl.
 928          NOTE: This is the only change to the code we make during
 929          the analysis phase, before deciding to vectorize the loop.  */
 930       if (dump_enabled_p ())
 931         {
 932           dump_printf_loc (MSG_NOTE, vect_location, "force alignment of ");
 933           dump_generic_expr (MSG_NOTE, TDF_SLIM, ref);
 934           dump_printf (MSG_NOTE, "\n");
 935         }
 936
 937       DR_VECT_AUX (dr)->base_decl = base;
 938       DR_VECT_AUX (dr)->base_misaligned = true;
 939       base_misalignment = 0;
 940     }
 941   poly_int64 misalignment
 942     = base_misalignment + wi::to_poly_offset (drb->init).force_shwi ();
 943
 944   /* If this is a backward running DR then first access in the larger
 945      vectype actually is N-1 elements before the address in the DR.
 946      Adjust misalign accordingly.  */
 947   if (tree_int_cst_sgn (drb->step) < 0)
 948     /* PLUS because STEP is negative.  */
 949     misalignment += ((TYPE_VECTOR_SUBPARTS (vectype) - 1)
 950                      * TREE_INT_CST_LOW (drb->step));
 951
 952   unsigned int const_misalignment;
 953   if (!known_misalignment (misalignment, vector_alignment,
 954                            &const_misalignment))
 955     {
 956       if (dump_enabled_p ())
 957         {
 958           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 959                            "Non-constant misalignment for access: ");
 960           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, ref);
 961           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
 962         }
 963       return true;
 964     }
 965
 966   SET_DR_MISALIGNMENT (dr, const_misalignment);
 967
 968   if (dump_enabled_p ())
 969     {
 970       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 971                        "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
 972       dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, ref);
 973       dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
 974     }
 975
 976   return true;
 977 }
 978
 979 /* Function vect_update_misalignment_for_peel.
 980    Sets DR's misalignment
 981    - to 0 if it has the same alignment as DR_PEEL,
 982    - to the misalignment computed using NPEEL if DR's salignment is known,
 983    - to -1 (unknown) otherwise.
 984
 985    DR - the data reference whose misalignment is to be adjusted.
 986    DR_PEEL - the data reference whose misalignment is being made
 987              zero in the vector loop by the peel.
 988    NPEEL - the number of iterations in the peel loop if the misalignment
 989            of DR_PEEL is known at compile time.  */
 990
 991 static void
 992 vect_update_misalignment_for_peel (struct data_reference *dr,
 993                                    struct data_reference *dr_peel, int npeel)
 994 {
 995   unsigned int i;
 996   vec<dr_p> same_aligned_drs;
 997   struct data_reference *current_dr;
 998   int dr_size = vect_get_scalar_dr_size (dr);
 999   int dr_peel_size = vect_get_scalar_dr_size (dr_peel);
1000   stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
1001   stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
1002
1003  /* For interleaved data accesses the step in the loop must be multiplied by
1004      the size of the interleaving group.  */
1005   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1006     dr_size *= GROUP_SIZE (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));
1007   if (STMT_VINFO_GROUPED_ACCESS (peel_stmt_info))
1008     dr_peel_size *= GROUP_SIZE (peel_stmt_info);
1009
1010   /* It can be assumed that the data refs with the same alignment as dr_peel
1011      are aligned in the vector loop.  */
1012   same_aligned_drs
1013     = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
1014   FOR_EACH_VEC_ELT (same_aligned_drs, i, current_dr)
1015     {
1016       if (current_dr != dr)
1017         continue;
1018       gcc_assert (!known_alignment_for_access_p (dr)
1019                   || !known_alignment_for_access_p (dr_peel)
1020                   || (DR_MISALIGNMENT (dr) / dr_size
1021                       == DR_MISALIGNMENT (dr_peel) / dr_peel_size));
1022       SET_DR_MISALIGNMENT (dr, 0);
1023       return;
1024     }
1025
1026   if (known_alignment_for_access_p (dr)
1027       && known_alignment_for_access_p (dr_peel))
1028     {
1029       bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
1030       int misal = DR_MISALIGNMENT (dr);
1031       misal += negative ? -npeel * dr_size : npeel * dr_size;
1032       misal &= DR_TARGET_ALIGNMENT (dr) - 1;
1033       SET_DR_MISALIGNMENT (dr, misal);
1034       return;
1035     }
1036
1037   if (dump_enabled_p ())
1038     dump_printf_loc (MSG_NOTE, vect_location, "Setting misalignment " \
1039                      "to unknown (-1).\n");
1040   SET_DR_MISALIGNMENT (dr, DR_MISALIGNMENT_UNKNOWN);
1041 }
1042
1043
1044 /* Function verify_data_ref_alignment
1045
1046    Return TRUE if DR can be handled with respect to alignment.  */
1047
1048 static bool
1049 verify_data_ref_alignment (data_reference_p dr)
1050 {
1051   enum dr_alignment_support supportable_dr_alignment
1052     = vect_supportable_dr_alignment (dr, false);
1053   if (!supportable_dr_alignment)
1054     {
1055       if (dump_enabled_p ())
1056         {
1057           if (DR_IS_READ (dr))
1058             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1059                              "not vectorized: unsupported unaligned load.");
1060           else
1061             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1062                              "not vectorized: unsupported unaligned "
1063                              "store.");
1064
1065           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
1066                              DR_REF (dr));
1067           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
1068         }
1069       return false;
1070     }
1071
1072   if (supportable_dr_alignment != dr_aligned && dump_enabled_p ())
1073     dump_printf_loc (MSG_NOTE, vect_location,
1074                      "Vectorizing an unaligned access.\n");
1075
1076   return true;
1077 }
1078
1079 /* Function vect_verify_datarefs_alignment
1080
1081    Return TRUE if all data references in the loop can be
1082    handled with respect to alignment.  */
1083
1084 bool
1085 vect_verify_datarefs_alignment (loop_vec_info vinfo)
1086 {
1087   vec<data_reference_p> datarefs = vinfo->datarefs;
1088   struct data_reference *dr;
1089   unsigned int i;
1090
1091   FOR_EACH_VEC_ELT (datarefs, i, dr)
1092     {
1093       gimple *stmt = DR_STMT (dr);
1094       stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1095
1096       if (!STMT_VINFO_RELEVANT_P (stmt_info))
1097         continue;
1098
1099       /* For interleaving, only the alignment of the first access matters.   */
1100       if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1101           && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1102         continue;
1103
1104       /* Strided accesses perform only component accesses, alignment is
1105          irrelevant for them.  */
1106       if (STMT_VINFO_STRIDED_P (stmt_info)
1107           && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
1108         continue;
1109
1110       if (! verify_data_ref_alignment (dr))
1111         return false;
1112     }
1113
1114   return true;
1115 }
1116
1117 /* Given an memory reference EXP return whether its alignment is less
1118    than its size.  */
1119
1120 static bool
1121 not_size_aligned (tree exp)
1122 {
1123   if (!tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (exp))))
1124     return true;
1125
1126   return (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (exp)))
1127           > get_object_alignment (exp));
1128 }
1129
1130 /* Function vector_alignment_reachable_p
1131
1132    Return true if vector alignment for DR is reachable by peeling
1133    a few loop iterations.  Return false otherwise.  */
1134
1135 static bool
1136 vector_alignment_reachable_p (struct data_reference *dr)
1137 {
1138   gimple *stmt = DR_STMT (dr);
1139   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1140   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1141
1142   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1143     {
1144       /* For interleaved access we peel only if number of iterations in
1145          the prolog loop ({VF - misalignment}), is a multiple of the
1146          number of the interleaved accesses.  */
1147       int elem_size, mis_in_elements;
1148
1149       /* FORNOW: handle only known alignment.  */
1150       if (!known_alignment_for_access_p (dr))
1151         return false;
1152
1153       poly_uint64 nelements = TYPE_VECTOR_SUBPARTS (vectype);
1154       poly_uint64 vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
1155       elem_size = vector_element_size (vector_size, nelements);
1156       mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
1157
1158       if (!multiple_p (nelements - mis_in_elements, GROUP_SIZE (stmt_info)))
1159         return false;
1160     }
1161
1162   /* If misalignment is known at the compile time then allow peeling
1163      only if natural alignment is reachable through peeling.  */
1164   if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
1165     {
1166       HOST_WIDE_INT elmsize =
1167                 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1168       if (dump_enabled_p ())
1169         {
1170           dump_printf_loc (MSG_NOTE, vect_location,
1171                            "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
1172           dump_printf (MSG_NOTE,
1173                        ". misalignment = %d.\n", DR_MISALIGNMENT (dr));
1174         }
1175       if (DR_MISALIGNMENT (dr) % elmsize)
1176         {
1177           if (dump_enabled_p ())
1178             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1179                              "data size does not divide the misalignment.\n");
1180           return false;
1181         }
1182     }
1183
1184   if (!known_alignment_for_access_p (dr))
1185     {
1186       tree type = TREE_TYPE (DR_REF (dr));
1187       bool is_packed = not_size_aligned (DR_REF (dr));
1188       if (dump_enabled_p ())
1189         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1190                          "Unknown misalignment, %snaturally aligned\n",
1191                          is_packed ? "not " : "");
1192       return targetm.vectorize.vector_alignment_reachable (type, is_packed);
1193     }
1194
1195   return true;
1196 }
1197
1198
1199 /* Calculate the cost of the memory access represented by DR.  */
1200
1201 static void
1202 vect_get_data_access_cost (struct data_reference *dr,
1203                            unsigned int *inside_cost,
1204                            unsigned int *outside_cost,
1205                            stmt_vector_for_cost *body_cost_vec)
1206 {
1207   gimple *stmt = DR_STMT (dr);
1208   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1209   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1210   int ncopies;
1211
1212   if (PURE_SLP_STMT (stmt_info))
1213     ncopies = 1;
1214   else
1215     ncopies = vect_get_num_copies (loop_vinfo, STMT_VINFO_VECTYPE (stmt_info));
1216
1217   if (DR_IS_READ (dr))
1218     vect_get_load_cost (dr, ncopies, true, inside_cost, outside_cost,
1219                         NULL, body_cost_vec, false);
1220   else
1221     vect_get_store_cost (dr, ncopies, inside_cost, body_cost_vec);
1222
1223   if (dump_enabled_p ())
1224     dump_printf_loc (MSG_NOTE, vect_location,
1225                      "vect_get_data_access_cost: inside_cost = %d, "
1226                      "outside_cost = %d.\n", *inside_cost, *outside_cost);
1227 }
1228
1229
1230 typedef struct _vect_peel_info
1231 {
1232   struct data_reference *dr;
1233   int npeel;
1234   unsigned int count;
1235 } *vect_peel_info;
1236
1237 typedef struct _vect_peel_extended_info
1238 {
1239   struct _vect_peel_info peel_info;
1240   unsigned int inside_cost;
1241   unsigned int outside_cost;
1242 } *vect_peel_extended_info;
1243
1244
1245 /* Peeling hashtable helpers.  */
1246
1247 struct peel_info_hasher : free_ptr_hash <_vect_peel_info>
1248 {
1249   static inline hashval_t hash (const _vect_peel_info *);
1250   static inline bool equal (const _vect_peel_info *, const _vect_peel_info *);
1251 };
1252
1253 inline hashval_t
1254 peel_info_hasher::hash (const _vect_peel_info *peel_info)
1255 {
1256   return (hashval_t) peel_info->npeel;
1257 }
1258
1259 inline bool
1260 peel_info_hasher::equal (const _vect_peel_info *a, const _vect_peel_info *b)
1261 {
1262   return (a->npeel == b->npeel);
1263 }
1264
1265
1266 /* Insert DR into peeling hash table with NPEEL as key.  */
1267
1268 static void
1269 vect_peeling_hash_insert (hash_table<peel_info_hasher> *peeling_htab,
1270                           loop_vec_info loop_vinfo, struct data_reference *dr,
1271                           int npeel)
1272 {
1273   struct _vect_peel_info elem, *slot;
1274   _vect_peel_info **new_slot;
1275   bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1276
1277   elem.npeel = npeel;
1278   slot = peeling_htab->find (&elem);
1279   if (slot)
1280     slot->count++;
1281   else
1282     {
1283       slot = XNEW (struct _vect_peel_info);
1284       slot->npeel = npeel;
1285       slot->dr = dr;
1286       slot->count = 1;
1287       new_slot = peeling_htab->find_slot (slot, INSERT);
1288       *new_slot = slot;
1289     }
1290
1291   if (!supportable_dr_alignment
1292       && unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
1293     slot->count += VECT_MAX_COST;
1294 }
1295
1296
1297 /* Traverse peeling hash table to find peeling option that aligns maximum
1298    number of data accesses.  */
1299
1300 int
1301 vect_peeling_hash_get_most_frequent (_vect_peel_info **slot,
1302                                      _vect_peel_extended_info *max)
1303 {
1304   vect_peel_info elem = *slot;
1305
1306   if (elem->count > max->peel_info.count
1307       || (elem->count == max->peel_info.count
1308           && max->peel_info.npeel > elem->npeel))
1309     {
1310       max->peel_info.npeel = elem->npeel;
1311       max->peel_info.count = elem->count;
1312       max->peel_info.dr = elem->dr;
1313     }
1314
1315   return 1;
1316 }
1317
1318 /* Get the costs of peeling NPEEL iterations checking data access costs
1319    for all data refs.  If UNKNOWN_MISALIGNMENT is true, we assume DR0's
1320    misalignment will be zero after peeling.  */
1321
1322 static void
1323 vect_get_peeling_costs_all_drs (vec<data_reference_p> datarefs,
1324                                 struct data_reference *dr0,
1325                                 unsigned int *inside_cost,
1326                                 unsigned int *outside_cost,
1327                                 stmt_vector_for_cost *body_cost_vec,
1328                                 unsigned int npeel,
1329                                 bool unknown_misalignment)
1330 {
1331   unsigned i;
1332   data_reference *dr;
1333
1334   FOR_EACH_VEC_ELT (datarefs, i, dr)
1335     {
1336       gimple *stmt = DR_STMT (dr);
1337       stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1338       if (!STMT_VINFO_RELEVANT_P (stmt_info))
1339         continue;
1340
1341       /* For interleaving, only the alignment of the first access
1342          matters.  */
1343       if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1344           && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1345         continue;
1346
1347       /* Strided accesses perform only component accesses, alignment is
1348          irrelevant for them.  */
1349       if (STMT_VINFO_STRIDED_P (stmt_info)
1350           && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
1351         continue;
1352
1353       int save_misalignment;
1354       save_misalignment = DR_MISALIGNMENT (dr);
1355       if (npeel == 0)
1356         ;
1357       else if (unknown_misalignment && dr == dr0)
1358         SET_DR_MISALIGNMENT (dr, 0);
1359       else
1360         vect_update_misalignment_for_peel (dr, dr0, npeel);
1361       vect_get_data_access_cost (dr, inside_cost, outside_cost,
1362                                  body_cost_vec);
1363       SET_DR_MISALIGNMENT (dr, save_misalignment);
1364     }
1365 }
1366
1367 /* Traverse peeling hash table and calculate cost for each peeling option.
1368    Find the one with the lowest cost.  */
1369
1370 int
1371 vect_peeling_hash_get_lowest_cost (_vect_peel_info **slot,
1372                                    _vect_peel_extended_info *min)
1373 {
1374   vect_peel_info elem = *slot;
1375   int dummy;
1376   unsigned int inside_cost = 0, outside_cost = 0;
1377   gimple *stmt = DR_STMT (elem->dr);
1378   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1379   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1380   stmt_vector_for_cost prologue_cost_vec, body_cost_vec,
1381                        epilogue_cost_vec;
1382
1383   prologue_cost_vec.create (2);
1384   body_cost_vec.create (2);
1385   epilogue_cost_vec.create (2);
1386
1387   vect_get_peeling_costs_all_drs (LOOP_VINFO_DATAREFS (loop_vinfo),
1388                                   elem->dr, &inside_cost, &outside_cost,
1389                                   &body_cost_vec, elem->npeel, false);
1390
1391   body_cost_vec.release ();
1392
1393   outside_cost += vect_get_known_peeling_cost
1394     (loop_vinfo, elem->npeel, &dummy,
1395      &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
1396      &prologue_cost_vec, &epilogue_cost_vec);
1397
1398   /* Prologue and epilogue costs are added to the target model later.
1399      These costs depend only on the scalar iteration cost, the
1400      number of peeling iterations finally chosen, and the number of
1401      misaligned statements.  So discard the information found here.  */
1402   prologue_cost_vec.release ();
1403   epilogue_cost_vec.release ();
1404
1405   if (inside_cost < min->inside_cost
1406       || (inside_cost == min->inside_cost
1407           && outside_cost < min->outside_cost))
1408     {
1409       min->inside_cost = inside_cost;
1410       min->outside_cost = outside_cost;
1411       min->peel_info.dr = elem->dr;
1412       min->peel_info.npeel = elem->npeel;
1413       min->peel_info.count = elem->count;
1414     }
1415
1416   return 1;
1417 }
1418
1419
1420 /* Choose best peeling option by traversing peeling hash table and either
1421    choosing an option with the lowest cost (if cost model is enabled) or the
1422    option that aligns as many accesses as possible.  */
1423
1424 static struct _vect_peel_extended_info
1425 vect_peeling_hash_choose_best_peeling (hash_table<peel_info_hasher> *peeling_htab,
1426                                        loop_vec_info loop_vinfo)
1427 {
1428    struct _vect_peel_extended_info res;
1429
1430    res.peel_info.dr = NULL;
1431
1432    if (!unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
1433      {
1434        res.inside_cost = INT_MAX;
1435        res.outside_cost = INT_MAX;
1436        peeling_htab->traverse <_vect_peel_extended_info *,
1437                                vect_peeling_hash_get_lowest_cost> (&res);
1438      }
1439    else
1440      {
1441        res.peel_info.count = 0;
1442        peeling_htab->traverse <_vect_peel_extended_info *,
1443                                vect_peeling_hash_get_most_frequent> (&res);
1444        res.inside_cost = 0;
1445        res.outside_cost = 0;
1446      }
1447
1448    return res;
1449 }
1450
1451 /* Return true if the new peeling NPEEL is supported.  */
1452
1453 static bool
1454 vect_peeling_supportable (loop_vec_info loop_vinfo, struct data_reference *dr0,
1455                           unsigned npeel)
1456 {
1457   unsigned i;
1458   struct data_reference *dr = NULL;
1459   vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1460   gimple *stmt;
1461   stmt_vec_info stmt_info;
1462   enum dr_alignment_support supportable_dr_alignment;
1463
1464   /* Ensure that all data refs can be vectorized after the peel.  */
1465   FOR_EACH_VEC_ELT (datarefs, i, dr)
1466     {
1467       int save_misalignment;
1468
1469       if (dr == dr0)
1470         continue;
1471
1472       stmt = DR_STMT (dr);
1473       stmt_info = vinfo_for_stmt (stmt);
1474       /* For interleaving, only the alignment of the first access
1475          matters.  */
1476       if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1477           && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1478         continue;
1479
1480       /* Strided accesses perform only component accesses, alignment is
1481          irrelevant for them.  */
1482       if (STMT_VINFO_STRIDED_P (stmt_info)
1483           && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
1484         continue;
1485
1486       save_misalignment = DR_MISALIGNMENT (dr);
1487       vect_update_misalignment_for_peel (dr, dr0, npeel);
1488       supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1489       SET_DR_MISALIGNMENT (dr, save_misalignment);
1490
1491       if (!supportable_dr_alignment)
1492         return false;
1493     }
1494
1495   return true;
1496 }
1497
1498 /* Function vect_enhance_data_refs_alignment
1499
1500    This pass will use loop versioning and loop peeling in order to enhance
1501    the alignment of data references in the loop.
1502
1503    FOR NOW: we assume that whatever versioning/peeling takes place, only the
1504    original loop is to be vectorized.  Any other loops that are created by
1505    the transformations performed in this pass - are not supposed to be
1506    vectorized.  This restriction will be relaxed.
1507
1508    This pass will require a cost model to guide it whether to apply peeling
1509    or versioning or a combination of the two.  For example, the scheme that
1510    intel uses when given a loop with several memory accesses, is as follows:
1511    choose one memory access ('p') which alignment you want to force by doing
1512    peeling.  Then, either (1) generate a loop in which 'p' is aligned and all
1513    other accesses are not necessarily aligned, or (2) use loop versioning to
1514    generate one loop in which all accesses are aligned, and another loop in
1515    which only 'p' is necessarily aligned.
1516
1517    ("Automatic Intra-Register Vectorization for the Intel Architecture",
1518    Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
1519    Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1520
1521    Devising a cost model is the most critical aspect of this work.  It will
1522    guide us on which access to peel for, whether to use loop versioning, how
1523    many versions to create, etc.  The cost model will probably consist of
1524    generic considerations as well as target specific considerations (on
1525    powerpc for example, misaligned stores are more painful than misaligned
1526    loads).
1527
1528    Here are the general steps involved in alignment enhancements:
1529
1530      -- original loop, before alignment analysis:
1531         for (i=0; i<N; i++){
1532           x = q[i];                     # DR_MISALIGNMENT(q) = unknown
1533           p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
1534         }
1535
1536      -- After vect_compute_data_refs_alignment:
1537         for (i=0; i<N; i++){
1538           x = q[i];                     # DR_MISALIGNMENT(q) = 3
1539           p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
1540         }
1541
1542      -- Possibility 1: we do loop versioning:
1543      if (p is aligned) {
1544         for (i=0; i<N; i++){    # loop 1A
1545           x = q[i];                     # DR_MISALIGNMENT(q) = 3
1546           p[i] = y;                     # DR_MISALIGNMENT(p) = 0
1547         }
1548      }
1549      else {
1550         for (i=0; i<N; i++){    # loop 1B
1551           x = q[i];                     # DR_MISALIGNMENT(q) = 3
1552           p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
1553         }
1554      }
1555
1556      -- Possibility 2: we do loop peeling:
1557      for (i = 0; i < 3; i++){   # (scalar loop, not to be vectorized).
1558         x = q[i];
1559         p[i] = y;
1560      }
1561      for (i = 3; i < N; i++){   # loop 2A
1562         x = q[i];                       # DR_MISALIGNMENT(q) = 0
1563         p[i] = y;                       # DR_MISALIGNMENT(p) = unknown
1564      }
1565
1566      -- Possibility 3: combination of loop peeling and versioning:
1567      for (i = 0; i < 3; i++){   # (scalar loop, not to be vectorized).
1568         x = q[i];
1569         p[i] = y;
1570      }
1571      if (p is aligned) {
1572         for (i = 3; i<N; i++){  # loop 3A
1573           x = q[i];                     # DR_MISALIGNMENT(q) = 0
1574           p[i] = y;                     # DR_MISALIGNMENT(p) = 0
1575         }
1576      }
1577      else {
1578         for (i = 3; i<N; i++){  # loop 3B
1579           x = q[i];                     # DR_MISALIGNMENT(q) = 0
1580           p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
1581         }
1582      }
1583
1584      These loops are later passed to loop_transform to be vectorized.  The
1585      vectorizer will use the alignment information to guide the transformation
1586      (whether to generate regular loads/stores, or with special handling for
1587      misalignment).  */
1588
1589 bool
1590 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1591 {
1592   vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1593   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1594   enum dr_alignment_support supportable_dr_alignment;
1595   struct data_reference *dr0 = NULL, *first_store = NULL;
1596   struct data_reference *dr;
1597   unsigned int i, j;
1598   bool do_peeling = false;
1599   bool do_versioning = false;
1600   bool stat;
1601   gimple *stmt;
1602   stmt_vec_info stmt_info;
1603   unsigned int npeel = 0;
1604   bool one_misalignment_known = false;
1605   bool one_misalignment_unknown = false;
1606   bool one_dr_unsupportable = false;
1607   struct data_reference *unsupportable_dr = NULL;
1608   poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1609   unsigned possible_npeel_number = 1;
1610   tree vectype;
1611   unsigned int mis, same_align_drs_max = 0;
1612   hash_table<peel_info_hasher> peeling_htab (1);
1613
1614   if (dump_enabled_p ())
1615     dump_printf_loc (MSG_NOTE, vect_location,
1616                      "=== vect_enhance_data_refs_alignment ===\n");
1617
1618   /* Reset data so we can safely be called multiple times.  */
1619   LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).truncate (0);
1620   LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = 0;
1621
1622   /* While cost model enhancements are expected in the future, the high level
1623      view of the code at this time is as follows:
1624
1625      A) If there is a misaligned access then see if peeling to align
1626         this access can make all data references satisfy
1627         vect_supportable_dr_alignment.  If so, update data structures
1628         as needed and return true.
1629
1630      B) If peeling wasn't possible and there is a data reference with an
1631         unknown misalignment that does not satisfy vect_supportable_dr_alignment
1632         then see if loop versioning checks can be used to make all data
1633         references satisfy vect_supportable_dr_alignment.  If so, update
1634         data structures as needed and return true.
1635
1636      C) If neither peeling nor versioning were successful then return false if
1637         any data reference does not satisfy vect_supportable_dr_alignment.
1638
1639      D) Return true (all data references satisfy vect_supportable_dr_alignment).
1640
1641      Note, Possibility 3 above (which is peeling and versioning together) is not
1642      being done at this time.  */
1643
1644   /* (1) Peeling to force alignment.  */
1645
1646   /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1647      Considerations:
1648      + How many accesses will become aligned due to the peeling
1649      - How many accesses will become unaligned due to the peeling,
1650        and the cost of misaligned accesses.
1651      - The cost of peeling (the extra runtime checks, the increase
1652        in code size).  */
1653
1654   FOR_EACH_VEC_ELT (datarefs, i, dr)
1655     {
1656       stmt = DR_STMT (dr);
1657       stmt_info = vinfo_for_stmt (stmt);
1658
1659       if (!STMT_VINFO_RELEVANT_P (stmt_info))
1660         continue;
1661
1662       /* For interleaving, only the alignment of the first access
1663          matters.  */
1664       if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1665           && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1666         continue;
1667
1668       /* For invariant accesses there is nothing to enhance.  */
1669       if (integer_zerop (DR_STEP (dr)))
1670         continue;
1671
1672       /* Strided accesses perform only component accesses, alignment is
1673          irrelevant for them.  */
1674       if (STMT_VINFO_STRIDED_P (stmt_info)
1675           && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
1676         continue;
1677
1678       supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1679       do_peeling = vector_alignment_reachable_p (dr);
1680       if (do_peeling)
1681         {
1682           if (known_alignment_for_access_p (dr))
1683             {
1684               unsigned int npeel_tmp = 0;
1685               bool negative = tree_int_cst_compare (DR_STEP (dr),
1686                                                     size_zero_node) < 0;
1687
1688               vectype = STMT_VINFO_VECTYPE (stmt_info);
1689               unsigned int target_align = DR_TARGET_ALIGNMENT (dr);
1690               unsigned int dr_size = vect_get_scalar_dr_size (dr);
1691               mis = (negative ? DR_MISALIGNMENT (dr) : -DR_MISALIGNMENT (dr));
1692               if (DR_MISALIGNMENT (dr) != 0)
1693                 npeel_tmp = (mis & (target_align - 1)) / dr_size;
1694
1695               /* For multiple types, it is possible that the bigger type access
1696                  will have more than one peeling option.  E.g., a loop with two
1697                  types: one of size (vector size / 4), and the other one of
1698                  size (vector size / 8).  Vectorization factor will 8.  If both
1699                  accesses are misaligned by 3, the first one needs one scalar
1700                  iteration to be aligned, and the second one needs 5.  But the
1701                  first one will be aligned also by peeling 5 scalar
1702                  iterations, and in that case both accesses will be aligned.
1703                  Hence, except for the immediate peeling amount, we also want
1704                  to try to add full vector size, while we don't exceed
1705                  vectorization factor.
1706                  We do this automatically for cost model, since we calculate
1707                  cost for every peeling option.  */
1708               if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
1709                 {
1710                   poly_uint64 nscalars = (STMT_SLP_TYPE (stmt_info)
1711                                           ? vf * GROUP_SIZE (stmt_info) : vf);
1712                   possible_npeel_number
1713                     = vect_get_num_vectors (nscalars, vectype);
1714
1715                   /* NPEEL_TMP is 0 when there is no misalignment, but also
1716                      allow peeling NELEMENTS.  */
1717                   if (DR_MISALIGNMENT (dr) == 0)
1718                     possible_npeel_number++;
1719                 }
1720
1721               /* Save info about DR in the hash table.  Also include peeling
1722                  amounts according to the explanation above.  */
1723               for (j = 0; j < possible_npeel_number; j++)
1724                 {
1725                   vect_peeling_hash_insert (&peeling_htab, loop_vinfo,
1726                                             dr, npeel_tmp);
1727                   npeel_tmp += target_align / dr_size;
1728                 }
1729
1730               one_misalignment_known = true;
1731             }
1732           else
1733             {
1734               /* If we don't know any misalignment values, we prefer
1735                  peeling for data-ref that has the maximum number of data-refs
1736                  with the same alignment, unless the target prefers to align
1737                  stores over load.  */
1738               unsigned same_align_drs
1739                 = STMT_VINFO_SAME_ALIGN_REFS (stmt_info).length ();
1740               if (!dr0
1741                   || same_align_drs_max < same_align_drs)
1742                 {
1743                   same_align_drs_max = same_align_drs;
1744                   dr0 = dr;
1745                 }
1746               /* For data-refs with the same number of related
1747                  accesses prefer the one where the misalign
1748                  computation will be invariant in the outermost loop.  */
1749               else if (same_align_drs_max == same_align_drs)
1750                 {
1751                   struct loop *ivloop0, *ivloop;
1752                   ivloop0 = outermost_invariant_loop_for_expr
1753                     (loop, DR_BASE_ADDRESS (dr0));
1754                   ivloop = outermost_invariant_loop_for_expr
1755                     (loop, DR_BASE_ADDRESS (dr));
1756                   if ((ivloop && !ivloop0)
1757                       || (ivloop && ivloop0
1758                           && flow_loop_nested_p (ivloop, ivloop0)))
1759                     dr0 = dr;
1760                 }
1761
1762               one_misalignment_unknown = true;
1763
1764               /* Check for data refs with unsupportable alignment that
1765                  can be peeled.  */
1766               if (!supportable_dr_alignment)
1767               {
1768                 one_dr_unsupportable = true;
1769                 unsupportable_dr = dr;
1770               }
1771
1772               if (!first_store && DR_IS_WRITE (dr))
1773                 first_store = dr;
1774             }
1775         }
1776       else
1777         {
1778           if (!aligned_access_p (dr))
1779             {
1780               if (dump_enabled_p ())
1781                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1782                                  "vector alignment may not be reachable\n");
1783               break;
1784             }
1785         }
1786     }
1787
1788   /* Check if we can possibly peel the loop.  */
1789   if (!vect_can_advance_ivs_p (loop_vinfo)
1790       || !slpeel_can_duplicate_loop_p (loop, single_exit (loop))
1791       || loop->inner)
1792     do_peeling = false;
1793
1794   struct _vect_peel_extended_info peel_for_known_alignment;
1795   struct _vect_peel_extended_info peel_for_unknown_alignment;
1796   struct _vect_peel_extended_info best_peel;
1797
1798   peel_for_unknown_alignment.inside_cost = INT_MAX;
1799   peel_for_unknown_alignment.outside_cost = INT_MAX;
1800   peel_for_unknown_alignment.peel_info.count = 0;
1801
1802   if (do_peeling
1803       && one_misalignment_unknown)
1804     {
1805       /* Check if the target requires to prefer stores over loads, i.e., if
1806          misaligned stores are more expensive than misaligned loads (taking
1807          drs with same alignment into account).  */
1808       unsigned int load_inside_cost = 0;
1809       unsigned int load_outside_cost = 0;
1810       unsigned int store_inside_cost = 0;
1811       unsigned int store_outside_cost = 0;
1812       unsigned int estimated_npeels = vect_vf_for_cost (loop_vinfo) / 2;
1813
1814       stmt_vector_for_cost dummy;
1815       dummy.create (2);
1816       vect_get_peeling_costs_all_drs (datarefs, dr0,
1817                                       &load_inside_cost,
1818                                       &load_outside_cost,
1819                                       &dummy, estimated_npeels, true);
1820       dummy.release ();
1821
1822       if (first_store)
1823         {
1824           dummy.create (2);
1825           vect_get_peeling_costs_all_drs (datarefs, first_store,
1826                                           &store_inside_cost,
1827                                           &store_outside_cost,
1828                                           &dummy, estimated_npeels, true);
1829           dummy.release ();
1830         }
1831       else
1832         {
1833           store_inside_cost = INT_MAX;
1834           store_outside_cost = INT_MAX;
1835         }
1836
1837       if (load_inside_cost > store_inside_cost
1838           || (load_inside_cost == store_inside_cost
1839               && load_outside_cost > store_outside_cost))
1840         {
1841           dr0 = first_store;
1842           peel_for_unknown_alignment.inside_cost = store_inside_cost;
1843           peel_for_unknown_alignment.outside_cost = store_outside_cost;
1844         }
1845       else
1846         {
1847           peel_for_unknown_alignment.inside_cost = load_inside_cost;
1848           peel_for_unknown_alignment.outside_cost = load_outside_cost;
1849         }
1850
1851       stmt_vector_for_cost prologue_cost_vec, epilogue_cost_vec;
1852       prologue_cost_vec.create (2);
1853       epilogue_cost_vec.create (2);
1854
1855       int dummy2;
1856       peel_for_unknown_alignment.outside_cost += vect_get_known_peeling_cost
1857         (loop_vinfo, estimated_npeels, &dummy2,
1858          &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
1859          &prologue_cost_vec, &epilogue_cost_vec);
1860
1861       prologue_cost_vec.release ();
1862       epilogue_cost_vec.release ();
1863
1864       peel_for_unknown_alignment.peel_info.count = 1
1865         + STMT_VINFO_SAME_ALIGN_REFS
1866         (vinfo_for_stmt (DR_STMT (dr0))).length ();
1867     }
1868
1869   peel_for_unknown_alignment.peel_info.npeel = 0;
1870   peel_for_unknown_alignment.peel_info.dr = dr0;
1871
1872   best_peel = peel_for_unknown_alignment;
1873
1874   peel_for_known_alignment.inside_cost = INT_MAX;
1875   peel_for_known_alignment.outside_cost = INT_MAX;
1876   peel_for_known_alignment.peel_info.count = 0;
1877   peel_for_known_alignment.peel_info.dr = NULL;
1878
1879   if (do_peeling && one_misalignment_known)
1880     {
1881       /* Peeling is possible, but there is no data access that is not supported
1882          unless aligned.  So we try to choose the best possible peeling from
1883          the hash table.  */
1884       peel_for_known_alignment = vect_peeling_hash_choose_best_peeling
1885         (&peeling_htab, loop_vinfo);
1886     }
1887
1888   /* Compare costs of peeling for known and unknown alignment. */
1889   if (peel_for_known_alignment.peel_info.dr != NULL
1890       && peel_for_unknown_alignment.inside_cost
1891       >= peel_for_known_alignment.inside_cost)
1892     {
1893       best_peel = peel_for_known_alignment;
1894
1895       /* If the best peeling for known alignment has NPEEL == 0, perform no
1896          peeling at all except if there is an unsupportable dr that we can
1897          align.  */
1898       if (best_peel.peel_info.npeel == 0 && !one_dr_unsupportable)
1899         do_peeling = false;
1900     }
1901
1902   /* If there is an unsupportable data ref, prefer this over all choices so far
1903      since we'd have to discard a chosen peeling except when it accidentally
1904      aligned the unsupportable data ref.  */
1905   if (one_dr_unsupportable)
1906     dr0 = unsupportable_dr;
1907   else if (do_peeling)
1908     {
1909       /* Calculate the penalty for no peeling, i.e. leaving everything as-is.
1910          TODO: Use nopeel_outside_cost or get rid of it?  */
1911       unsigned nopeel_inside_cost = 0;
1912       unsigned nopeel_outside_cost = 0;
1913
1914       stmt_vector_for_cost dummy;
1915       dummy.create (2);
1916       vect_get_peeling_costs_all_drs (datarefs, NULL, &nopeel_inside_cost,
1917                                       &nopeel_outside_cost, &dummy, 0, false);
1918       dummy.release ();
1919
1920       /* Add epilogue costs.  As we do not peel for alignment here, no prologue
1921          costs will be recorded.  */
1922       stmt_vector_for_cost prologue_cost_vec, epilogue_cost_vec;
1923       prologue_cost_vec.create (2);
1924       epilogue_cost_vec.create (2);
1925
1926       int dummy2;
1927       nopeel_outside_cost += vect_get_known_peeling_cost
1928         (loop_vinfo, 0, &dummy2,
1929          &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
1930          &prologue_cost_vec, &epilogue_cost_vec);
1931
1932       prologue_cost_vec.release ();
1933       epilogue_cost_vec.release ();
1934
1935       npeel = best_peel.peel_info.npeel;
1936       dr0 = best_peel.peel_info.dr;
1937
1938       /* If no peeling is not more expensive than the best peeling we
1939          have so far, don't perform any peeling.  */
1940       if (nopeel_inside_cost <= best_peel.inside_cost)
1941         do_peeling = false;
1942     }
1943
1944   if (do_peeling)
1945     {
1946       stmt = DR_STMT (dr0);
1947       stmt_info = vinfo_for_stmt (stmt);
1948       vectype = STMT_VINFO_VECTYPE (stmt_info);
1949
1950       if (known_alignment_for_access_p (dr0))
1951         {
1952           bool negative = tree_int_cst_compare (DR_STEP (dr0),
1953                                                 size_zero_node) < 0;
1954           if (!npeel)
1955             {
1956               /* Since it's known at compile time, compute the number of
1957                  iterations in the peeled loop (the peeling factor) for use in
1958                  updating DR_MISALIGNMENT values.  The peeling factor is the
1959                  vectorization factor minus the misalignment as an element
1960                  count.  */
1961               mis = negative ? DR_MISALIGNMENT (dr0) : -DR_MISALIGNMENT (dr0);
1962               unsigned int target_align = DR_TARGET_ALIGNMENT (dr0);
1963               npeel = ((mis & (target_align - 1))
1964                        / vect_get_scalar_dr_size (dr0));
1965             }
1966
1967           /* For interleaved data access every iteration accesses all the
1968              members of the group, therefore we divide the number of iterations
1969              by the group size.  */
1970           stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1971           if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1972             npeel /= GROUP_SIZE (stmt_info);
1973
1974           if (dump_enabled_p ())
1975             dump_printf_loc (MSG_NOTE, vect_location,
1976                              "Try peeling by %d\n", npeel);
1977         }
1978
1979       /* Ensure that all datarefs can be vectorized after the peel.  */
1980       if (!vect_peeling_supportable (loop_vinfo, dr0, npeel))
1981         do_peeling = false;
1982
1983       /* Check if all datarefs are supportable and log.  */
1984       if (do_peeling && known_alignment_for_access_p (dr0) && npeel == 0)
1985         {
1986           stat = vect_verify_datarefs_alignment (loop_vinfo);
1987           if (!stat)
1988             do_peeling = false;
1989           else
1990             return stat;
1991         }
1992
1993       /* Cost model #1 - honor --param vect-max-peeling-for-alignment.  */
1994       if (do_peeling)
1995         {
1996           unsigned max_allowed_peel
1997             = PARAM_VALUE (PARAM_VECT_MAX_PEELING_FOR_ALIGNMENT);
1998           if (max_allowed_peel != (unsigned)-1)
1999             {
2000               unsigned max_peel = npeel;
2001               if (max_peel == 0)
2002                 {
2003                   unsigned int target_align = DR_TARGET_ALIGNMENT (dr0);
2004                   max_peel = target_align / vect_get_scalar_dr_size (dr0) - 1;
2005                 }
2006               if (max_peel > max_allowed_peel)
2007                 {
2008                   do_peeling = false;
2009                   if (dump_enabled_p ())
2010                     dump_printf_loc (MSG_NOTE, vect_location,
2011                         "Disable peeling, max peels reached: %d\n", max_peel);
2012                 }
2013             }
2014         }
2015
2016       /* Cost model #2 - if peeling may result in a remaining loop not
2017          iterating enough to be vectorized then do not peel.  Since this
2018          is a cost heuristic rather than a correctness decision, use the
2019          most likely runtime value for variable vectorization factors.  */
2020       if (do_peeling
2021           && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
2022         {
2023           unsigned int assumed_vf = vect_vf_for_cost (loop_vinfo);
2024           unsigned int max_peel = npeel == 0 ? assumed_vf - 1 : npeel;
2025           if ((unsigned HOST_WIDE_INT) LOOP_VINFO_INT_NITERS (loop_vinfo)
2026               < assumed_vf + max_peel)
2027             do_peeling = false;
2028         }
2029
2030       if (do_peeling)
2031         {
2032           /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
2033              If the misalignment of DR_i is identical to that of dr0 then set
2034              DR_MISALIGNMENT (DR_i) to zero.  If the misalignment of DR_i and
2035              dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
2036              by the peeling factor times the element size of DR_i (MOD the
2037              vectorization factor times the size).  Otherwise, the
2038              misalignment of DR_i must be set to unknown.  */
2039           FOR_EACH_VEC_ELT (datarefs, i, dr)
2040             if (dr != dr0)
2041               {
2042                 /* Strided accesses perform only component accesses, alignment
2043                    is irrelevant for them.  */
2044                 stmt_info = vinfo_for_stmt (DR_STMT (dr));
2045                 if (STMT_VINFO_STRIDED_P (stmt_info)
2046                     && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
2047                   continue;
2048
2049                 vect_update_misalignment_for_peel (dr, dr0, npeel);
2050               }
2051
2052           LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
2053           if (npeel)
2054             LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
2055           else
2056             LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
2057               = DR_MISALIGNMENT (dr0);
2058           SET_DR_MISALIGNMENT (dr0, 0);
2059           if (dump_enabled_p ())
2060             {
2061               dump_printf_loc (MSG_NOTE, vect_location,
2062                                "Alignment of access forced using peeling.\n");
2063               dump_printf_loc (MSG_NOTE, vect_location,
2064                                "Peeling for alignment will be applied.\n");
2065             }
2066
2067           /* The inside-loop cost will be accounted for in vectorizable_load
2068              and vectorizable_store correctly with adjusted alignments.
2069              Drop the body_cst_vec on the floor here.  */
2070           stat = vect_verify_datarefs_alignment (loop_vinfo);
2071           gcc_assert (stat);
2072           return stat;
2073         }
2074     }
2075
2076   /* (2) Versioning to force alignment.  */
2077
2078   /* Try versioning if:
2079      1) optimize loop for speed
2080      2) there is at least one unsupported misaligned data ref with an unknown
2081         misalignment, and
2082      3) all misaligned data refs with a known misalignment are supported, and
2083      4) the number of runtime alignment checks is within reason.  */
2084
2085   do_versioning =
2086         optimize_loop_nest_for_speed_p (loop)
2087         && (!loop->inner); /* FORNOW */
2088
2089   if (do_versioning)
2090     {
2091       FOR_EACH_VEC_ELT (datarefs, i, dr)
2092         {
2093           stmt = DR_STMT (dr);
2094           stmt_info = vinfo_for_stmt (stmt);
2095
2096           /* For interleaving, only the alignment of the first access
2097              matters.  */
2098           if (aligned_access_p (dr)
2099               || (STMT_VINFO_GROUPED_ACCESS (stmt_info)
2100                   && GROUP_FIRST_ELEMENT (stmt_info) != stmt))
2101             continue;
2102
2103           if (STMT_VINFO_STRIDED_P (stmt_info))
2104             {
2105               /* Strided loads perform only component accesses, alignment is
2106                  irrelevant for them.  */
2107               if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
2108                 continue;
2109               do_versioning = false;
2110               break;
2111             }
2112
2113           supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
2114
2115           if (!supportable_dr_alignment)
2116             {
2117               gimple *stmt;
2118               int mask;
2119               tree vectype;
2120
2121               if (known_alignment_for_access_p (dr)
2122                   || LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length ()
2123                      >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
2124                 {
2125                   do_versioning = false;
2126                   break;
2127                 }
2128
2129               stmt = DR_STMT (dr);
2130               vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
2131               gcc_assert (vectype);
2132
2133               /* At present we don't support versioning for alignment
2134                  with variable VF, since there's no guarantee that the
2135                  VF is a power of two.  We could relax this if we added
2136                  a way of enforcing a power-of-two size.  */
2137               unsigned HOST_WIDE_INT size;
2138               if (!GET_MODE_SIZE (TYPE_MODE (vectype)).is_constant (&size))
2139                 {
2140                   do_versioning = false;
2141                   break;
2142                 }
2143
2144               /* The rightmost bits of an aligned address must be zeros.
2145                  Construct the mask needed for this test.  For example,
2146                  GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
2147                  mask must be 15 = 0xf. */
2148               mask = size - 1;
2149
2150               /* FORNOW: use the same mask to test all potentially unaligned
2151                  references in the loop.  The vectorizer currently supports
2152                  a single vector size, see the reference to
2153                  GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
2154                  vectorization factor is computed.  */
2155               gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
2156                           || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
2157               LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
2158               LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).safe_push (
2159                       DR_STMT (dr));
2160             }
2161         }
2162
2163       /* Versioning requires at least one misaligned data reference.  */
2164       if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
2165         do_versioning = false;
2166       else if (!do_versioning)
2167         LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).truncate (0);
2168     }
2169
2170   if (do_versioning)
2171     {
2172       vec<gimple *> may_misalign_stmts
2173         = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2174       gimple *stmt;
2175
2176       /* It can now be assumed that the data references in the statements
2177          in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
2178          of the loop being vectorized.  */
2179       FOR_EACH_VEC_ELT (may_misalign_stmts, i, stmt)
2180         {
2181           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2182           dr = STMT_VINFO_DATA_REF (stmt_info);
2183           SET_DR_MISALIGNMENT (dr, 0);
2184           if (dump_enabled_p ())
2185             dump_printf_loc (MSG_NOTE, vect_location,
2186                              "Alignment of access forced using versioning.\n");
2187         }
2188
2189       if (dump_enabled_p ())
2190         dump_printf_loc (MSG_NOTE, vect_location,
2191                          "Versioning for alignment will be applied.\n");
2192
2193       /* Peeling and versioning can't be done together at this time.  */
2194       gcc_assert (! (do_peeling && do_versioning));
2195
2196       stat = vect_verify_datarefs_alignment (loop_vinfo);
2197       gcc_assert (stat);
2198       return stat;
2199     }
2200
2201   /* This point is reached if neither peeling nor versioning is being done.  */
2202   gcc_assert (! (do_peeling || do_versioning));
2203
2204   stat = vect_verify_datarefs_alignment (loop_vinfo);
2205   return stat;
2206 }
2207
2208
2209 /* Function vect_find_same_alignment_drs.
2210
2211    Update group and alignment relations according to the chosen
2212    vectorization factor.  */
2213
2214 static void
2215 vect_find_same_alignment_drs (struct data_dependence_relation *ddr)
2216 {
2217   struct data_reference *dra = DDR_A (ddr);
2218   struct data_reference *drb = DDR_B (ddr);
2219   stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
2220   stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
2221
2222   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
2223     return;
2224
2225   if (dra == drb)
2226     return;
2227
2228   if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
2229       || !operand_equal_p (DR_OFFSET (dra), DR_OFFSET (drb), 0)
2230       || !operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
2231     return;
2232
2233   /* Two references with distance zero have the same alignment.  */
2234   poly_offset_int diff = (wi::to_poly_offset (DR_INIT (dra))
2235                           - wi::to_poly_offset (DR_INIT (drb)));
2236   if (maybe_ne (diff, 0))
2237     {
2238       /* Get the wider of the two alignments.  */
2239       unsigned int align_a = (vect_calculate_target_alignment (dra)
2240                               / BITS_PER_UNIT);
2241       unsigned int align_b = (vect_calculate_target_alignment (drb)
2242                               / BITS_PER_UNIT);
2243       unsigned int max_align = MAX (align_a, align_b);
2244
2245       /* Require the gap to be a multiple of the larger vector alignment.  */
2246       if (!multiple_p (diff, max_align))
2247         return;
2248     }
2249
2250   STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a).safe_push (drb);
2251   STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b).safe_push (dra);
2252   if (dump_enabled_p ())
2253     {
2254       dump_printf_loc (MSG_NOTE, vect_location,
2255                        "accesses have the same alignment: ");
2256       dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
2257       dump_printf (MSG_NOTE,  " and ");
2258       dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
2259       dump_printf (MSG_NOTE, "\n");
2260     }
2261 }
2262
2263
2264 /* Function vect_analyze_data_refs_alignment
2265
2266    Analyze the alignment of the data-references in the loop.
2267    Return FALSE if a data reference is found that cannot be vectorized.  */
2268
2269 bool
2270 vect_analyze_data_refs_alignment (loop_vec_info vinfo)
2271 {
2272   if (dump_enabled_p ())
2273     dump_printf_loc (MSG_NOTE, vect_location,
2274                      "=== vect_analyze_data_refs_alignment ===\n");
2275
2276   /* Mark groups of data references with same alignment using
2277      data dependence information.  */
2278   vec<ddr_p> ddrs = vinfo->ddrs;
2279   struct data_dependence_relation *ddr;
2280   unsigned int i;
2281
2282   FOR_EACH_VEC_ELT (ddrs, i, ddr)
2283     vect_find_same_alignment_drs (ddr);
2284
2285   vec<data_reference_p> datarefs = vinfo->datarefs;
2286   struct data_reference *dr;
2287
2288   vect_record_base_alignments (vinfo);
2289   FOR_EACH_VEC_ELT (datarefs, i, dr)
2290     {
2291       stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
2292       if (STMT_VINFO_VECTORIZABLE (stmt_info)
2293           && !vect_compute_data_ref_alignment (dr))
2294         {
2295           /* Strided accesses perform only component accesses, misalignment
2296              information is irrelevant for them.  */
2297           if (STMT_VINFO_STRIDED_P (stmt_info)
2298               && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
2299             continue;
2300
2301           if (dump_enabled_p ())
2302             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2303                              "not vectorized: can't calculate alignment "
2304                              "for data ref.\n");
2305
2306           return false;
2307         }
2308     }
2309
2310   return true;
2311 }
2312
2313
2314 /* Analyze alignment of DRs of stmts in NODE.  */
2315
2316 static bool
2317 vect_slp_analyze_and_verify_node_alignment (slp_tree node)
2318 {
2319   /* We vectorize from the first scalar stmt in the node unless
2320      the node is permuted in which case we start from the first
2321      element in the group.  */
2322   gimple *first_stmt = SLP_TREE_SCALAR_STMTS (node)[0];
2323   data_reference_p first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
2324   if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
2325     first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt));
2326
2327   data_reference_p dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
2328   if (! vect_compute_data_ref_alignment (dr)
2329       /* For creating the data-ref pointer we need alignment of the
2330          first element anyway.  */
2331       || (dr != first_dr
2332           && ! vect_compute_data_ref_alignment (first_dr))
2333       || ! verify_data_ref_alignment (dr))
2334     {
2335       if (dump_enabled_p ())
2336         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2337                          "not vectorized: bad data alignment in basic "
2338                          "block.\n");
2339       return false;
2340     }
2341
2342   return true;
2343 }
2344
2345 /* Function vect_slp_analyze_instance_alignment
2346
2347    Analyze the alignment of the data-references in the SLP instance.
2348    Return FALSE if a data reference is found that cannot be vectorized.  */
2349
2350 bool
2351 vect_slp_analyze_and_verify_instance_alignment (slp_instance instance)
2352 {
2353   if (dump_enabled_p ())
2354     dump_printf_loc (MSG_NOTE, vect_location,
2355                      "=== vect_slp_analyze_and_verify_instance_alignment ===\n");
2356
2357   slp_tree node;
2358   unsigned i;
2359   FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, node)
2360     if (! vect_slp_analyze_and_verify_node_alignment (node))
2361       return false;
2362
2363   node = SLP_INSTANCE_TREE (instance);
2364   if (STMT_VINFO_DATA_REF (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]))
2365       && ! vect_slp_analyze_and_verify_node_alignment
2366              (SLP_INSTANCE_TREE (instance)))
2367     return false;
2368
2369   return true;
2370 }
2371
2372
2373 /* Analyze groups of accesses: check that DR belongs to a group of
2374    accesses of legal size, step, etc.  Detect gaps, single element
2375    interleaving, and other special cases. Set grouped access info.
2376    Collect groups of strided stores for further use in SLP analysis.
2377    Worker for vect_analyze_group_access.  */
2378
2379 static bool
2380 vect_analyze_group_access_1 (struct data_reference *dr)
2381 {
2382   tree step = DR_STEP (dr);
2383   tree scalar_type = TREE_TYPE (DR_REF (dr));
2384   HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
2385   gimple *stmt = DR_STMT (dr);
2386   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2387   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2388   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2389   HOST_WIDE_INT dr_step = -1;
2390   HOST_WIDE_INT groupsize, last_accessed_element = 1;
2391   bool slp_impossible = false;
2392
2393   /* For interleaving, GROUPSIZE is STEP counted in elements, i.e., the
2394      size of the interleaving group (including gaps).  */
2395   if (tree_fits_shwi_p (step))
2396     {
2397       dr_step = tree_to_shwi (step);
2398       /* Check that STEP is a multiple of type size.  Otherwise there is
2399          a non-element-sized gap at the end of the group which we
2400          cannot represent in GROUP_GAP or GROUP_SIZE.
2401          ???  As we can handle non-constant step fine here we should
2402          simply remove uses of GROUP_GAP between the last and first
2403          element and instead rely on DR_STEP.  GROUP_SIZE then would
2404          simply not include that gap.  */
2405       if ((dr_step % type_size) != 0)
2406         {
2407           if (dump_enabled_p ())
2408             {
2409               dump_printf_loc (MSG_NOTE, vect_location,
2410                                "Step ");
2411               dump_generic_expr (MSG_NOTE, TDF_SLIM, step);
2412               dump_printf (MSG_NOTE,
2413                            " is not a multiple of the element size for ");
2414               dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr));
2415               dump_printf (MSG_NOTE, "\n");
2416             }
2417           return false;
2418         }
2419       groupsize = absu_hwi (dr_step) / type_size;
2420     }
2421   else
2422     groupsize = 0;
2423
2424   /* Not consecutive access is possible only if it is a part of interleaving.  */
2425   if (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
2426     {
2427       /* Check if it this DR is a part of interleaving, and is a single
2428          element of the group that is accessed in the loop.  */
2429
2430       /* Gaps are supported only for loads. STEP must be a multiple of the type
2431          size.  */
2432       if (DR_IS_READ (dr)
2433           && (dr_step % type_size) == 0
2434           && groupsize > 0)
2435         {
2436           GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = stmt;
2437           GROUP_SIZE (vinfo_for_stmt (stmt)) = groupsize;
2438           GROUP_GAP (stmt_info) = groupsize - 1;
2439           if (dump_enabled_p ())
2440             {
2441               dump_printf_loc (MSG_NOTE, vect_location,
2442                                "Detected single element interleaving ");
2443               dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr));
2444               dump_printf (MSG_NOTE, " step ");
2445               dump_generic_expr (MSG_NOTE, TDF_SLIM, step);
2446               dump_printf (MSG_NOTE, "\n");
2447             }
2448
2449           return true;
2450         }
2451
2452       if (dump_enabled_p ())
2453         {
2454           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2455                            "not consecutive access ");
2456           dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2457         }
2458
2459       if (bb_vinfo)
2460         {
2461           /* Mark the statement as unvectorizable.  */
2462           STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2463           return true;
2464         }
2465
2466       dump_printf_loc (MSG_NOTE, vect_location, "using strided accesses\n");
2467       STMT_VINFO_STRIDED_P (stmt_info) = true;
2468       return true;
2469     }
2470
2471   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt)
2472     {
2473       /* First stmt in the interleaving chain. Check the chain.  */
2474       gimple *next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
2475       struct data_reference *data_ref = dr;
2476       unsigned int count = 1;
2477       tree prev_init = DR_INIT (data_ref);
2478       gimple *prev = stmt;
2479       HOST_WIDE_INT diff, gaps = 0;
2480
2481       /* By construction, all group members have INTEGER_CST DR_INITs.  */
2482       while (next)
2483         {
2484           /* Skip same data-refs.  In case that two or more stmts share
2485              data-ref (supported only for loads), we vectorize only the first
2486              stmt, and the rest get their vectorized loads from the first
2487              one.  */
2488           if (!tree_int_cst_compare (DR_INIT (data_ref),
2489                                      DR_INIT (STMT_VINFO_DATA_REF (
2490                                                    vinfo_for_stmt (next)))))
2491             {
2492               if (DR_IS_WRITE (data_ref))
2493                 {
2494                   if (dump_enabled_p ())
2495                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2496                                      "Two store stmts share the same dr.\n");
2497                   return false;
2498                 }
2499
2500               if (dump_enabled_p ())
2501                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2502                                  "Two or more load stmts share the same dr.\n");
2503
2504               /* For load use the same data-ref load.  */
2505               GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
2506
2507               prev = next;
2508               next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
2509               continue;
2510             }
2511
2512           prev = next;
2513           data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
2514
2515           /* All group members have the same STEP by construction.  */
2516           gcc_checking_assert (operand_equal_p (DR_STEP (data_ref), step, 0));
2517
2518           /* Check that the distance between two accesses is equal to the type
2519              size. Otherwise, we have gaps.  */
2520           diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2521                   - TREE_INT_CST_LOW (prev_init)) / type_size;
2522           if (diff != 1)
2523             {
2524               /* FORNOW: SLP of accesses with gaps is not supported.  */
2525               slp_impossible = true;
2526               if (DR_IS_WRITE (data_ref))
2527                 {
2528                   if (dump_enabled_p ())
2529                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2530                                      "interleaved store with gaps\n");
2531                   return false;
2532                 }
2533
2534               gaps += diff - 1;
2535             }
2536
2537           last_accessed_element += diff;
2538
2539           /* Store the gap from the previous member of the group. If there is no
2540              gap in the access, GROUP_GAP is always 1.  */
2541           GROUP_GAP (vinfo_for_stmt (next)) = diff;
2542
2543           prev_init = DR_INIT (data_ref);
2544           next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
2545           /* Count the number of data-refs in the chain.  */
2546           count++;
2547         }
2548
2549       if (groupsize == 0)
2550         groupsize = count + gaps;
2551
2552       /* This could be UINT_MAX but as we are generating code in a very
2553          inefficient way we have to cap earlier.  See PR78699 for example.  */
2554       if (groupsize > 4096)
2555         {
2556           if (dump_enabled_p ())
2557             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2558                              "group is too large\n");
2559           return false;
2560         }
2561
2562       /* Check that the size of the interleaving is equal to count for stores,
2563          i.e., that there are no gaps.  */
2564       if (groupsize != count
2565           && !DR_IS_READ (dr))
2566         {
2567           if (dump_enabled_p ())
2568             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2569                              "interleaved store with gaps\n");
2570           return false;
2571         }
2572
2573       /* If there is a gap after the last load in the group it is the
2574          difference between the groupsize and the last accessed
2575          element.
2576          When there is no gap, this difference should be 0.  */
2577       GROUP_GAP (vinfo_for_stmt (stmt)) = groupsize - last_accessed_element;
2578
2579       GROUP_SIZE (vinfo_for_stmt (stmt)) = groupsize;
2580       if (dump_enabled_p ())
2581         {
2582           dump_printf_loc (MSG_NOTE, vect_location,
2583                            "Detected interleaving ");
2584           if (DR_IS_READ (dr))
2585             dump_printf (MSG_NOTE, "load ");
2586           else
2587             dump_printf (MSG_NOTE, "store ");
2588           dump_printf (MSG_NOTE, "of size %u starting with ",
2589                        (unsigned)groupsize);
2590           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
2591           if (GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
2592             dump_printf_loc (MSG_NOTE, vect_location,
2593                              "There is a gap of %u elements after the group\n",
2594                              GROUP_GAP (vinfo_for_stmt (stmt)));
2595         }
2596
2597       /* SLP: create an SLP data structure for every interleaving group of
2598          stores for further analysis in vect_analyse_slp.  */
2599       if (DR_IS_WRITE (dr) && !slp_impossible)
2600         {
2601           if (loop_vinfo)
2602             LOOP_VINFO_GROUPED_STORES (loop_vinfo).safe_push (stmt);
2603           if (bb_vinfo)
2604             BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (stmt);
2605         }
2606     }
2607
2608   return true;
2609 }
2610
2611 /* Analyze groups of accesses: check that DR belongs to a group of
2612    accesses of legal size, step, etc.  Detect gaps, single element
2613    interleaving, and other special cases. Set grouped access info.
2614    Collect groups of strided stores for further use in SLP analysis.  */
2615
2616 static bool
2617 vect_analyze_group_access (struct data_reference *dr)
2618 {
2619   if (!vect_analyze_group_access_1 (dr))
2620     {
2621       /* Dissolve the group if present.  */
2622       gimple *next;
2623       gimple *stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (DR_STMT (dr)));
2624       while (stmt)
2625         {
2626           stmt_vec_info vinfo = vinfo_for_stmt (stmt);
2627           next = GROUP_NEXT_ELEMENT (vinfo);
2628           GROUP_FIRST_ELEMENT (vinfo) = NULL;
2629           GROUP_NEXT_ELEMENT (vinfo) = NULL;
2630           stmt = next;
2631         }
2632       return false;
2633     }
2634   return true;
2635 }
2636
2637 /* Analyze the access pattern of the data-reference DR.
2638    In case of non-consecutive accesses call vect_analyze_group_access() to
2639    analyze groups of accesses.  */
2640
2641 static bool
2642 vect_analyze_data_ref_access (struct data_reference *dr)
2643 {
2644   tree step = DR_STEP (dr);
2645   tree scalar_type = TREE_TYPE (DR_REF (dr));
2646   gimple *stmt = DR_STMT (dr);
2647   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2648   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2649   struct loop *loop = NULL;
2650
2651   if (loop_vinfo)
2652     loop = LOOP_VINFO_LOOP (loop_vinfo);
2653
2654   if (loop_vinfo && !step)
2655     {
2656       if (dump_enabled_p ())
2657         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2658                          "bad data-ref access in loop\n");
2659       return false;
2660     }
2661
2662   /* Allow loads with zero step in inner-loop vectorization.  */
2663   if (loop_vinfo && integer_zerop (step))
2664     {
2665       GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2666       if (!nested_in_vect_loop_p (loop, stmt))
2667         return DR_IS_READ (dr);
2668       /* Allow references with zero step for outer loops marked
2669          with pragma omp simd only - it guarantees absence of
2670          loop-carried dependencies between inner loop iterations.  */
2671       if (!loop->force_vectorize)
2672         {
2673           if (dump_enabled_p ())
2674             dump_printf_loc (MSG_NOTE, vect_location,
2675                              "zero step in inner loop of nest\n");
2676           return false;
2677         }
2678     }
2679
2680   if (loop && nested_in_vect_loop_p (loop, stmt))
2681     {
2682       /* Interleaved accesses are not yet supported within outer-loop
2683         vectorization for references in the inner-loop.  */
2684       GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2685
2686       /* For the rest of the analysis we use the outer-loop step.  */
2687       step = STMT_VINFO_DR_STEP (stmt_info);
2688       if (integer_zerop (step))
2689         {
2690           if (dump_enabled_p ())
2691             dump_printf_loc (MSG_NOTE, vect_location,
2692                              "zero step in outer loop.\n");
2693           return DR_IS_READ (dr);
2694         }
2695     }
2696
2697   /* Consecutive?  */
2698   if (TREE_CODE (step) == INTEGER_CST)
2699     {
2700       HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2701       if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
2702           || (dr_step < 0
2703               && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
2704         {
2705           /* Mark that it is not interleaving.  */
2706           GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2707           return true;
2708         }
2709     }
2710
2711   if (loop && nested_in_vect_loop_p (loop, stmt))
2712     {
2713       if (dump_enabled_p ())
2714         dump_printf_loc (MSG_NOTE, vect_location,
2715                          "grouped access in outer loop.\n");
2716       return false;
2717     }
2718
2719
2720   /* Assume this is a DR handled by non-constant strided load case.  */
2721   if (TREE_CODE (step) != INTEGER_CST)
2722     return (STMT_VINFO_STRIDED_P (stmt_info)
2723             && (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
2724                 || vect_analyze_group_access (dr)));
2725
2726   /* Not consecutive access - check if it's a part of interleaving group.  */
2727   return vect_analyze_group_access (dr);
2728 }
2729
2730 /* Compare two data-references DRA and DRB to group them into chunks
2731    suitable for grouping.  */
2732
2733 static int
2734 dr_group_sort_cmp (const void *dra_, const void *drb_)
2735 {
2736   data_reference_p dra = *(data_reference_p *)const_cast<void *>(dra_);
2737   data_reference_p drb = *(data_reference_p *)const_cast<void *>(drb_);
2738   int cmp;
2739
2740   /* Stabilize sort.  */
2741   if (dra == drb)
2742     return 0;
2743
2744   /* DRs in different loops never belong to the same group.  */
2745   loop_p loopa = gimple_bb (DR_STMT (dra))->loop_father;
2746   loop_p loopb = gimple_bb (DR_STMT (drb))->loop_father;
2747   if (loopa != loopb)
2748     return loopa->num < loopb->num ? -1 : 1;
2749
2750   /* Ordering of DRs according to base.  */
2751   cmp = data_ref_compare_tree (DR_BASE_ADDRESS (dra),
2752                                DR_BASE_ADDRESS (drb));
2753   if (cmp != 0)
2754     return cmp;
2755
2756   /* And according to DR_OFFSET.  */
2757   cmp = data_ref_compare_tree (DR_OFFSET (dra), DR_OFFSET (drb));
2758   if (cmp != 0)
2759     return cmp;
2760
2761   /* Put reads before writes.  */
2762   if (DR_IS_READ (dra) != DR_IS_READ (drb))
2763     return DR_IS_READ (dra) ? -1 : 1;
2764
2765   /* Then sort after access size.  */
2766   cmp = data_ref_compare_tree (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))),
2767                                TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
2768   if (cmp != 0)
2769     return cmp;
2770
2771   /* And after step.  */
2772   cmp = data_ref_compare_tree (DR_STEP (dra), DR_STEP (drb));
2773   if (cmp != 0)
2774     return cmp;
2775
2776   /* Then sort after DR_INIT.  In case of identical DRs sort after stmt UID.  */
2777   cmp = data_ref_compare_tree (DR_INIT (dra), DR_INIT (drb));
2778   if (cmp == 0)
2779     return gimple_uid (DR_STMT (dra)) < gimple_uid (DR_STMT (drb)) ? -1 : 1;
2780   return cmp;
2781 }
2782
2783 /* If OP is the result of a conversion, return the unconverted value,
2784    otherwise return null.  */
2785
2786 static tree
2787 strip_conversion (tree op)
2788 {
2789   if (TREE_CODE (op) != SSA_NAME)
2790     return NULL_TREE;
2791   gimple *stmt = SSA_NAME_DEF_STMT (op);
2792   if (!is_gimple_assign (stmt)
2793       || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt)))
2794     return NULL_TREE;
2795   return gimple_assign_rhs1 (stmt);
2796 }
2797
2798 /* Return true if vectorizable_* routines can handle statements STMT1
2799    and STMT2 being in a single group.  */
2800
2801 static bool
2802 can_group_stmts_p (gimple *stmt1, gimple *stmt2)
2803 {
2804   if (gimple_assign_single_p (stmt1))
2805     return gimple_assign_single_p (stmt2);
2806
2807   if (is_gimple_call (stmt1) && gimple_call_internal_p (stmt1))
2808     {
2809       /* Check for two masked loads or two masked stores.  */
2810       if (!is_gimple_call (stmt2) || !gimple_call_internal_p (stmt2))
2811         return false;
2812       internal_fn ifn = gimple_call_internal_fn (stmt1);
2813       if (ifn != IFN_MASK_LOAD && ifn != IFN_MASK_STORE)
2814         return false;
2815       if (ifn != gimple_call_internal_fn (stmt2))
2816         return false;
2817
2818       /* Check that the masks are the same.  Cope with casts of masks,
2819          like those created by build_mask_conversion.  */
2820       tree mask1 = gimple_call_arg (stmt1, 2);
2821       tree mask2 = gimple_call_arg (stmt2, 2);
2822       if (!operand_equal_p (mask1, mask2, 0))
2823         {
2824           mask1 = strip_conversion (mask1);
2825           if (!mask1)
2826             return false;
2827           mask2 = strip_conversion (mask2);
2828           if (!mask2)
2829             return false;
2830           if (!operand_equal_p (mask1, mask2, 0))
2831             return false;
2832         }
2833       return true;
2834     }
2835
2836   return false;
2837 }
2838
2839 /* Function vect_analyze_data_ref_accesses.
2840
2841    Analyze the access pattern of all the data references in the loop.
2842
2843    FORNOW: the only access pattern that is considered vectorizable is a
2844            simple step 1 (consecutive) access.
2845
2846    FORNOW: handle only arrays and pointer accesses.  */
2847
2848 bool
2849 vect_analyze_data_ref_accesses (vec_info *vinfo)
2850 {
2851   unsigned int i;
2852   vec<data_reference_p> datarefs = vinfo->datarefs;
2853   struct data_reference *dr;
2854
2855   if (dump_enabled_p ())
2856     dump_printf_loc (MSG_NOTE, vect_location,
2857                      "=== vect_analyze_data_ref_accesses ===\n");
2858
2859   if (datarefs.is_empty ())
2860     return true;
2861
2862   /* Sort the array of datarefs to make building the interleaving chains
2863      linear.  Don't modify the original vector's order, it is needed for
2864      determining what dependencies are reversed.  */
2865   vec<data_reference_p> datarefs_copy = datarefs.copy ();
2866   datarefs_copy.qsort (dr_group_sort_cmp);
2867
2868   /* Build the interleaving chains.  */
2869   for (i = 0; i < datarefs_copy.length () - 1;)
2870     {
2871       data_reference_p dra = datarefs_copy[i];
2872       stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
2873       stmt_vec_info lastinfo = NULL;
2874       if (! STMT_VINFO_VECTORIZABLE (stmtinfo_a))
2875         {
2876           ++i;
2877           continue;
2878         }
2879       for (i = i + 1; i < datarefs_copy.length (); ++i)
2880         {
2881           data_reference_p drb = datarefs_copy[i];
2882           stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
2883           if (! STMT_VINFO_VECTORIZABLE (stmtinfo_b))
2884             break;
2885
2886           /* ???  Imperfect sorting (non-compatible types, non-modulo
2887              accesses, same accesses) can lead to a group to be artificially
2888              split here as we don't just skip over those.  If it really
2889              matters we can push those to a worklist and re-iterate
2890              over them.  The we can just skip ahead to the next DR here.  */
2891
2892           /* DRs in a different loop should not be put into the same
2893              interleaving group.  */
2894           if (gimple_bb (DR_STMT (dra))->loop_father
2895               != gimple_bb (DR_STMT (drb))->loop_father)
2896             break;
2897
2898           /* Check that the data-refs have same first location (except init)
2899              and they are both either store or load (not load and store,
2900              not masked loads or stores).  */
2901           if (DR_IS_READ (dra) != DR_IS_READ (drb)
2902               || data_ref_compare_tree (DR_BASE_ADDRESS (dra),
2903                                         DR_BASE_ADDRESS (drb)) != 0
2904               || data_ref_compare_tree (DR_OFFSET (dra), DR_OFFSET (drb)) != 0
2905               || !can_group_stmts_p (DR_STMT (dra), DR_STMT (drb)))
2906             break;
2907
2908           /* Check that the data-refs have the same constant size.  */
2909           tree sza = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra)));
2910           tree szb = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb)));
2911           if (!tree_fits_uhwi_p (sza)
2912               || !tree_fits_uhwi_p (szb)
2913               || !tree_int_cst_equal (sza, szb))
2914             break;
2915
2916           /* Check that the data-refs have the same step.  */
2917           if (data_ref_compare_tree (DR_STEP (dra), DR_STEP (drb)) != 0)
2918             break;
2919
2920           /* Check the types are compatible.
2921              ???  We don't distinguish this during sorting.  */
2922           if (!types_compatible_p (TREE_TYPE (DR_REF (dra)),
2923                                    TREE_TYPE (DR_REF (drb))))
2924             break;
2925
2926           /* Check that the DR_INITs are compile-time constants.  */
2927           if (TREE_CODE (DR_INIT (dra)) != INTEGER_CST
2928               || TREE_CODE (DR_INIT (drb)) != INTEGER_CST)
2929             break;
2930
2931           /* Sorting has ensured that DR_INIT (dra) <= DR_INIT (drb).  */
2932           HOST_WIDE_INT init_a = TREE_INT_CST_LOW (DR_INIT (dra));
2933           HOST_WIDE_INT init_b = TREE_INT_CST_LOW (DR_INIT (drb));
2934           HOST_WIDE_INT init_prev
2935             = TREE_INT_CST_LOW (DR_INIT (datarefs_copy[i-1]));
2936           gcc_assert (init_a <= init_b
2937                       && init_a <= init_prev
2938                       && init_prev <= init_b);
2939
2940           /* Do not place the same access in the interleaving chain twice.  */
2941           if (init_b == init_prev)
2942             {
2943               gcc_assert (gimple_uid (DR_STMT (datarefs_copy[i-1]))
2944                           < gimple_uid (DR_STMT (drb)));
2945               /* ???  For now we simply "drop" the later reference which is
2946                  otherwise the same rather than finishing off this group.
2947                  In the end we'd want to re-process duplicates forming
2948                  multiple groups from the refs, likely by just collecting
2949                  all candidates (including duplicates and split points
2950                  below) in a vector and then process them together.  */
2951               continue;
2952             }
2953
2954           /* If init_b == init_a + the size of the type * k, we have an
2955              interleaving, and DRA is accessed before DRB.  */
2956           HOST_WIDE_INT type_size_a = tree_to_uhwi (sza);
2957           if (type_size_a == 0
2958               || (init_b - init_a) % type_size_a != 0)
2959             break;
2960
2961           /* If we have a store, the accesses are adjacent.  This splits
2962              groups into chunks we support (we don't support vectorization
2963              of stores with gaps).  */
2964           if (!DR_IS_READ (dra) && init_b - init_prev != type_size_a)
2965             break;
2966
2967           /* If the step (if not zero or non-constant) is greater than the
2968              difference between data-refs' inits this splits groups into
2969              suitable sizes.  */
2970           if (tree_fits_shwi_p (DR_STEP (dra)))
2971             {
2972               HOST_WIDE_INT step = tree_to_shwi (DR_STEP (dra));
2973               if (step != 0 && step <= (init_b - init_a))
2974                 break;
2975             }
2976
2977           if (dump_enabled_p ())
2978             {
2979               dump_printf_loc (MSG_NOTE, vect_location,
2980                                "Detected interleaving ");
2981               if (DR_IS_READ (dra))
2982                 dump_printf (MSG_NOTE, "load ");
2983               else
2984                 dump_printf (MSG_NOTE, "store ");
2985               dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
2986               dump_printf (MSG_NOTE,  " and ");
2987               dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
2988               dump_printf (MSG_NOTE, "\n");
2989             }
2990
2991           /* Link the found element into the group list.  */
2992           if (!GROUP_FIRST_ELEMENT (stmtinfo_a))
2993             {
2994               GROUP_FIRST_ELEMENT (stmtinfo_a) = DR_STMT (dra);
2995               lastinfo = stmtinfo_a;
2996             }
2997           GROUP_FIRST_ELEMENT (stmtinfo_b) = DR_STMT (dra);
2998           GROUP_NEXT_ELEMENT (lastinfo) = DR_STMT (drb);
2999           lastinfo = stmtinfo_b;
3000         }
3001     }
3002
3003   FOR_EACH_VEC_ELT (datarefs_copy, i, dr)
3004     if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
3005         && !vect_analyze_data_ref_access (dr))
3006       {
3007         if (dump_enabled_p ())
3008           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3009                            "not vectorized: complicated access pattern.\n");
3010
3011         if (is_a <bb_vec_info> (vinfo))
3012           {
3013             /* Mark the statement as not vectorizable.  */
3014             STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
3015             continue;
3016           }
3017         else
3018           {
3019             datarefs_copy.release ();
3020             return false;
3021           }
3022       }
3023
3024   datarefs_copy.release ();
3025   return true;
3026 }
3027
3028 /* Function vect_vfa_segment_size.
3029
3030    Create an expression that computes the size of segment
3031    that will be accessed for a data reference.  The functions takes into
3032    account that realignment loads may access one more vector.
3033
3034    Input:
3035      DR: The data reference.
3036      LENGTH_FACTOR: segment length to consider.
3037
3038    Return an expression whose value is the size of segment which will be
3039    accessed by DR.  */
3040
3041 static tree
3042 vect_vfa_segment_size (struct data_reference *dr, tree length_factor)
3043 {
3044   tree segment_length;
3045
3046   if (integer_zerop (DR_STEP (dr)))
3047     segment_length = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
3048   else
3049     segment_length = size_binop (MULT_EXPR,
3050                                  fold_convert (sizetype, DR_STEP (dr)),
3051                                  fold_convert (sizetype, length_factor));
3052
3053   if (vect_supportable_dr_alignment (dr, false)
3054         == dr_explicit_realign_optimized)
3055     {
3056       tree vector_size = TYPE_SIZE_UNIT
3057                           (STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr))));
3058
3059       segment_length = size_binop (PLUS_EXPR, segment_length, vector_size);
3060     }
3061   return segment_length;
3062 }
3063
3064 /* Function vect_no_alias_p.
3065
3066    Given data references A and B with equal base and offset, see whether
3067    the alias relation can be decided at compilation time.  Return 1 if
3068    it can and the references alias, 0 if it can and the references do
3069    not alias, and -1 if we cannot decide at compile time.  SEGMENT_LENGTH_A
3070    and SEGMENT_LENGTH_B are the memory lengths accessed by A and B
3071    respectively.  */
3072
3073 static int
3074 vect_compile_time_alias (struct data_reference *a, struct data_reference *b,
3075                          tree segment_length_a, tree segment_length_b)
3076 {
3077   poly_offset_int offset_a = wi::to_poly_offset (DR_INIT (a));
3078   poly_offset_int offset_b = wi::to_poly_offset (DR_INIT (b));
3079   poly_uint64 const_length_a;
3080   poly_uint64 const_length_b;
3081
3082   /* For negative step, we need to adjust address range by TYPE_SIZE_UNIT
3083      bytes, e.g., int a[3] -> a[1] range is [a+4, a+16) instead of
3084      [a, a+12) */
3085   if (tree_int_cst_compare (DR_STEP (a), size_zero_node) < 0)
3086     {
3087       const_length_a = (-wi::to_poly_wide (segment_length_a)).force_uhwi ();
3088       offset_a = (offset_a + vect_get_scalar_dr_size (a)) - const_length_a;
3089     }
3090   else
3091     const_length_a = tree_to_poly_uint64 (segment_length_a);
3092   if (tree_int_cst_compare (DR_STEP (b), size_zero_node) < 0)
3093     {
3094       const_length_b = (-wi::to_poly_wide (segment_length_b)).force_uhwi ();
3095       offset_b = (offset_b + vect_get_scalar_dr_size (b)) - const_length_b;
3096     }
3097   else
3098     const_length_b = tree_to_poly_uint64 (segment_length_b);
3099
3100   if (ranges_known_overlap_p (offset_a, const_length_a,
3101                               offset_b, const_length_b))
3102     return 1;
3103
3104   if (!ranges_maybe_overlap_p (offset_a, const_length_a,
3105                                offset_b, const_length_b))
3106     return 0;
3107
3108   return -1;
3109 }
3110
3111 /* Return true if the minimum nonzero dependence distance for loop LOOP_DEPTH
3112    in DDR is >= VF.  */
3113
3114 static bool
3115 dependence_distance_ge_vf (data_dependence_relation *ddr,
3116                            unsigned int loop_depth, poly_uint64 vf)
3117 {
3118   if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE
3119       || DDR_NUM_DIST_VECTS (ddr) == 0)
3120     return false;
3121
3122   /* If the dependence is exact, we should have limited the VF instead.  */
3123   gcc_checking_assert (DDR_COULD_BE_INDEPENDENT_P (ddr));
3124
3125   unsigned int i;
3126   lambda_vector dist_v;
3127   FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
3128     {
3129       HOST_WIDE_INT dist = dist_v[loop_depth];
3130       if (dist != 0
3131           && !(dist > 0 && DDR_REVERSED_P (ddr))
3132           && maybe_lt ((unsigned HOST_WIDE_INT) abs_hwi (dist), vf))
3133         return false;
3134     }
3135
3136   if (dump_enabled_p ())
3137     {
3138       dump_printf_loc (MSG_NOTE, vect_location,
3139                        "dependence distance between ");
3140       dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_A (ddr)));
3141       dump_printf (MSG_NOTE,  " and ");
3142       dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_B (ddr)));
3143       dump_printf (MSG_NOTE,  " is >= VF\n");
3144     }
3145
3146   return true;
3147 }
3148
3149 /* Function vect_prune_runtime_alias_test_list.
3150
3151    Prune a list of ddrs to be tested at run-time by versioning for alias.
3152    Merge several alias checks into one if possible.
3153    Return FALSE if resulting list of ddrs is longer then allowed by
3154    PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE.  */
3155
3156 bool
3157 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
3158 {
3159   typedef pair_hash <tree_operand_hash, tree_operand_hash> tree_pair_hash;
3160   hash_set <tree_pair_hash> compared_objects;
3161
3162   vec<ddr_p> may_alias_ddrs = LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
3163   vec<dr_with_seg_len_pair_t> &comp_alias_ddrs
3164     = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
3165   vec<vec_object_pair> &check_unequal_addrs
3166     = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo);
3167   poly_uint64 vect_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3168   tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
3169
3170   ddr_p ddr;
3171   unsigned int i;
3172   tree length_factor;
3173
3174   if (dump_enabled_p ())
3175     dump_printf_loc (MSG_NOTE, vect_location,
3176                      "=== vect_prune_runtime_alias_test_list ===\n");
3177
3178   if (may_alias_ddrs.is_empty ())
3179     return true;
3180
3181   comp_alias_ddrs.create (may_alias_ddrs.length ());
3182
3183   unsigned int loop_depth
3184     = index_in_loop_nest (LOOP_VINFO_LOOP (loop_vinfo)->num,
3185                           LOOP_VINFO_LOOP_NEST (loop_vinfo));
3186
3187   /* First, we collect all data ref pairs for aliasing checks.  */
3188   FOR_EACH_VEC_ELT (may_alias_ddrs, i, ddr)
3189     {
3190       int comp_res;
3191       struct data_reference *dr_a, *dr_b;
3192       gimple *dr_group_first_a, *dr_group_first_b;
3193       tree segment_length_a, segment_length_b;
3194       gimple *stmt_a, *stmt_b;
3195
3196       /* Ignore the alias if the VF we chose ended up being no greater
3197          than the dependence distance.  */
3198       if (dependence_distance_ge_vf (ddr, loop_depth, vect_factor))
3199         continue;
3200
3201       if (DDR_OBJECT_A (ddr))
3202         {
3203           vec_object_pair new_pair (DDR_OBJECT_A (ddr), DDR_OBJECT_B (ddr));
3204           if (!compared_objects.add (new_pair))
3205             {
3206               if (dump_enabled_p ())
3207                 {
3208                   dump_printf_loc (MSG_NOTE, vect_location, "checking that ");
3209                   dump_generic_expr (MSG_NOTE, TDF_SLIM, new_pair.first);
3210                   dump_printf (MSG_NOTE, " and ");
3211                   dump_generic_expr (MSG_NOTE, TDF_SLIM, new_pair.second);
3212                   dump_printf (MSG_NOTE, " have different addresses\n");
3213                 }
3214               LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).safe_push (new_pair);
3215             }
3216           continue;
3217         }
3218
3219       dr_a = DDR_A (ddr);
3220       stmt_a = DR_STMT (DDR_A (ddr));
3221       dr_group_first_a = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_a));
3222       if (dr_group_first_a)
3223         {
3224           stmt_a = dr_group_first_a;
3225           dr_a = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_a));
3226         }
3227
3228       dr_b = DDR_B (ddr);
3229       stmt_b = DR_STMT (DDR_B (ddr));
3230       dr_group_first_b = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_b));
3231       if (dr_group_first_b)
3232         {
3233           stmt_b = dr_group_first_b;
3234           dr_b = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_b));
3235         }
3236
3237       if (!operand_equal_p (DR_STEP (dr_a), DR_STEP (dr_b), 0))
3238         length_factor = scalar_loop_iters;
3239       else
3240         length_factor = size_int (vect_factor);
3241       segment_length_a = vect_vfa_segment_size (dr_a, length_factor);
3242       segment_length_b = vect_vfa_segment_size (dr_b, length_factor);
3243
3244       comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_a),
3245                                         DR_BASE_ADDRESS (dr_b));
3246       if (comp_res == 0)
3247         comp_res = data_ref_compare_tree (DR_OFFSET (dr_a),
3248                                           DR_OFFSET (dr_b));
3249
3250       /* See whether the alias is known at compilation time.  */
3251       if (comp_res == 0
3252           && TREE_CODE (DR_STEP (dr_a)) == INTEGER_CST
3253           && TREE_CODE (DR_STEP (dr_b)) == INTEGER_CST
3254           && poly_int_tree_p (segment_length_a)
3255           && poly_int_tree_p (segment_length_b))
3256         {
3257           int res = vect_compile_time_alias (dr_a, dr_b,
3258                                              segment_length_a,
3259                                              segment_length_b);
3260           if (res == 0)
3261             continue;
3262
3263           if (res == 1)
3264             {
3265               if (dump_enabled_p ())
3266                 dump_printf_loc (MSG_NOTE, vect_location,
3267                                  "not vectorized: compilation time alias.\n");
3268               return false;
3269             }
3270         }
3271
3272       dr_with_seg_len_pair_t dr_with_seg_len_pair
3273           (dr_with_seg_len (dr_a, segment_length_a),
3274            dr_with_seg_len (dr_b, segment_length_b));
3275
3276       /* Canonicalize pairs by sorting the two DR members.  */
3277       if (comp_res > 0)
3278         std::swap (dr_with_seg_len_pair.first, dr_with_seg_len_pair.second);
3279
3280       comp_alias_ddrs.safe_push (dr_with_seg_len_pair);
3281     }
3282
3283   prune_runtime_alias_test_list (&comp_alias_ddrs, vect_factor);
3284
3285   unsigned int count = (comp_alias_ddrs.length ()
3286                         + check_unequal_addrs.length ());
3287   dump_printf_loc (MSG_NOTE, vect_location,
3288                    "improved number of alias checks from %d to %d\n",
3289                    may_alias_ddrs.length (), count);
3290   if ((int) count > PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
3291     {
3292       if (dump_enabled_p ())
3293         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3294                          "number of versioning for alias "
3295                          "run-time tests exceeds %d "
3296                          "(--param vect-max-version-for-alias-checks)\n",
3297                          PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS));
3298       return false;
3299     }
3300
3301   return true;
3302 }
3303
3304 /* Check whether we can use an internal function for a gather load
3305    or scatter store.  READ_P is true for loads and false for stores.
3306    MASKED_P is true if the load or store is conditional.  MEMORY_TYPE is
3307    the type of the memory elements being loaded or stored.  OFFSET_BITS
3308    is the number of bits in each scalar offset and OFFSET_SIGN is the
3309    sign of the offset.  SCALE is the amount by which the offset should
3310    be multiplied *after* it has been converted to address width.
3311
3312    Return true if the function is supported, storing the function
3313    id in *IFN_OUT and the type of a vector element in *ELEMENT_TYPE_OUT.  */
3314
3315 static bool
3316 vect_gather_scatter_fn_p (bool read_p, bool masked_p, tree vectype,
3317                           tree memory_type, unsigned int offset_bits,
3318                           signop offset_sign, int scale,
3319                           internal_fn *ifn_out, tree *element_type_out)
3320 {
3321   unsigned int memory_bits = tree_to_uhwi (TYPE_SIZE (memory_type));
3322   unsigned int element_bits = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype)));
3323   if (offset_bits > element_bits)
3324     /* Internal functions require the offset to be the same width as
3325        the vector elements.  We can extend narrower offsets, but it isn't
3326        safe to truncate wider offsets.  */
3327     return false;
3328
3329   if (element_bits != memory_bits)
3330     /* For now the vector elements must be the same width as the
3331        memory elements.  */
3332     return false;
3333
3334   /* Work out which function we need.  */
3335   internal_fn ifn;
3336   if (read_p)
3337     ifn = masked_p ? IFN_MASK_GATHER_LOAD : IFN_GATHER_LOAD;
3338   else
3339     return false;
3340
3341   /* Test whether the target supports this combination.  */
3342   if (!internal_gather_scatter_fn_supported_p (ifn, vectype, memory_type,
3343                                                offset_sign, scale))
3344     return false;
3345
3346   *ifn_out = ifn;
3347   *element_type_out = TREE_TYPE (vectype);
3348   return true;
3349 }
3350
3351 /* CALL is a call to an internal gather load or scatter store function.
3352    Describe the operation in INFO.  */
3353
3354 static void
3355 vect_describe_gather_scatter_call (gcall *call, gather_scatter_info *info)
3356 {
3357   stmt_vec_info stmt_info = vinfo_for_stmt (call);
3358   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3359   data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3360
3361   info->ifn = gimple_call_internal_fn (call);
3362   info->decl = NULL_TREE;
3363   info->base = gimple_call_arg (call, 0);
3364   info->offset = gimple_call_arg (call, 1);
3365   info->offset_dt = vect_unknown_def_type;
3366   info->offset_vectype = NULL_TREE;
3367   info->scale = TREE_INT_CST_LOW (gimple_call_arg (call, 2));
3368   info->element_type = TREE_TYPE (vectype);
3369   info->memory_type = TREE_TYPE (DR_REF (dr));
3370 }
3371
3372 /* Return true if a non-affine read or write in STMT is suitable for a
3373    gather load or scatter store.  Describe the operation in *INFO if so.  */
3374
3375 bool
3376 vect_check_gather_scatter (gimple *stmt, loop_vec_info loop_vinfo,
3377                            gather_scatter_info *info)
3378 {
3379   HOST_WIDE_INT scale = 1;
3380   poly_int64 pbitpos, pbitsize;
3381   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3382   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3383   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3384   tree offtype = NULL_TREE;
3385   tree decl = NULL_TREE, base, off;
3386   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3387   tree memory_type = TREE_TYPE (DR_REF (dr));
3388   machine_mode pmode;
3389   int punsignedp, reversep, pvolatilep = 0;
3390   internal_fn ifn;
3391   tree element_type;
3392   bool masked_p = false;
3393
3394   /* See whether this is already a call to a gather/scatter internal function.
3395      If not, see whether it's a masked load or store.  */
3396   gcall *call = dyn_cast <gcall *> (stmt);
3397   if (call && gimple_call_internal_p (call))
3398     {
3399       ifn = gimple_call_internal_fn (stmt);
3400       if (internal_gather_scatter_fn_p (ifn))
3401         {
3402           vect_describe_gather_scatter_call (call, info);
3403           return true;
3404         }
3405       masked_p = (ifn == IFN_MASK_LOAD || ifn == IFN_MASK_STORE);
3406     }
3407
3408   /* True if we should aim to use internal functions rather than
3409      built-in functions.  */
3410   bool use_ifn_p = (DR_IS_READ (dr)
3411                     && supports_vec_gather_load_p ());
3412
3413   base = DR_REF (dr);
3414   /* For masked loads/stores, DR_REF (dr) is an artificial MEM_REF,
3415      see if we can use the def stmt of the address.  */
3416   if (masked_p
3417       && TREE_CODE (base) == MEM_REF
3418       && TREE_CODE (TREE_OPERAND (base, 0)) == SSA_NAME
3419       && integer_zerop (TREE_OPERAND (base, 1))
3420       && !expr_invariant_in_loop_p (loop, TREE_OPERAND (base, 0)))
3421     {
3422       gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base, 0));
3423       if (is_gimple_assign (def_stmt)
3424           && gimple_assign_rhs_code (def_stmt) == ADDR_EXPR)
3425         base = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
3426     }
3427
3428   /* The gather and scatter builtins need address of the form
3429      loop_invariant + vector * {1, 2, 4, 8}
3430      or
3431      loop_invariant + sign_extend (vector) * { 1, 2, 4, 8 }.
3432      Unfortunately DR_BASE_ADDRESS/DR_OFFSET can be a mixture
3433      of loop invariants/SSA_NAMEs defined in the loop, with casts,
3434      multiplications and additions in it.  To get a vector, we need
3435      a single SSA_NAME that will be defined in the loop and will
3436      contain everything that is not loop invariant and that can be
3437      vectorized.  The following code attempts to find such a preexistng
3438      SSA_NAME OFF and put the loop invariants into a tree BASE
3439      that can be gimplified before the loop.  */
3440   base = get_inner_reference (base, &pbitsize, &pbitpos, &off, &pmode,
3441                               &punsignedp, &reversep, &pvolatilep);
3442   gcc_assert (base && !reversep);
3443   poly_int64 pbytepos = exact_div (pbitpos, BITS_PER_UNIT);
3444
3445   if (TREE_CODE (base) == MEM_REF)
3446     {
3447       if (!integer_zerop (TREE_OPERAND (base, 1)))
3448         {
3449           if (off == NULL_TREE)
3450             off = wide_int_to_tree (sizetype, mem_ref_offset (base));
3451           else
3452             off = size_binop (PLUS_EXPR, off,
3453                               fold_convert (sizetype, TREE_OPERAND (base, 1)));
3454         }
3455       base = TREE_OPERAND (base, 0);
3456     }
3457   else
3458     base = build_fold_addr_expr (base);
3459
3460   if (off == NULL_TREE)
3461     off = size_zero_node;
3462
3463   /* If base is not loop invariant, either off is 0, then we start with just
3464      the constant offset in the loop invariant BASE and continue with base
3465      as OFF, otherwise give up.
3466      We could handle that case by gimplifying the addition of base + off
3467      into some SSA_NAME and use that as off, but for now punt.  */
3468   if (!expr_invariant_in_loop_p (loop, base))
3469     {
3470       if (!integer_zerop (off))
3471         return false;
3472       off = base;
3473       base = size_int (pbytepos);
3474     }
3475   /* Otherwise put base + constant offset into the loop invariant BASE
3476      and continue with OFF.  */
3477   else
3478     {
3479       base = fold_convert (sizetype, base);
3480       base = size_binop (PLUS_EXPR, base, size_int (pbytepos));
3481     }
3482
3483   /* OFF at this point may be either a SSA_NAME or some tree expression
3484      from get_inner_reference.  Try to peel off loop invariants from it
3485      into BASE as long as possible.  */
3486   STRIP_NOPS (off);
3487   while (offtype == NULL_TREE)
3488     {
3489       enum tree_code code;
3490       tree op0, op1, add = NULL_TREE;
3491
3492       if (TREE_CODE (off) == SSA_NAME)
3493         {
3494           gimple *def_stmt = SSA_NAME_DEF_STMT (off);
3495
3496           if (expr_invariant_in_loop_p (loop, off))
3497             return false;
3498
3499           if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
3500             break;
3501
3502           op0 = gimple_assign_rhs1 (def_stmt);
3503           code = gimple_assign_rhs_code (def_stmt);
3504           op1 = gimple_assign_rhs2 (def_stmt);
3505         }
3506       else
3507         {
3508           if (get_gimple_rhs_class (TREE_CODE (off)) == GIMPLE_TERNARY_RHS)
3509             return false;
3510           code = TREE_CODE (off);
3511           extract_ops_from_tree (off, &code, &op0, &op1);
3512         }
3513       switch (code)
3514         {
3515         case POINTER_PLUS_EXPR:
3516         case PLUS_EXPR:
3517           if (expr_invariant_in_loop_p (loop, op0))
3518             {
3519               add = op0;
3520               off = op1;
3521             do_add:
3522               add = fold_convert (sizetype, add);
3523               if (scale != 1)
3524                 add = size_binop (MULT_EXPR, add, size_int (scale));
3525               base = size_binop (PLUS_EXPR, base, add);
3526               continue;
3527             }
3528           if (expr_invariant_in_loop_p (loop, op1))
3529             {
3530               add = op1;
3531               off = op0;
3532               goto do_add;
3533             }
3534           break;
3535         case MINUS_EXPR:
3536           if (expr_invariant_in_loop_p (loop, op1))
3537             {
3538               add = fold_convert (sizetype, op1);
3539               add = size_binop (MINUS_EXPR, size_zero_node, add);
3540               off = op0;
3541               goto do_add;
3542             }
3543           break;
3544         case MULT_EXPR:
3545           if (scale == 1 && tree_fits_shwi_p (op1))
3546             {
3547               int new_scale = tree_to_shwi (op1);
3548               /* Only treat this as a scaling operation if the target
3549                  supports it.  */
3550               if (use_ifn_p
3551                   && !vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p,
3552                                                 vectype, memory_type, 1,
3553                                                 TYPE_SIGN (TREE_TYPE (op0)),
3554                                                 new_scale, &ifn,
3555                                                 &element_type))
3556                 break;
3557               scale = new_scale;
3558               off = op0;
3559               continue;
3560             }
3561           break;
3562         case SSA_NAME:
3563           off = op0;
3564           continue;
3565         CASE_CONVERT:
3566           if (!POINTER_TYPE_P (TREE_TYPE (op0))
3567               && !INTEGRAL_TYPE_P (TREE_TYPE (op0)))
3568             break;
3569           if (TYPE_PRECISION (TREE_TYPE (op0))
3570               == TYPE_PRECISION (TREE_TYPE (off)))
3571             {
3572               off = op0;
3573               continue;
3574             }
3575
3576           /* The internal functions need the offset to be the same width
3577              as the elements of VECTYPE.  Don't include operations that
3578              cast the offset from that width to a different width.  */
3579           if (use_ifn_p
3580               && (int_size_in_bytes (TREE_TYPE (vectype))
3581                   == int_size_in_bytes (TREE_TYPE (off))))
3582             break;
3583
3584           if (TYPE_PRECISION (TREE_TYPE (op0))
3585               < TYPE_PRECISION (TREE_TYPE (off)))
3586             {
3587               off = op0;
3588               offtype = TREE_TYPE (off);
3589               STRIP_NOPS (off);
3590               continue;
3591             }
3592           break;
3593         default:
3594           break;
3595         }
3596       break;
3597     }
3598
3599   /* If at the end OFF still isn't a SSA_NAME or isn't
3600      defined in the loop, punt.  */
3601   if (TREE_CODE (off) != SSA_NAME
3602       || expr_invariant_in_loop_p (loop, off))
3603     return false;
3604
3605   if (offtype == NULL_TREE)
3606     offtype = TREE_TYPE (off);
3607
3608   if (use_ifn_p)
3609     {
3610       if (!vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p, vectype,
3611                                      memory_type, TYPE_PRECISION (offtype),
3612                                      TYPE_SIGN (offtype), scale, &ifn,
3613                                      &element_type))
3614         return false;
3615     }
3616   else
3617     {
3618       if (DR_IS_READ (dr))
3619         decl = targetm.vectorize.builtin_gather (vectype, offtype, scale);
3620       else
3621         decl = targetm.vectorize.builtin_scatter (vectype, offtype, scale);
3622
3623       if (!decl)
3624         return false;
3625
3626       ifn = IFN_LAST;
3627       element_type = TREE_TYPE (vectype);
3628     }
3629
3630   info->ifn = ifn;
3631   info->decl = decl;
3632   info->base = base;
3633   info->offset = off;
3634   info->offset_dt = vect_unknown_def_type;
3635   info->offset_vectype = NULL_TREE;
3636   info->scale = scale;
3637   info->element_type = element_type;
3638   info->memory_type = memory_type;
3639   return true;
3640 }
3641
3642 /* Function vect_analyze_data_refs.
3643
3644   Find all the data references in the loop or basic block.
3645
3646    The general structure of the analysis of data refs in the vectorizer is as
3647    follows:
3648    1- vect_analyze_data_refs(loop/bb): call
3649       compute_data_dependences_for_loop/bb to find and analyze all data-refs
3650       in the loop/bb and their dependences.
3651    2- vect_analyze_dependences(): apply dependence testing using ddrs.
3652    3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
3653    4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
3654
3655 */
3656
3657 bool
3658 vect_analyze_data_refs (vec_info *vinfo, poly_uint64 *min_vf)
3659 {
3660   struct loop *loop = NULL;
3661   unsigned int i;
3662   struct data_reference *dr;
3663   tree scalar_type;
3664
3665   if (dump_enabled_p ())
3666     dump_printf_loc (MSG_NOTE, vect_location,
3667                      "=== vect_analyze_data_refs ===\n");
3668
3669   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
3670     loop = LOOP_VINFO_LOOP (loop_vinfo);
3671
3672   /* Go through the data-refs, check that the analysis succeeded.  Update
3673      pointer from stmt_vec_info struct to DR and vectype.  */
3674
3675   vec<data_reference_p> datarefs = vinfo->datarefs;
3676   FOR_EACH_VEC_ELT (datarefs, i, dr)
3677     {
3678       gimple *stmt;
3679       stmt_vec_info stmt_info;
3680       tree base, offset, init;
3681       enum { SG_NONE, GATHER, SCATTER } gatherscatter = SG_NONE;
3682       bool simd_lane_access = false;
3683       poly_uint64 vf;
3684
3685 again:
3686       if (!dr || !DR_REF (dr))
3687         {
3688           if (dump_enabled_p ())
3689             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3690                              "not vectorized: unhandled data-ref\n");
3691           return false;
3692         }
3693
3694       stmt = DR_STMT (dr);
3695       stmt_info = vinfo_for_stmt (stmt);
3696
3697       /* Discard clobbers from the dataref vector.  We will remove
3698          clobber stmts during vectorization.  */
3699       if (gimple_clobber_p (stmt))
3700         {
3701           free_data_ref (dr);
3702           if (i == datarefs.length () - 1)
3703             {
3704               datarefs.pop ();
3705               break;
3706             }
3707           datarefs.ordered_remove (i);
3708           dr = datarefs[i];
3709           goto again;
3710         }
3711
3712       /* Check that analysis of the data-ref succeeded.  */
3713       if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
3714           || !DR_STEP (dr))
3715         {
3716           bool maybe_gather
3717             = DR_IS_READ (dr)
3718               && !TREE_THIS_VOLATILE (DR_REF (dr))
3719               && (targetm.vectorize.builtin_gather != NULL
3720                   || supports_vec_gather_load_p ());
3721           bool maybe_scatter
3722             = DR_IS_WRITE (dr)
3723               && !TREE_THIS_VOLATILE (DR_REF (dr))
3724               && targetm.vectorize.builtin_scatter != NULL;
3725           bool maybe_simd_lane_access
3726             = is_a <loop_vec_info> (vinfo) && loop->simduid;
3727
3728           /* If target supports vector gather loads or scatter stores, or if
3729              this might be a SIMD lane access, see if they can't be used.  */
3730           if (is_a <loop_vec_info> (vinfo)
3731               && (maybe_gather || maybe_scatter || maybe_simd_lane_access)
3732               && !nested_in_vect_loop_p (loop, stmt))
3733             {
3734               struct data_reference *newdr
3735                 = create_data_ref (NULL, loop_containing_stmt (stmt),
3736                                    DR_REF (dr), stmt, !maybe_scatter,
3737                                    DR_IS_CONDITIONAL_IN_STMT (dr));
3738               gcc_assert (newdr != NULL && DR_REF (newdr));
3739               if (DR_BASE_ADDRESS (newdr)
3740                   && DR_OFFSET (newdr)
3741                   && DR_INIT (newdr)
3742                   && DR_STEP (newdr)
3743                   && integer_zerop (DR_STEP (newdr)))
3744                 {
3745                   if (maybe_simd_lane_access)
3746                     {
3747                       tree off = DR_OFFSET (newdr);
3748                       STRIP_NOPS (off);
3749                       if (TREE_CODE (DR_INIT (newdr)) == INTEGER_CST
3750                           && TREE_CODE (off) == MULT_EXPR
3751                           && tree_fits_uhwi_p (TREE_OPERAND (off, 1)))
3752                         {
3753                           tree step = TREE_OPERAND (off, 1);
3754                           off = TREE_OPERAND (off, 0);
3755                           STRIP_NOPS (off);
3756                           if (CONVERT_EXPR_P (off)
3757                               && TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (off,
3758                                                                           0)))
3759                                  < TYPE_PRECISION (TREE_TYPE (off)))
3760                             off = TREE_OPERAND (off, 0);
3761                           if (TREE_CODE (off) == SSA_NAME)
3762                             {
3763                               gimple *def = SSA_NAME_DEF_STMT (off);
3764                               tree reft = TREE_TYPE (DR_REF (newdr));
3765                               if (is_gimple_call (def)
3766                                   && gimple_call_internal_p (def)
3767                                   && (gimple_call_internal_fn (def)
3768                                       == IFN_GOMP_SIMD_LANE))
3769                                 {
3770                                   tree arg = gimple_call_arg (def, 0);
3771                                   gcc_assert (TREE_CODE (arg) == SSA_NAME);
3772                                   arg = SSA_NAME_VAR (arg);
3773                                   if (arg == loop->simduid
3774                                       /* For now.  */
3775                                       && tree_int_cst_equal
3776                                            (TYPE_SIZE_UNIT (reft),
3777                                             step))
3778                                     {
3779                                       DR_OFFSET (newdr) = ssize_int (0);
3780                                       DR_STEP (newdr) = step;
3781                                       DR_OFFSET_ALIGNMENT (newdr)
3782                                         = BIGGEST_ALIGNMENT;
3783                                       DR_STEP_ALIGNMENT (newdr)
3784                                         = highest_pow2_factor (step);
3785                                       dr = newdr;
3786                                       simd_lane_access = true;
3787                                     }
3788                                 }
3789                             }
3790                         }
3791                     }
3792                   if (!simd_lane_access && (maybe_gather || maybe_scatter))
3793                     {
3794                       dr = newdr;
3795                       if (maybe_gather)
3796                         gatherscatter = GATHER;
3797                       else
3798                         gatherscatter = SCATTER;
3799                     }
3800                 }
3801               if (gatherscatter == SG_NONE && !simd_lane_access)
3802                 free_data_ref (newdr);
3803             }
3804
3805           if (gatherscatter == SG_NONE && !simd_lane_access)
3806             {
3807               if (dump_enabled_p ())
3808                 {
3809                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3810                                    "not vectorized: data ref analysis "
3811                                    "failed ");
3812                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3813                 }
3814
3815               if (is_a <bb_vec_info> (vinfo))
3816                 break;
3817
3818               return false;
3819             }
3820         }
3821
3822       if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
3823         {
3824           if (dump_enabled_p ())
3825             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3826                              "not vectorized: base addr of dr is a "
3827                              "constant\n");
3828
3829           if (is_a <bb_vec_info> (vinfo))
3830             break;
3831
3832           if (gatherscatter != SG_NONE || simd_lane_access)
3833             free_data_ref (dr);
3834           return false;
3835         }
3836
3837       if (TREE_THIS_VOLATILE (DR_REF (dr)))
3838         {
3839           if (dump_enabled_p ())
3840             {
3841               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3842                                "not vectorized: volatile type ");
3843               dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3844             }
3845
3846           if (is_a <bb_vec_info> (vinfo))
3847             break;
3848
3849           return false;
3850         }
3851
3852       if (stmt_can_throw_internal (stmt))
3853         {
3854           if (dump_enabled_p ())
3855             {
3856               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3857                                "not vectorized: statement can throw an "
3858                                "exception ");
3859               dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3860             }
3861
3862           if (is_a <bb_vec_info> (vinfo))
3863             break;
3864
3865           if (gatherscatter != SG_NONE || simd_lane_access)
3866             free_data_ref (dr);
3867           return false;
3868         }
3869
3870       if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF
3871           && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1)))
3872         {
3873           if (dump_enabled_p ())
3874             {
3875               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3876                                "not vectorized: statement is bitfield "
3877                                "access ");
3878               dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3879             }
3880
3881           if (is_a <bb_vec_info> (vinfo))
3882             break;
3883
3884           if (gatherscatter != SG_NONE || simd_lane_access)
3885             free_data_ref (dr);
3886           return false;
3887         }
3888
3889       base = unshare_expr (DR_BASE_ADDRESS (dr));
3890       offset = unshare_expr (DR_OFFSET (dr));
3891       init = unshare_expr (DR_INIT (dr));
3892
3893       if (is_gimple_call (stmt)
3894           && (!gimple_call_internal_p (stmt)
3895               || (gimple_call_internal_fn (stmt) != IFN_MASK_LOAD
3896                   && gimple_call_internal_fn (stmt) != IFN_MASK_STORE)))
3897         {
3898           if (dump_enabled_p ())
3899             {
3900               dump_printf_loc (MSG_MISSED_OPTIMIZATION,  vect_location,
3901                                "not vectorized: dr in a call ");
3902               dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3903             }
3904
3905           if (is_a <bb_vec_info> (vinfo))
3906             break;
3907
3908           if (gatherscatter != SG_NONE || simd_lane_access)
3909             free_data_ref (dr);
3910           return false;
3911         }
3912
3913       /* Update DR field in stmt_vec_info struct.  */
3914
3915       /* If the dataref is in an inner-loop of the loop that is considered for
3916          for vectorization, we also want to analyze the access relative to
3917          the outer-loop (DR contains information only relative to the
3918          inner-most enclosing loop).  We do that by building a reference to the
3919          first location accessed by the inner-loop, and analyze it relative to
3920          the outer-loop.  */
3921       if (loop && nested_in_vect_loop_p (loop, stmt))
3922         {
3923           /* Build a reference to the first location accessed by the
3924              inner loop: *(BASE + INIT + OFFSET).  By construction,
3925              this address must be invariant in the inner loop, so we
3926              can consider it as being used in the outer loop.  */
3927           tree init_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset),
3928                                           init, offset);
3929           tree init_addr = fold_build_pointer_plus (base, init_offset);
3930           tree init_ref = build_fold_indirect_ref (init_addr);
3931
3932           if (dump_enabled_p ())
3933             {
3934               dump_printf_loc (MSG_NOTE, vect_location,
3935                                "analyze in outer loop: ");
3936               dump_generic_expr (MSG_NOTE, TDF_SLIM, init_ref);
3937               dump_printf (MSG_NOTE, "\n");
3938             }
3939
3940           if (!dr_analyze_innermost (&STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info),
3941                                      init_ref, loop))
3942             /* dr_analyze_innermost already explained the failure.  */
3943             return false;
3944
3945           if (dump_enabled_p ())
3946             {
3947               dump_printf_loc (MSG_NOTE, vect_location,
3948                                "\touter base_address: ");
3949               dump_generic_expr (MSG_NOTE, TDF_SLIM,
3950                                  STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
3951               dump_printf (MSG_NOTE, "\n\touter offset from base address: ");
3952               dump_generic_expr (MSG_NOTE, TDF_SLIM,
3953                                  STMT_VINFO_DR_OFFSET (stmt_info));
3954               dump_printf (MSG_NOTE,
3955                            "\n\touter constant offset from base address: ");
3956               dump_generic_expr (MSG_NOTE, TDF_SLIM,
3957                                  STMT_VINFO_DR_INIT (stmt_info));
3958               dump_printf (MSG_NOTE, "\n\touter step: ");
3959               dump_generic_expr (MSG_NOTE, TDF_SLIM,
3960                                  STMT_VINFO_DR_STEP (stmt_info));
3961               dump_printf (MSG_NOTE, "\n\touter base alignment: %d\n",
3962                            STMT_VINFO_DR_BASE_ALIGNMENT (stmt_info));
3963               dump_printf (MSG_NOTE, "\n\touter base misalignment: %d\n",
3964                            STMT_VINFO_DR_BASE_MISALIGNMENT (stmt_info));
3965               dump_printf (MSG_NOTE, "\n\touter offset alignment: %d\n",
3966                            STMT_VINFO_DR_OFFSET_ALIGNMENT (stmt_info));
3967               dump_printf (MSG_NOTE, "\n\touter step alignment: %d\n",
3968                            STMT_VINFO_DR_STEP_ALIGNMENT (stmt_info));
3969             }
3970         }
3971
3972       if (STMT_VINFO_DATA_REF (stmt_info))
3973         {
3974           if (dump_enabled_p ())
3975             {
3976               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3977                                "not vectorized: more than one data ref "
3978                                "in stmt: ");
3979               dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3980             }
3981
3982           if (is_a <bb_vec_info> (vinfo))
3983             break;
3984
3985           if (gatherscatter != SG_NONE || simd_lane_access)
3986             free_data_ref (dr);
3987           return false;
3988         }
3989
3990       STMT_VINFO_DATA_REF (stmt_info) = dr;
3991       if (simd_lane_access)
3992         {
3993           STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) = true;
3994           free_data_ref (datarefs[i]);
3995           datarefs[i] = dr;
3996         }
3997
3998       if (TREE_CODE (DR_BASE_ADDRESS (dr)) == ADDR_EXPR
3999           && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr), 0))
4000           && DECL_NONALIASED (TREE_OPERAND (DR_BASE_ADDRESS (dr), 0)))
4001         {
4002           if (dump_enabled_p ())
4003             {
4004               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4005                                "not vectorized: base object not addressable "
4006                                "for stmt: ");
4007               dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
4008             }
4009           if (is_a <bb_vec_info> (vinfo))
4010             {
4011               /* In BB vectorization the ref can still participate
4012                  in dependence analysis, we just can't vectorize it.  */
4013               STMT_VINFO_VECTORIZABLE (stmt_info) = false;
4014               continue;
4015             }
4016           return false;
4017         }
4018
4019       /* Set vectype for STMT.  */
4020       scalar_type = TREE_TYPE (DR_REF (dr));
4021       STMT_VINFO_VECTYPE (stmt_info)
4022         = get_vectype_for_scalar_type (scalar_type);
4023       if (!STMT_VINFO_VECTYPE (stmt_info))
4024         {
4025           if (dump_enabled_p ())
4026             {
4027               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4028                                "not vectorized: no vectype for stmt: ");
4029               dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
4030               dump_printf (MSG_MISSED_OPTIMIZATION, " scalar_type: ");
4031               dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_DETAILS,
4032                                  scalar_type);
4033               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4034             }
4035
4036           if (is_a <bb_vec_info> (vinfo))
4037             {
4038               /* No vector type is fine, the ref can still participate
4039                  in dependence analysis, we just can't vectorize it.  */
4040               STMT_VINFO_VECTORIZABLE (stmt_info) = false;
4041               continue;
4042             }
4043
4044           if (gatherscatter != SG_NONE || simd_lane_access)
4045             {
4046               STMT_VINFO_DATA_REF (stmt_info) = NULL;
4047               if (gatherscatter != SG_NONE)
4048                 free_data_ref (dr);
4049             }
4050           return false;
4051         }
4052       else
4053         {
4054           if (dump_enabled_p ())
4055             {
4056               dump_printf_loc (MSG_NOTE, vect_location,
4057                                "got vectype for stmt: ");
4058               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
4059               dump_generic_expr (MSG_NOTE, TDF_SLIM,
4060                                  STMT_VINFO_VECTYPE (stmt_info));
4061               dump_printf (MSG_NOTE, "\n");
4062             }
4063         }
4064
4065       /* Adjust the minimal vectorization factor according to the
4066          vector type.  */
4067       vf = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
4068       *min_vf = upper_bound (*min_vf, vf);
4069
4070       if (gatherscatter != SG_NONE)
4071         {
4072           gather_scatter_info gs_info;
4073           if (!vect_check_gather_scatter (stmt, as_a <loop_vec_info> (vinfo),
4074                                           &gs_info)
4075               || !get_vectype_for_scalar_type (TREE_TYPE (gs_info.offset)))
4076             {
4077               STMT_VINFO_DATA_REF (stmt_info) = NULL;
4078               free_data_ref (dr);
4079               if (dump_enabled_p ())
4080                 {
4081                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4082                                    (gatherscatter == GATHER) ?
4083                                    "not vectorized: not suitable for gather "
4084                                    "load " :
4085                                    "not vectorized: not suitable for scatter "
4086                                    "store ");
4087                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
4088                 }
4089               return false;
4090             }
4091
4092           free_data_ref (datarefs[i]);
4093           datarefs[i] = dr;
4094           STMT_VINFO_GATHER_SCATTER_P (stmt_info) = gatherscatter;
4095         }
4096
4097       else if (is_a <loop_vec_info> (vinfo)
4098                && TREE_CODE (DR_STEP (dr)) != INTEGER_CST)
4099         {
4100           if (nested_in_vect_loop_p (loop, stmt))
4101             {
4102               if (dump_enabled_p ())
4103                 {
4104                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4105                                    "not vectorized: not suitable for strided "
4106                                    "load ");
4107                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
4108                 }
4109               return false;
4110             }
4111           STMT_VINFO_STRIDED_P (stmt_info) = true;
4112         }
4113     }
4114
4115   /* If we stopped analysis at the first dataref we could not analyze
4116      when trying to vectorize a basic-block mark the rest of the datarefs
4117      as not vectorizable and truncate the vector of datarefs.  That
4118      avoids spending useless time in analyzing their dependence.  */
4119   if (i != datarefs.length ())
4120     {
4121       gcc_assert (is_a <bb_vec_info> (vinfo));
4122       for (unsigned j = i; j < datarefs.length (); ++j)
4123         {
4124           data_reference_p dr = datarefs[j];
4125           STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
4126           free_data_ref (dr);
4127         }
4128       datarefs.truncate (i);
4129     }
4130
4131   return true;
4132 }
4133
4134
4135 /* Function vect_get_new_vect_var.
4136
4137    Returns a name for a new variable.  The current naming scheme appends the
4138    prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
4139    the name of vectorizer generated variables, and appends that to NAME if
4140    provided.  */
4141
4142 tree
4143 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
4144 {
4145   const char *prefix;
4146   tree new_vect_var;
4147
4148   switch (var_kind)
4149   {
4150   case vect_simple_var:
4151     prefix = "vect";
4152     break;
4153   case vect_scalar_var:
4154     prefix = "stmp";
4155     break;
4156   case vect_mask_var:
4157     prefix = "mask";
4158     break;
4159   case vect_pointer_var:
4160     prefix = "vectp";
4161     break;
4162   default:
4163     gcc_unreachable ();
4164   }
4165
4166   if (name)
4167     {
4168       char* tmp = concat (prefix, "_", name, NULL);
4169       new_vect_var = create_tmp_reg (type, tmp);
4170       free (tmp);
4171     }
4172   else
4173     new_vect_var = create_tmp_reg (type, prefix);
4174
4175   return new_vect_var;
4176 }
4177
4178 /* Like vect_get_new_vect_var but return an SSA name.  */
4179
4180 tree
4181 vect_get_new_ssa_name (tree type, enum vect_var_kind var_kind, const char *name)
4182 {
4183   const char *prefix;
4184   tree new_vect_var;
4185
4186   switch (var_kind)
4187   {
4188   case vect_simple_var:
4189     prefix = "vect";
4190     break;
4191   case vect_scalar_var:
4192     prefix = "stmp";
4193     break;
4194   case vect_pointer_var:
4195     prefix = "vectp";
4196     break;
4197   default:
4198     gcc_unreachable ();
4199   }
4200
4201   if (name)
4202     {
4203       char* tmp = concat (prefix, "_", name, NULL);
4204       new_vect_var = make_temp_ssa_name (type, NULL, tmp);
4205       free (tmp);
4206     }
4207   else
4208     new_vect_var = make_temp_ssa_name (type, NULL, prefix);
4209
4210   return new_vect_var;
4211 }
4212
4213 /* Duplicate ptr info and set alignment/misaligment on NAME from DR.  */
4214
4215 static void
4216 vect_duplicate_ssa_name_ptr_info (tree name, data_reference *dr)
4217 {
4218   duplicate_ssa_name_ptr_info (name, DR_PTR_INFO (dr));
4219   int misalign = DR_MISALIGNMENT (dr);
4220   if (misalign == DR_MISALIGNMENT_UNKNOWN)
4221     mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (name));
4222   else
4223     set_ptr_info_alignment (SSA_NAME_PTR_INFO (name),
4224                             DR_TARGET_ALIGNMENT (dr), misalign);
4225 }
4226
4227 /* Function vect_create_addr_base_for_vector_ref.
4228
4229    Create an expression that computes the address of the first memory location
4230    that will be accessed for a data reference.
4231
4232    Input:
4233    STMT: The statement containing the data reference.
4234    NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
4235    OFFSET: Optional. If supplied, it is be added to the initial address.
4236    LOOP:    Specify relative to which loop-nest should the address be computed.
4237             For example, when the dataref is in an inner-loop nested in an
4238             outer-loop that is now being vectorized, LOOP can be either the
4239             outer-loop, or the inner-loop.  The first memory location accessed
4240             by the following dataref ('in' points to short):
4241
4242                 for (i=0; i<N; i++)
4243                    for (j=0; j<M; j++)
4244                      s += in[i+j]
4245
4246             is as follows:
4247             if LOOP=i_loop:     &in             (relative to i_loop)
4248             if LOOP=j_loop:     &in+i*2B        (relative to j_loop)
4249    BYTE_OFFSET: Optional, defaulted to NULL.  If supplied, it is added to the
4250             initial address.  Unlike OFFSET, which is number of elements to
4251             be added, BYTE_OFFSET is measured in bytes.
4252
4253    Output:
4254    1. Return an SSA_NAME whose value is the address of the memory location of
4255       the first vector of the data reference.
4256    2. If new_stmt_list is not NULL_TREE after return then the caller must insert
4257       these statement(s) which define the returned SSA_NAME.
4258
4259    FORNOW: We are only handling array accesses with step 1.  */
4260
4261 tree
4262 vect_create_addr_base_for_vector_ref (gimple *stmt,
4263                                       gimple_seq *new_stmt_list,
4264                                       tree offset,
4265                                       tree byte_offset)
4266 {
4267   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4268   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4269   const char *base_name;
4270   tree addr_base;
4271   tree dest;
4272   gimple_seq seq = NULL;
4273   tree vect_ptr_type;
4274   tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
4275   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4276   innermost_loop_behavior *drb = vect_dr_behavior (dr);
4277
4278   tree data_ref_base = unshare_expr (drb->base_address);
4279   tree base_offset = unshare_expr (drb->offset);
4280   tree init = unshare_expr (drb->init);
4281
4282   if (loop_vinfo)
4283     base_name = get_name (data_ref_base);
4284   else
4285     {
4286       base_offset = ssize_int (0);
4287       init = ssize_int (0);
4288       base_name = get_name (DR_REF (dr));
4289     }
4290
4291   /* Create base_offset */
4292   base_offset = size_binop (PLUS_EXPR,
4293                             fold_convert (sizetype, base_offset),
4294                             fold_convert (sizetype, init));
4295
4296   if (offset)
4297     {
4298       offset = fold_build2 (MULT_EXPR, sizetype,
4299                             fold_convert (sizetype, offset), step);
4300       base_offset = fold_build2 (PLUS_EXPR, sizetype,
4301                                  base_offset, offset);
4302     }
4303   if (byte_offset)
4304     {
4305       byte_offset = fold_convert (sizetype, byte_offset);
4306       base_offset = fold_build2 (PLUS_EXPR, sizetype,
4307                                  base_offset, byte_offset);
4308     }
4309
4310   /* base + base_offset */
4311   if (loop_vinfo)
4312     addr_base = fold_build_pointer_plus (data_ref_base, base_offset);
4313   else
4314     {
4315       addr_base = build1 (ADDR_EXPR,
4316                           build_pointer_type (TREE_TYPE (DR_REF (dr))),
4317                           unshare_expr (DR_REF (dr)));
4318     }
4319
4320   vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
4321   dest = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var, base_name);
4322   addr_base = force_gimple_operand (addr_base, &seq, true, dest);
4323   gimple_seq_add_seq (new_stmt_list, seq);
4324
4325   if (DR_PTR_INFO (dr)
4326       && TREE_CODE (addr_base) == SSA_NAME
4327       && !SSA_NAME_PTR_INFO (addr_base))
4328     {
4329       vect_duplicate_ssa_name_ptr_info (addr_base, dr);
4330       if (offset || byte_offset)
4331         mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (addr_base));
4332     }
4333
4334   if (dump_enabled_p ())
4335     {
4336       dump_printf_loc (MSG_NOTE, vect_location, "created ");
4337       dump_generic_expr (MSG_NOTE, TDF_SLIM, addr_base);
4338       dump_printf (MSG_NOTE, "\n");
4339     }
4340
4341   return addr_base;
4342 }
4343
4344
4345 /* Function vect_create_data_ref_ptr.
4346
4347    Create a new pointer-to-AGGR_TYPE variable (ap), that points to the first
4348    location accessed in the loop by STMT, along with the def-use update
4349    chain to appropriately advance the pointer through the loop iterations.
4350    Also set aliasing information for the pointer.  This pointer is used by
4351    the callers to this function to create a memory reference expression for
4352    vector load/store access.
4353
4354    Input:
4355    1. STMT: a stmt that references memory. Expected to be of the form
4356          GIMPLE_ASSIGN <name, data-ref> or
4357          GIMPLE_ASSIGN <data-ref, name>.
4358    2. AGGR_TYPE: the type of the reference, which should be either a vector
4359         or an array.
4360    3. AT_LOOP: the loop where the vector memref is to be created.
4361    4. OFFSET (optional): an offset to be added to the initial address accessed
4362         by the data-ref in STMT.
4363    5. BSI: location where the new stmts are to be placed if there is no loop
4364    6. ONLY_INIT: indicate if ap is to be updated in the loop, or remain
4365         pointing to the initial address.
4366    7. BYTE_OFFSET (optional, defaults to NULL): a byte offset to be added
4367         to the initial address accessed by the data-ref in STMT.  This is
4368         similar to OFFSET, but OFFSET is counted in elements, while BYTE_OFFSET
4369         in bytes.
4370
4371    Output:
4372    1. Declare a new ptr to vector_type, and have it point to the base of the
4373       data reference (initial addressed accessed by the data reference).
4374       For example, for vector of type V8HI, the following code is generated:
4375
4376       v8hi *ap;
4377       ap = (v8hi *)initial_address;
4378
4379       if OFFSET is not supplied:
4380          initial_address = &a[init];
4381       if OFFSET is supplied:
4382          initial_address = &a[init + OFFSET];
4383       if BYTE_OFFSET is supplied:
4384          initial_address = &a[init] + BYTE_OFFSET;
4385
4386       Return the initial_address in INITIAL_ADDRESS.
4387
4388    2. If ONLY_INIT is true, just return the initial pointer.  Otherwise, also
4389       update the pointer in each iteration of the loop.
4390
4391       Return the increment stmt that updates the pointer in PTR_INCR.
4392
4393    3. Set INV_P to true if the access pattern of the data reference in the
4394       vectorized loop is invariant.  Set it to false otherwise.
4395
4396    4. Return the pointer.  */
4397
4398 tree
4399 vect_create_data_ref_ptr (gimple *stmt, tree aggr_type, struct loop *at_loop,
4400                           tree offset, tree *initial_address,
4401                           gimple_stmt_iterator *gsi, gimple **ptr_incr,
4402                           bool only_init, bool *inv_p, tree byte_offset)
4403 {
4404   const char *base_name;
4405   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4406   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4407   struct loop *loop = NULL;
4408   bool nested_in_vect_loop = false;
4409   struct loop *containing_loop = NULL;
4410   tree aggr_ptr_type;
4411   tree aggr_ptr;
4412   tree new_temp;
4413   gimple_seq new_stmt_list = NULL;
4414   edge pe = NULL;
4415   basic_block new_bb;
4416   tree aggr_ptr_init;
4417   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4418   tree aptr;
4419   gimple_stmt_iterator incr_gsi;
4420   bool insert_after;
4421   tree indx_before_incr, indx_after_incr;
4422   gimple *incr;
4423   tree step;
4424   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4425
4426   gcc_assert (TREE_CODE (aggr_type) == ARRAY_TYPE
4427               || TREE_CODE (aggr_type) == VECTOR_TYPE);
4428
4429   if (loop_vinfo)
4430     {
4431       loop = LOOP_VINFO_LOOP (loop_vinfo);
4432       nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4433       containing_loop = (gimple_bb (stmt))->loop_father;
4434       pe = loop_preheader_edge (loop);
4435     }
4436   else
4437     {
4438       gcc_assert (bb_vinfo);
4439       only_init = true;
4440       *ptr_incr = NULL;
4441     }
4442
4443   /* Check the step (evolution) of the load in LOOP, and record
4444      whether it's invariant.  */
4445   step = vect_dr_behavior (dr)->step;
4446   if (integer_zerop (step))
4447     *inv_p = true;
4448   else
4449     *inv_p = false;
4450
4451   /* Create an expression for the first address accessed by this load
4452      in LOOP.  */
4453   base_name = get_name (DR_BASE_ADDRESS (dr));
4454
4455   if (dump_enabled_p ())
4456     {
4457       tree dr_base_type = TREE_TYPE (DR_BASE_OBJECT (dr));
4458       dump_printf_loc (MSG_NOTE, vect_location,
4459                        "create %s-pointer variable to type: ",
4460                        get_tree_code_name (TREE_CODE (aggr_type)));
4461       dump_generic_expr (MSG_NOTE, TDF_SLIM, aggr_type);
4462       if (TREE_CODE (dr_base_type) == ARRAY_TYPE)
4463         dump_printf (MSG_NOTE, "  vectorizing an array ref: ");
4464       else if (TREE_CODE (dr_base_type) == VECTOR_TYPE)
4465         dump_printf (MSG_NOTE, "  vectorizing a vector ref: ");
4466       else if (TREE_CODE (dr_base_type) == RECORD_TYPE)
4467         dump_printf (MSG_NOTE, "  vectorizing a record based array ref: ");
4468       else
4469         dump_printf (MSG_NOTE, "  vectorizing a pointer ref: ");
4470       dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_BASE_OBJECT (dr));
4471       dump_printf (MSG_NOTE, "\n");
4472     }
4473
4474   /* (1) Create the new aggregate-pointer variable.
4475      Vector and array types inherit the alias set of their component
4476      type by default so we need to use a ref-all pointer if the data
4477      reference does not conflict with the created aggregated data
4478      reference because it is not addressable.  */
4479   bool need_ref_all = false;
4480   if (!alias_sets_conflict_p (get_alias_set (aggr_type),
4481                               get_alias_set (DR_REF (dr))))
4482     need_ref_all = true;
4483   /* Likewise for any of the data references in the stmt group.  */
4484   else if (STMT_VINFO_GROUP_SIZE (stmt_info) > 1)
4485     {
4486       gimple *orig_stmt = STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info);
4487       do
4488         {
4489           stmt_vec_info sinfo = vinfo_for_stmt (orig_stmt);
4490           struct data_reference *sdr = STMT_VINFO_DATA_REF (sinfo);
4491           if (!alias_sets_conflict_p (get_alias_set (aggr_type),
4492                                       get_alias_set (DR_REF (sdr))))
4493             {
4494               need_ref_all = true;
4495               break;
4496             }
4497           orig_stmt = STMT_VINFO_GROUP_NEXT_ELEMENT (sinfo);
4498         }
4499       while (orig_stmt);
4500     }
4501   aggr_ptr_type = build_pointer_type_for_mode (aggr_type, ptr_mode,
4502                                                need_ref_all);
4503   aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var, base_name);
4504
4505
4506   /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
4507      vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
4508      def-use update cycles for the pointer: one relative to the outer-loop
4509      (LOOP), which is what steps (3) and (4) below do.  The other is relative
4510      to the inner-loop (which is the inner-most loop containing the dataref),
4511      and this is done be step (5) below.
4512
4513      When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
4514      inner-most loop, and so steps (3),(4) work the same, and step (5) is
4515      redundant.  Steps (3),(4) create the following:
4516
4517         vp0 = &base_addr;
4518         LOOP:   vp1 = phi(vp0,vp2)
4519                 ...
4520                 ...
4521                 vp2 = vp1 + step
4522                 goto LOOP
4523
4524      If there is an inner-loop nested in loop, then step (5) will also be
4525      applied, and an additional update in the inner-loop will be created:
4526
4527         vp0 = &base_addr;
4528         LOOP:   vp1 = phi(vp0,vp2)
4529                 ...
4530         inner:     vp3 = phi(vp1,vp4)
4531                    vp4 = vp3 + inner_step
4532                    if () goto inner
4533                 ...
4534                 vp2 = vp1 + step
4535                 if () goto LOOP   */
4536
4537   /* (2) Calculate the initial address of the aggregate-pointer, and set
4538      the aggregate-pointer to point to it before the loop.  */
4539
4540   /* Create: (&(base[init_val+offset]+byte_offset) in the loop preheader.  */
4541
4542   new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
4543                                                    offset, byte_offset);
4544   if (new_stmt_list)
4545     {
4546       if (pe)
4547         {
4548           new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
4549           gcc_assert (!new_bb);
4550         }
4551       else
4552         gsi_insert_seq_before (gsi, new_stmt_list, GSI_SAME_STMT);
4553     }
4554
4555   *initial_address = new_temp;
4556   aggr_ptr_init = new_temp;
4557
4558   /* (3) Handle the updating of the aggregate-pointer inside the loop.
4559      This is needed when ONLY_INIT is false, and also when AT_LOOP is the
4560      inner-loop nested in LOOP (during outer-loop vectorization).  */
4561
4562   /* No update in loop is required.  */
4563   if (only_init && (!loop_vinfo || at_loop == loop))
4564     aptr = aggr_ptr_init;
4565   else
4566     {
4567       /* The step of the aggregate pointer is the type size.  */
4568       tree iv_step = TYPE_SIZE_UNIT (aggr_type);
4569       /* One exception to the above is when the scalar step of the load in
4570          LOOP is zero. In this case the step here is also zero.  */
4571       if (*inv_p)
4572         iv_step = size_zero_node;
4573       else if (tree_int_cst_sgn (step) == -1)
4574         iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
4575
4576       standard_iv_increment_position (loop, &incr_gsi, &insert_after);
4577
4578       create_iv (aggr_ptr_init,
4579                  fold_convert (aggr_ptr_type, iv_step),
4580                  aggr_ptr, loop, &incr_gsi, insert_after,
4581                  &indx_before_incr, &indx_after_incr);
4582       incr = gsi_stmt (incr_gsi);
4583       set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
4584
4585       /* Copy the points-to information if it exists. */
4586       if (DR_PTR_INFO (dr))
4587         {
4588           vect_duplicate_ssa_name_ptr_info (indx_before_incr, dr);
4589           vect_duplicate_ssa_name_ptr_info (indx_after_incr, dr);
4590         }
4591       if (ptr_incr)
4592         *ptr_incr = incr;
4593
4594       aptr = indx_before_incr;
4595     }
4596
4597   if (!nested_in_vect_loop || only_init)
4598     return aptr;
4599
4600
4601   /* (4) Handle the updating of the aggregate-pointer inside the inner-loop
4602      nested in LOOP, if exists.  */
4603
4604   gcc_assert (nested_in_vect_loop);
4605   if (!only_init)
4606     {
4607       standard_iv_increment_position (containing_loop, &incr_gsi,
4608                                       &insert_after);
4609       create_iv (aptr, fold_convert (aggr_ptr_type, DR_STEP (dr)), aggr_ptr,
4610                  containing_loop, &incr_gsi, insert_after, &indx_before_incr,
4611                  &indx_after_incr);
4612       incr = gsi_stmt (incr_gsi);
4613       set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
4614
4615       /* Copy the points-to information if it exists. */
4616       if (DR_PTR_INFO (dr))
4617         {
4618           vect_duplicate_ssa_name_ptr_info (indx_before_incr, dr);
4619           vect_duplicate_ssa_name_ptr_info (indx_after_incr, dr);
4620         }
4621       if (ptr_incr)
4622         *ptr_incr = incr;
4623
4624       return indx_before_incr;
4625     }
4626   else
4627     gcc_unreachable ();
4628 }
4629
4630
4631 /* Function bump_vector_ptr
4632
4633    Increment a pointer (to a vector type) by vector-size. If requested,
4634    i.e. if PTR-INCR is given, then also connect the new increment stmt
4635    to the existing def-use update-chain of the pointer, by modifying
4636    the PTR_INCR as illustrated below:
4637
4638    The pointer def-use update-chain before this function:
4639                         DATAREF_PTR = phi (p_0, p_2)
4640                         ....
4641         PTR_INCR:       p_2 = DATAREF_PTR + step
4642
4643    The pointer def-use update-chain after this function:
4644                         DATAREF_PTR = phi (p_0, p_2)
4645                         ....
4646                         NEW_DATAREF_PTR = DATAREF_PTR + BUMP
4647                         ....
4648         PTR_INCR:       p_2 = NEW_DATAREF_PTR + step
4649
4650    Input:
4651    DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
4652                  in the loop.
4653    PTR_INCR - optional. The stmt that updates the pointer in each iteration of
4654               the loop.  The increment amount across iterations is expected
4655               to be vector_size.
4656    BSI - location where the new update stmt is to be placed.
4657    STMT - the original scalar memory-access stmt that is being vectorized.
4658    BUMP - optional. The offset by which to bump the pointer. If not given,
4659           the offset is assumed to be vector_size.
4660
4661    Output: Return NEW_DATAREF_PTR as illustrated above.
4662
4663 */
4664
4665 tree
4666 bump_vector_ptr (tree dataref_ptr, gimple *ptr_incr, gimple_stmt_iterator *gsi,
4667                  gimple *stmt, tree bump)
4668 {
4669   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4670   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4671   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4672   tree update = TYPE_SIZE_UNIT (vectype);
4673   gassign *incr_stmt;
4674   ssa_op_iter iter;
4675   use_operand_p use_p;
4676   tree new_dataref_ptr;
4677
4678   if (bump)
4679     update = bump;
4680
4681   if (TREE_CODE (dataref_ptr) == SSA_NAME)
4682     new_dataref_ptr = copy_ssa_name (dataref_ptr);
4683   else
4684     new_dataref_ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
4685   incr_stmt = gimple_build_assign (new_dataref_ptr, POINTER_PLUS_EXPR,
4686                                    dataref_ptr, update);
4687   vect_finish_stmt_generation (stmt, incr_stmt, gsi);
4688
4689   /* Copy the points-to information if it exists. */
4690   if (DR_PTR_INFO (dr))
4691     {
4692       duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
4693       mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (new_dataref_ptr));
4694     }
4695
4696   if (!ptr_incr)
4697     return new_dataref_ptr;
4698
4699   /* Update the vector-pointer's cross-iteration increment.  */
4700   FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
4701     {
4702       tree use = USE_FROM_PTR (use_p);
4703
4704       if (use == dataref_ptr)
4705         SET_USE (use_p, new_dataref_ptr);
4706       else
4707         gcc_assert (tree_int_cst_compare (use, update) == 0);
4708     }
4709
4710   return new_dataref_ptr;
4711 }
4712
4713
4714 /* Function vect_create_destination_var.
4715
4716    Create a new temporary of type VECTYPE.  */
4717
4718 tree
4719 vect_create_destination_var (tree scalar_dest, tree vectype)
4720 {
4721   tree vec_dest;
4722   const char *name;
4723   char *new_name;
4724   tree type;
4725   enum vect_var_kind kind;
4726
4727   kind = vectype
4728     ? VECTOR_BOOLEAN_TYPE_P (vectype)
4729     ? vect_mask_var
4730     : vect_simple_var
4731     : vect_scalar_var;
4732   type = vectype ? vectype : TREE_TYPE (scalar_dest);
4733
4734   gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
4735
4736   name = get_name (scalar_dest);
4737   if (name)
4738     new_name = xasprintf ("%s_%u", name, SSA_NAME_VERSION (scalar_dest));
4739   else
4740     new_name = xasprintf ("_%u", SSA_NAME_VERSION (scalar_dest));
4741   vec_dest = vect_get_new_vect_var (type, kind, new_name);
4742   free (new_name);
4743
4744   return vec_dest;
4745 }
4746
4747 /* Function vect_grouped_store_supported.
4748
4749    Returns TRUE if interleave high and interleave low permutations
4750    are supported, and FALSE otherwise.  */
4751
4752 bool
4753 vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
4754 {
4755   machine_mode mode = TYPE_MODE (vectype);
4756
4757   /* vect_permute_store_chain requires the group size to be equal to 3 or
4758      be a power of two.  */
4759   if (count != 3 && exact_log2 (count) == -1)
4760     {
4761       if (dump_enabled_p ())
4762         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4763                          "the size of the group of accesses"
4764                          " is not a power of 2 or not eqaul to 3\n");
4765       return false;
4766     }
4767
4768   /* Check that the permutation is supported.  */
4769   if (VECTOR_MODE_P (mode))
4770     {
4771       unsigned int i;
4772       if (count == 3)
4773         {
4774           unsigned int j0 = 0, j1 = 0, j2 = 0;
4775           unsigned int i, j;
4776
4777           unsigned int nelt;
4778           if (!GET_MODE_NUNITS (mode).is_constant (&nelt))
4779             {
4780               if (dump_enabled_p ())
4781                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4782                                  "cannot handle groups of 3 stores for"
4783                                  " variable-length vectors\n");
4784               return false;
4785             }
4786
4787           vec_perm_builder sel (nelt, nelt, 1);
4788           sel.quick_grow (nelt);
4789           vec_perm_indices indices;
4790           for (j = 0; j < 3; j++)
4791             {
4792               int nelt0 = ((3 - j) * nelt) % 3;
4793               int nelt1 = ((3 - j) * nelt + 1) % 3;
4794               int nelt2 = ((3 - j) * nelt + 2) % 3;
4795               for (i = 0; i < nelt; i++)
4796                 {
4797                   if (3 * i + nelt0 < nelt)
4798                     sel[3 * i + nelt0] = j0++;
4799                   if (3 * i + nelt1 < nelt)
4800                     sel[3 * i + nelt1] = nelt + j1++;
4801                   if (3 * i + nelt2 < nelt)
4802                     sel[3 * i + nelt2] = 0;
4803                 }
4804               indices.new_vector (sel, 2, nelt);
4805               if (!can_vec_perm_const_p (mode, indices))
4806                 {
4807                   if (dump_enabled_p ())
4808                     dump_printf (MSG_MISSED_OPTIMIZATION,
4809                                  "permutation op not supported by target.\n");
4810                   return false;
4811                 }
4812
4813               for (i = 0; i < nelt; i++)
4814                 {
4815                   if (3 * i + nelt0 < nelt)
4816                     sel[3 * i + nelt0] = 3 * i + nelt0;
4817                   if (3 * i + nelt1 < nelt)
4818                     sel[3 * i + nelt1] = 3 * i + nelt1;
4819                   if (3 * i + nelt2 < nelt)
4820                     sel[3 * i + nelt2] = nelt + j2++;
4821                 }
4822               indices.new_vector (sel, 2, nelt);
4823               if (!can_vec_perm_const_p (mode, indices))
4824                 {
4825                   if (dump_enabled_p ())
4826                     dump_printf (MSG_MISSED_OPTIMIZATION,
4827                                  "permutation op not supported by target.\n");
4828                   return false;
4829                 }
4830             }
4831           return true;
4832         }
4833       else
4834         {
4835           /* If length is not equal to 3 then only power of 2 is supported.  */
4836           gcc_assert (pow2p_hwi (count));
4837           poly_uint64 nelt = GET_MODE_NUNITS (mode);
4838
4839           /* The encoding has 2 interleaved stepped patterns.  */
4840           vec_perm_builder sel (nelt, 2, 3);
4841           sel.quick_grow (6);
4842           for (i = 0; i < 3; i++)
4843             {
4844               sel[i * 2] = i;
4845               sel[i * 2 + 1] = i + nelt;
4846             }
4847           vec_perm_indices indices (sel, 2, nelt);
4848           if (can_vec_perm_const_p (mode, indices))
4849             {
4850               for (i = 0; i < 6; i++)
4851                 sel[i] += exact_div (nelt, 2);
4852               indices.new_vector (sel, 2, nelt);
4853               if (can_vec_perm_const_p (mode, indices))
4854                 return true;
4855             }
4856         }
4857     }
4858
4859   if (dump_enabled_p ())
4860     dump_printf (MSG_MISSED_OPTIMIZATION,
4861                  "permutaion op not supported by target.\n");
4862   return false;
4863 }
4864
4865
4866 /* Return TRUE if vec_{mask_}store_lanes is available for COUNT vectors of
4867    type VECTYPE.  MASKED_P says whether the masked form is needed.  */
4868
4869 bool
4870 vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count,
4871                             bool masked_p)
4872 {
4873   if (masked_p)
4874     return vect_lanes_optab_supported_p ("vec_mask_store_lanes",
4875                                          vec_mask_store_lanes_optab,
4876                                          vectype, count);
4877   else
4878     return vect_lanes_optab_supported_p ("vec_store_lanes",
4879                                          vec_store_lanes_optab,
4880                                          vectype, count);
4881 }
4882
4883
4884 /* Function vect_permute_store_chain.
4885
4886    Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
4887    a power of 2 or equal to 3, generate interleave_high/low stmts to reorder
4888    the data correctly for the stores.  Return the final references for stores
4889    in RESULT_CHAIN.
4890
4891    E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
4892    The input is 4 vectors each containing 8 elements.  We assign a number to
4893    each element, the input sequence is:
4894
4895    1st vec:   0  1  2  3  4  5  6  7
4896    2nd vec:   8  9 10 11 12 13 14 15
4897    3rd vec:  16 17 18 19 20 21 22 23
4898    4th vec:  24 25 26 27 28 29 30 31
4899
4900    The output sequence should be:
4901
4902    1st vec:  0  8 16 24  1  9 17 25
4903    2nd vec:  2 10 18 26  3 11 19 27
4904    3rd vec:  4 12 20 28  5 13 21 30
4905    4th vec:  6 14 22 30  7 15 23 31
4906
4907    i.e., we interleave the contents of the four vectors in their order.
4908
4909    We use interleave_high/low instructions to create such output.  The input of
4910    each interleave_high/low operation is two vectors:
4911    1st vec    2nd vec
4912    0 1 2 3    4 5 6 7
4913    the even elements of the result vector are obtained left-to-right from the
4914    high/low elements of the first vector.  The odd elements of the result are
4915    obtained left-to-right from the high/low elements of the second vector.
4916    The output of interleave_high will be:   0 4 1 5
4917    and of interleave_low:                   2 6 3 7
4918
4919
4920    The permutation is done in log LENGTH stages.  In each stage interleave_high
4921    and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
4922    where the first argument is taken from the first half of DR_CHAIN and the
4923    second argument from it's second half.
4924    In our example,
4925
4926    I1: interleave_high (1st vec, 3rd vec)
4927    I2: interleave_low (1st vec, 3rd vec)
4928    I3: interleave_high (2nd vec, 4th vec)
4929    I4: interleave_low (2nd vec, 4th vec)
4930
4931    The output for the first stage is:
4932
4933    I1:  0 16  1 17  2 18  3 19
4934    I2:  4 20  5 21  6 22  7 23
4935    I3:  8 24  9 25 10 26 11 27
4936    I4: 12 28 13 29 14 30 15 31
4937
4938    The output of the second stage, i.e. the final result is:
4939
4940    I1:  0  8 16 24  1  9 17 25
4941    I2:  2 10 18 26  3 11 19 27
4942    I3:  4 12 20 28  5 13 21 30
4943    I4:  6 14 22 30  7 15 23 31.  */
4944
4945 void
4946 vect_permute_store_chain (vec<tree> dr_chain,
4947                           unsigned int length,
4948                           gimple *stmt,
4949                           gimple_stmt_iterator *gsi,
4950                           vec<tree> *result_chain)
4951 {
4952   tree vect1, vect2, high, low;
4953   gimple *perm_stmt;
4954   tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
4955   tree perm_mask_low, perm_mask_high;
4956   tree data_ref;
4957   tree perm3_mask_low, perm3_mask_high;
4958   unsigned int i, j, n, log_length = exact_log2 (length);
4959
4960   result_chain->quick_grow (length);
4961   memcpy (result_chain->address (), dr_chain.address (),
4962           length * sizeof (tree));
4963
4964   if (length == 3)
4965     {
4966       /* vect_grouped_store_supported ensures that this is constant.  */
4967       unsigned int nelt = TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
4968       unsigned int j0 = 0, j1 = 0, j2 = 0;
4969
4970       vec_perm_builder sel (nelt, nelt, 1);
4971       sel.quick_grow (nelt);
4972       vec_perm_indices indices;
4973       for (j = 0; j < 3; j++)
4974         {
4975           int nelt0 = ((3 - j) * nelt) % 3;
4976           int nelt1 = ((3 - j) * nelt + 1) % 3;
4977           int nelt2 = ((3 - j) * nelt + 2) % 3;
4978
4979           for (i = 0; i < nelt; i++)
4980             {
4981               if (3 * i + nelt0 < nelt)
4982                 sel[3 * i + nelt0] = j0++;
4983               if (3 * i + nelt1 < nelt)
4984                 sel[3 * i + nelt1] = nelt + j1++;
4985               if (3 * i + nelt2 < nelt)
4986                 sel[3 * i + nelt2] = 0;
4987             }
4988           indices.new_vector (sel, 2, nelt);
4989           perm3_mask_low = vect_gen_perm_mask_checked (vectype, indices);
4990
4991           for (i = 0; i < nelt; i++)
4992             {
4993               if (3 * i + nelt0 < nelt)
4994                 sel[3 * i + nelt0] = 3 * i + nelt0;
4995               if (3 * i + nelt1 < nelt)
4996                 sel[3 * i + nelt1] = 3 * i + nelt1;
4997               if (3 * i + nelt2 < nelt)
4998                 sel[3 * i + nelt2] = nelt + j2++;
4999             }
5000           indices.new_vector (sel, 2, nelt);
5001           perm3_mask_high = vect_gen_perm_mask_checked (vectype, indices);
5002
5003           vect1 = dr_chain[0];
5004           vect2 = dr_chain[1];
5005
5006           /* Create interleaving stmt:
5007              low = VEC_PERM_EXPR <vect1, vect2,
5008                                   {j, nelt, *, j + 1, nelt + j + 1, *,
5009                                    j + 2, nelt + j + 2, *, ...}>  */
5010           data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_low");
5011           perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect1,
5012                                            vect2, perm3_mask_low);
5013           vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5014
5015           vect1 = data_ref;
5016           vect2 = dr_chain[2];
5017           /* Create interleaving stmt:
5018              low = VEC_PERM_EXPR <vect1, vect2,
5019                                   {0, 1, nelt + j, 3, 4, nelt + j + 1,
5020                                    6, 7, nelt + j + 2, ...}>  */
5021           data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_high");
5022           perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect1,
5023                                            vect2, perm3_mask_high);
5024           vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5025           (*result_chain)[j] = data_ref;
5026         }
5027     }
5028   else
5029     {
5030       /* If length is not equal to 3 then only power of 2 is supported.  */
5031       gcc_assert (pow2p_hwi (length));
5032
5033       /* The encoding has 2 interleaved stepped patterns.  */
5034       poly_uint64 nelt = TYPE_VECTOR_SUBPARTS (vectype);
5035       vec_perm_builder sel (nelt, 2, 3);
5036       sel.quick_grow (6);
5037       for (i = 0; i < 3; i++)
5038         {
5039           sel[i * 2] = i;
5040           sel[i * 2 + 1] = i + nelt;
5041         }
5042         vec_perm_indices indices (sel, 2, nelt);
5043         perm_mask_high = vect_gen_perm_mask_checked (vectype, indices);
5044
5045         for (i = 0; i < 6; i++)
5046           sel[i] += exact_div (nelt, 2);
5047         indices.new_vector (sel, 2, nelt);
5048         perm_mask_low = vect_gen_perm_mask_checked (vectype, indices);
5049
5050         for (i = 0, n = log_length; i < n; i++)
5051           {
5052             for (j = 0; j < length/2; j++)
5053               {
5054                 vect1 = dr_chain[j];
5055                 vect2 = dr_chain[j+length/2];
5056
5057                 /* Create interleaving stmt:
5058                    high = VEC_PERM_EXPR <vect1, vect2, {0, nelt, 1, nelt+1,
5059                                                         ...}>  */
5060                 high = make_temp_ssa_name (vectype, NULL, "vect_inter_high");
5061                 perm_stmt = gimple_build_assign (high, VEC_PERM_EXPR, vect1,
5062                                                  vect2, perm_mask_high);
5063                 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5064                 (*result_chain)[2*j] = high;
5065
5066                 /* Create interleaving stmt:
5067                    low = VEC_PERM_EXPR <vect1, vect2,
5068                                         {nelt/2, nelt*3/2, nelt/2+1, nelt*3/2+1,
5069                                          ...}>  */
5070                 low = make_temp_ssa_name (vectype, NULL, "vect_inter_low");
5071                 perm_stmt = gimple_build_assign (low, VEC_PERM_EXPR, vect1,
5072                                                  vect2, perm_mask_low);
5073                 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5074                 (*result_chain)[2*j+1] = low;
5075               }
5076             memcpy (dr_chain.address (), result_chain->address (),
5077                     length * sizeof (tree));
5078           }
5079     }
5080 }
5081
5082 /* Function vect_setup_realignment
5083
5084    This function is called when vectorizing an unaligned load using
5085    the dr_explicit_realign[_optimized] scheme.
5086    This function generates the following code at the loop prolog:
5087
5088       p = initial_addr;
5089    x  msq_init = *(floor(p));   # prolog load
5090       realignment_token = call target_builtin;
5091     loop:
5092    x  msq = phi (msq_init, ---)
5093
5094    The stmts marked with x are generated only for the case of
5095    dr_explicit_realign_optimized.
5096
5097    The code above sets up a new (vector) pointer, pointing to the first
5098    location accessed by STMT, and a "floor-aligned" load using that pointer.
5099    It also generates code to compute the "realignment-token" (if the relevant
5100    target hook was defined), and creates a phi-node at the loop-header bb
5101    whose arguments are the result of the prolog-load (created by this
5102    function) and the result of a load that takes place in the loop (to be
5103    created by the caller to this function).
5104
5105    For the case of dr_explicit_realign_optimized:
5106    The caller to this function uses the phi-result (msq) to create the
5107    realignment code inside the loop, and sets up the missing phi argument,
5108    as follows:
5109     loop:
5110       msq = phi (msq_init, lsq)
5111       lsq = *(floor(p'));        # load in loop
5112       result = realign_load (msq, lsq, realignment_token);
5113
5114    For the case of dr_explicit_realign:
5115     loop:
5116       msq = *(floor(p));        # load in loop
5117       p' = p + (VS-1);
5118       lsq = *(floor(p'));       # load in loop
5119       result = realign_load (msq, lsq, realignment_token);
5120
5121    Input:
5122    STMT - (scalar) load stmt to be vectorized. This load accesses
5123           a memory location that may be unaligned.
5124    BSI - place where new code is to be inserted.
5125    ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
5126                               is used.
5127
5128    Output:
5129    REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
5130                        target hook, if defined.
5131    Return value - the result of the loop-header phi node.  */
5132
5133 tree
5134 vect_setup_realignment (gimple *stmt, gimple_stmt_iterator *gsi,
5135                         tree *realignment_token,
5136                         enum dr_alignment_support alignment_support_scheme,
5137                         tree init_addr,
5138                         struct loop **at_loop)
5139 {
5140   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5141   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5142   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5143   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
5144   struct loop *loop = NULL;
5145   edge pe = NULL;
5146   tree scalar_dest = gimple_assign_lhs (stmt);
5147   tree vec_dest;
5148   gimple *inc;
5149   tree ptr;
5150   tree data_ref;
5151   basic_block new_bb;
5152   tree msq_init = NULL_TREE;
5153   tree new_temp;
5154   gphi *phi_stmt;
5155   tree msq = NULL_TREE;
5156   gimple_seq stmts = NULL;
5157   bool inv_p;
5158   bool compute_in_loop = false;
5159   bool nested_in_vect_loop = false;
5160   struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
5161   struct loop *loop_for_initial_load = NULL;
5162
5163   if (loop_vinfo)
5164     {
5165       loop = LOOP_VINFO_LOOP (loop_vinfo);
5166       nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
5167     }
5168
5169   gcc_assert (alignment_support_scheme == dr_explicit_realign
5170               || alignment_support_scheme == dr_explicit_realign_optimized);
5171
5172   /* We need to generate three things:
5173      1. the misalignment computation
5174      2. the extra vector load (for the optimized realignment scheme).
5175      3. the phi node for the two vectors from which the realignment is
5176       done (for the optimized realignment scheme).  */
5177
5178   /* 1. Determine where to generate the misalignment computation.
5179
5180      If INIT_ADDR is NULL_TREE, this indicates that the misalignment
5181      calculation will be generated by this function, outside the loop (in the
5182      preheader).  Otherwise, INIT_ADDR had already been computed for us by the
5183      caller, inside the loop.
5184
5185      Background: If the misalignment remains fixed throughout the iterations of
5186      the loop, then both realignment schemes are applicable, and also the
5187      misalignment computation can be done outside LOOP.  This is because we are
5188      vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
5189      are a multiple of VS (the Vector Size), and therefore the misalignment in
5190      different vectorized LOOP iterations is always the same.
5191      The problem arises only if the memory access is in an inner-loop nested
5192      inside LOOP, which is now being vectorized using outer-loop vectorization.
5193      This is the only case when the misalignment of the memory access may not
5194      remain fixed throughout the iterations of the inner-loop (as explained in
5195      detail in vect_supportable_dr_alignment).  In this case, not only is the
5196      optimized realignment scheme not applicable, but also the misalignment
5197      computation (and generation of the realignment token that is passed to
5198      REALIGN_LOAD) have to be done inside the loop.
5199
5200      In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
5201      or not, which in turn determines if the misalignment is computed inside
5202      the inner-loop, or outside LOOP.  */
5203
5204   if (init_addr != NULL_TREE || !loop_vinfo)
5205     {
5206       compute_in_loop = true;
5207       gcc_assert (alignment_support_scheme == dr_explicit_realign);
5208     }
5209
5210
5211   /* 2. Determine where to generate the extra vector load.
5212
5213      For the optimized realignment scheme, instead of generating two vector
5214      loads in each iteration, we generate a single extra vector load in the
5215      preheader of the loop, and in each iteration reuse the result of the
5216      vector load from the previous iteration.  In case the memory access is in
5217      an inner-loop nested inside LOOP, which is now being vectorized using
5218      outer-loop vectorization, we need to determine whether this initial vector
5219      load should be generated at the preheader of the inner-loop, or can be
5220      generated at the preheader of LOOP.  If the memory access has no evolution
5221      in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
5222      to be generated inside LOOP (in the preheader of the inner-loop).  */
5223
5224   if (nested_in_vect_loop)
5225     {
5226       tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
5227       bool invariant_in_outerloop =
5228             (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
5229       loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
5230     }
5231   else
5232     loop_for_initial_load = loop;
5233   if (at_loop)
5234     *at_loop = loop_for_initial_load;
5235
5236   if (loop_for_initial_load)
5237     pe = loop_preheader_edge (loop_for_initial_load);
5238
5239   /* 3. For the case of the optimized realignment, create the first vector
5240       load at the loop preheader.  */
5241
5242   if (alignment_support_scheme == dr_explicit_realign_optimized)
5243     {
5244       /* Create msq_init = *(floor(p1)) in the loop preheader  */
5245       gassign *new_stmt;
5246
5247       gcc_assert (!compute_in_loop);
5248       vec_dest = vect_create_destination_var (scalar_dest, vectype);
5249       ptr = vect_create_data_ref_ptr (stmt, vectype, loop_for_initial_load,
5250                                       NULL_TREE, &init_addr, NULL, &inc,
5251                                       true, &inv_p);
5252       if (TREE_CODE (ptr) == SSA_NAME)
5253         new_temp = copy_ssa_name (ptr);
5254       else
5255         new_temp = make_ssa_name (TREE_TYPE (ptr));
5256       unsigned int align = DR_TARGET_ALIGNMENT (dr);
5257       new_stmt = gimple_build_assign
5258                    (new_temp, BIT_AND_EXPR, ptr,
5259                     build_int_cst (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
5260       new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
5261       gcc_assert (!new_bb);
5262       data_ref
5263         = build2 (MEM_REF, TREE_TYPE (vec_dest), new_temp,
5264                   build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0));
5265       new_stmt = gimple_build_assign (vec_dest, data_ref);
5266       new_temp = make_ssa_name (vec_dest, new_stmt);
5267       gimple_assign_set_lhs (new_stmt, new_temp);
5268       if (pe)
5269         {
5270           new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
5271           gcc_assert (!new_bb);
5272         }
5273       else
5274          gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
5275
5276       msq_init = gimple_assign_lhs (new_stmt);
5277     }
5278
5279   /* 4. Create realignment token using a target builtin, if available.
5280       It is done either inside the containing loop, or before LOOP (as
5281       determined above).  */
5282
5283   if (targetm.vectorize.builtin_mask_for_load)
5284     {
5285       gcall *new_stmt;
5286       tree builtin_decl;
5287
5288       /* Compute INIT_ADDR - the initial addressed accessed by this memref.  */
5289       if (!init_addr)
5290         {
5291           /* Generate the INIT_ADDR computation outside LOOP.  */
5292           init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
5293                                                             NULL_TREE);
5294           if (loop)
5295             {
5296               pe = loop_preheader_edge (loop);
5297               new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
5298               gcc_assert (!new_bb);
5299             }
5300           else
5301              gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
5302         }
5303
5304       builtin_decl = targetm.vectorize.builtin_mask_for_load ();
5305       new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
5306       vec_dest =
5307         vect_create_destination_var (scalar_dest,
5308                                      gimple_call_return_type (new_stmt));
5309       new_temp = make_ssa_name (vec_dest, new_stmt);
5310       gimple_call_set_lhs (new_stmt, new_temp);
5311
5312       if (compute_in_loop)
5313         gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
5314       else
5315         {
5316           /* Generate the misalignment computation outside LOOP.  */
5317           pe = loop_preheader_edge (loop);
5318           new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
5319           gcc_assert (!new_bb);
5320         }
5321
5322       *realignment_token = gimple_call_lhs (new_stmt);
5323
5324       /* The result of the CALL_EXPR to this builtin is determined from
5325          the value of the parameter and no global variables are touched
5326          which makes the builtin a "const" function.  Requiring the
5327          builtin to have the "const" attribute makes it unnecessary
5328          to call mark_call_clobbered.  */
5329       gcc_assert (TREE_READONLY (builtin_decl));
5330     }
5331
5332   if (alignment_support_scheme == dr_explicit_realign)
5333     return msq;
5334
5335   gcc_assert (!compute_in_loop);
5336   gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
5337
5338
5339   /* 5. Create msq = phi <msq_init, lsq> in loop  */
5340
5341   pe = loop_preheader_edge (containing_loop);
5342   vec_dest = vect_create_destination_var (scalar_dest, vectype);
5343   msq = make_ssa_name (vec_dest);
5344   phi_stmt = create_phi_node (msq, containing_loop->header);
5345   add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);
5346
5347   return msq;
5348 }
5349
5350
5351 /* Function vect_grouped_load_supported.
5352
5353    COUNT is the size of the load group (the number of statements plus the
5354    number of gaps).  SINGLE_ELEMENT_P is true if there is actually
5355    only one statement, with a gap of COUNT - 1.
5356
5357    Returns true if a suitable permute exists.  */
5358
5359 bool
5360 vect_grouped_load_supported (tree vectype, bool single_element_p,
5361                              unsigned HOST_WIDE_INT count)
5362 {
5363   machine_mode mode = TYPE_MODE (vectype);
5364
5365   /* If this is single-element interleaving with an element distance
5366      that leaves unused vector loads around punt - we at least create
5367      very sub-optimal code in that case (and blow up memory,
5368      see PR65518).  */
5369   if (single_element_p && maybe_gt (count, TYPE_VECTOR_SUBPARTS (vectype)))
5370     {
5371       if (dump_enabled_p ())
5372         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5373                          "single-element interleaving not supported "
5374                          "for not adjacent vector loads\n");
5375       return false;
5376     }
5377
5378   /* vect_permute_load_chain requires the group size to be equal to 3 or
5379      be a power of two.  */
5380   if (count != 3 && exact_log2 (count) == -1)
5381     {
5382       if (dump_enabled_p ())
5383         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5384                          "the size of the group of accesses"
5385                          " is not a power of 2 or not equal to 3\n");
5386       return false;
5387     }
5388
5389   /* Check that the permutation is supported.  */
5390   if (VECTOR_MODE_P (mode))
5391     {
5392       unsigned int i, j;
5393       if (count == 3)
5394         {
5395           unsigned int nelt;
5396           if (!GET_MODE_NUNITS (mode).is_constant (&nelt))
5397             {
5398               if (dump_enabled_p ())
5399                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5400                                  "cannot handle groups of 3 loads for"
5401                                  " variable-length vectors\n");
5402               return false;
5403             }
5404
5405           vec_perm_builder sel (nelt, nelt, 1);
5406           sel.quick_grow (nelt);
5407           vec_perm_indices indices;
5408           unsigned int k;
5409           for (k = 0; k < 3; k++)
5410             {
5411               for (i = 0; i < nelt; i++)
5412                 if (3 * i + k < 2 * nelt)
5413                   sel[i] = 3 * i + k;
5414                 else
5415                   sel[i] = 0;
5416               indices.new_vector (sel, 2, nelt);
5417               if (!can_vec_perm_const_p (mode, indices))
5418                 {
5419                   if (dump_enabled_p ())
5420                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5421                                      "shuffle of 3 loads is not supported by"
5422                                      " target\n");
5423                   return false;
5424                 }
5425               for (i = 0, j = 0; i < nelt; i++)
5426                 if (3 * i + k < 2 * nelt)
5427                   sel[i] = i;
5428                 else
5429                   sel[i] = nelt + ((nelt + k) % 3) + 3 * (j++);
5430               indices.new_vector (sel, 2, nelt);
5431               if (!can_vec_perm_const_p (mode, indices))
5432                 {
5433                   if (dump_enabled_p ())
5434                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5435                                      "shuffle of 3 loads is not supported by"
5436                                      " target\n");
5437                   return false;
5438                 }
5439             }
5440           return true;
5441         }
5442       else
5443         {
5444           /* If length is not equal to 3 then only power of 2 is supported.  */
5445           gcc_assert (pow2p_hwi (count));
5446           poly_uint64 nelt = GET_MODE_NUNITS (mode);
5447
5448           /* The encoding has a single stepped pattern.  */
5449           vec_perm_builder sel (nelt, 1, 3);
5450           sel.quick_grow (3);
5451           for (i = 0; i < 3; i++)
5452             sel[i] = i * 2;
5453           vec_perm_indices indices (sel, 2, nelt);
5454           if (can_vec_perm_const_p (mode, indices))
5455             {
5456               for (i = 0; i < 3; i++)
5457                 sel[i] = i * 2 + 1;
5458               indices.new_vector (sel, 2, nelt);
5459               if (can_vec_perm_const_p (mode, indices))
5460                 return true;
5461             }
5462         }
5463     }
5464
5465   if (dump_enabled_p ())
5466     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5467                      "extract even/odd not supported by target\n");
5468   return false;
5469 }
5470
5471 /* Return TRUE if vec_{masked_}load_lanes is available for COUNT vectors of
5472    type VECTYPE.  MASKED_P says whether the masked form is needed.  */
5473
5474 bool
5475 vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count,
5476                            bool masked_p)
5477 {
5478   if (masked_p)
5479     return vect_lanes_optab_supported_p ("vec_mask_load_lanes",
5480                                          vec_mask_load_lanes_optab,
5481                                          vectype, count);
5482   else
5483     return vect_lanes_optab_supported_p ("vec_load_lanes",
5484                                          vec_load_lanes_optab,
5485                                          vectype, count);
5486 }
5487
5488 /* Function vect_permute_load_chain.
5489
5490    Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
5491    a power of 2 or equal to 3, generate extract_even/odd stmts to reorder
5492    the input data correctly.  Return the final references for loads in
5493    RESULT_CHAIN.
5494
5495    E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
5496    The input is 4 vectors each containing 8 elements. We assign a number to each
5497    element, the input sequence is:
5498
5499    1st vec:   0  1  2  3  4  5  6  7
5500    2nd vec:   8  9 10 11 12 13 14 15
5501    3rd vec:  16 17 18 19 20 21 22 23
5502    4th vec:  24 25 26 27 28 29 30 31
5503
5504    The output sequence should be:
5505
5506    1st vec:  0 4  8 12 16 20 24 28
5507    2nd vec:  1 5  9 13 17 21 25 29
5508    3rd vec:  2 6 10 14 18 22 26 30
5509    4th vec:  3 7 11 15 19 23 27 31
5510
5511    i.e., the first output vector should contain the first elements of each
5512    interleaving group, etc.
5513
5514    We use extract_even/odd instructions to create such output.  The input of
5515    each extract_even/odd operation is two vectors
5516    1st vec    2nd vec
5517    0 1 2 3    4 5 6 7
5518
5519    and the output is the vector of extracted even/odd elements.  The output of
5520    extract_even will be:   0 2 4 6
5521    and of extract_odd:     1 3 5 7
5522
5523
5524    The permutation is done in log LENGTH stages.  In each stage extract_even
5525    and extract_odd stmts are created for each pair of vectors in DR_CHAIN in
5526    their order.  In our example,
5527
5528    E1: extract_even (1st vec, 2nd vec)
5529    E2: extract_odd (1st vec, 2nd vec)
5530    E3: extract_even (3rd vec, 4th vec)
5531    E4: extract_odd (3rd vec, 4th vec)
5532
5533    The output for the first stage will be:
5534
5535    E1:  0  2  4  6  8 10 12 14
5536    E2:  1  3  5  7  9 11 13 15
5537    E3: 16 18 20 22 24 26 28 30
5538    E4: 17 19 21 23 25 27 29 31
5539
5540    In order to proceed and create the correct sequence for the next stage (or
5541    for the correct output, if the second stage is the last one, as in our
5542    example), we first put the output of extract_even operation and then the
5543    output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
5544    The input for the second stage is:
5545
5546    1st vec (E1):  0  2  4  6  8 10 12 14
5547    2nd vec (E3): 16 18 20 22 24 26 28 30
5548    3rd vec (E2):  1  3  5  7  9 11 13 15
5549    4th vec (E4): 17 19 21 23 25 27 29 31
5550
5551    The output of the second stage:
5552
5553    E1: 0 4  8 12 16 20 24 28
5554    E2: 2 6 10 14 18 22 26 30
5555    E3: 1 5  9 13 17 21 25 29
5556    E4: 3 7 11 15 19 23 27 31
5557
5558    And RESULT_CHAIN after reordering:
5559
5560    1st vec (E1):  0 4  8 12 16 20 24 28
5561    2nd vec (E3):  1 5  9 13 17 21 25 29
5562    3rd vec (E2):  2 6 10 14 18 22 26 30
5563    4th vec (E4):  3 7 11 15 19 23 27 31.  */
5564
5565 static void
5566 vect_permute_load_chain (vec<tree> dr_chain,
5567                          unsigned int length,
5568                          gimple *stmt,
5569                          gimple_stmt_iterator *gsi,
5570                          vec<tree> *result_chain)
5571 {
5572   tree data_ref, first_vect, second_vect;
5573   tree perm_mask_even, perm_mask_odd;
5574   tree perm3_mask_low, perm3_mask_high;
5575   gimple *perm_stmt;
5576   tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
5577   unsigned int i, j, log_length = exact_log2 (length);
5578
5579   result_chain->quick_grow (length);
5580   memcpy (result_chain->address (), dr_chain.address (),
5581           length * sizeof (tree));
5582
5583   if (length == 3)
5584     {
5585       /* vect_grouped_load_supported ensures that this is constant.  */
5586       unsigned nelt = TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
5587       unsigned int k;
5588
5589       vec_perm_builder sel (nelt, nelt, 1);
5590       sel.quick_grow (nelt);
5591       vec_perm_indices indices;
5592       for (k = 0; k < 3; k++)
5593         {
5594           for (i = 0; i < nelt; i++)
5595             if (3 * i + k < 2 * nelt)
5596               sel[i] = 3 * i + k;
5597             else
5598               sel[i] = 0;
5599           indices.new_vector (sel, 2, nelt);
5600           perm3_mask_low = vect_gen_perm_mask_checked (vectype, indices);
5601
5602           for (i = 0, j = 0; i < nelt; i++)
5603             if (3 * i + k < 2 * nelt)
5604               sel[i] = i;
5605             else
5606               sel[i] = nelt + ((nelt + k) % 3) + 3 * (j++);
5607           indices.new_vector (sel, 2, nelt);
5608           perm3_mask_high = vect_gen_perm_mask_checked (vectype, indices);
5609
5610           first_vect = dr_chain[0];
5611           second_vect = dr_chain[1];
5612
5613           /* Create interleaving stmt (low part of):
5614              low = VEC_PERM_EXPR <first_vect, second_vect2, {k, 3 + k, 6 + k,
5615                                                              ...}>  */
5616           data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_low");
5617           perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, first_vect,
5618                                            second_vect, perm3_mask_low);
5619           vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5620
5621           /* Create interleaving stmt (high part of):
5622              high = VEC_PERM_EXPR <first_vect, second_vect2, {k, 3 + k, 6 + k,
5623                                                               ...}>  */
5624           first_vect = data_ref;
5625           second_vect = dr_chain[2];
5626           data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_high");
5627           perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, first_vect,
5628                                            second_vect, perm3_mask_high);
5629           vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5630           (*result_chain)[k] = data_ref;
5631         }
5632     }
5633   else
5634     {
5635       /* If length is not equal to 3 then only power of 2 is supported.  */
5636       gcc_assert (pow2p_hwi (length));
5637
5638       /* The encoding has a single stepped pattern.  */
5639       poly_uint64 nelt = TYPE_VECTOR_SUBPARTS (vectype);
5640       vec_perm_builder sel (nelt, 1, 3);
5641       sel.quick_grow (3);
5642       for (i = 0; i < 3; ++i)
5643         sel[i] = i * 2;
5644       vec_perm_indices indices (sel, 2, nelt);
5645       perm_mask_even = vect_gen_perm_mask_checked (vectype, indices);
5646
5647       for (i = 0; i < 3; ++i)
5648         sel[i] = i * 2 + 1;
5649       indices.new_vector (sel, 2, nelt);
5650       perm_mask_odd = vect_gen_perm_mask_checked (vectype, indices);
5651
5652       for (i = 0; i < log_length; i++)
5653         {
5654           for (j = 0; j < length; j += 2)
5655             {
5656               first_vect = dr_chain[j];
5657               second_vect = dr_chain[j+1];
5658
5659               /* data_ref = permute_even (first_data_ref, second_data_ref);  */
5660               data_ref = make_temp_ssa_name (vectype, NULL, "vect_perm_even");
5661               perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
5662                                                first_vect, second_vect,
5663                                                perm_mask_even);
5664               vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5665               (*result_chain)[j/2] = data_ref;
5666
5667               /* data_ref = permute_odd (first_data_ref, second_data_ref);  */
5668               data_ref = make_temp_ssa_name (vectype, NULL, "vect_perm_odd");
5669               perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
5670                                                first_vect, second_vect,
5671                                                perm_mask_odd);
5672               vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5673               (*result_chain)[j/2+length/2] = data_ref;
5674             }
5675           memcpy (dr_chain.address (), result_chain->address (),
5676                   length * sizeof (tree));
5677         }
5678     }
5679 }
5680
5681 /* Function vect_shift_permute_load_chain.
5682
5683    Given a chain of loads in DR_CHAIN of LENGTH 2 or 3, generate
5684    sequence of stmts to reorder the input data accordingly.
5685    Return the final references for loads in RESULT_CHAIN.
5686    Return true if successed, false otherwise.
5687
5688    E.g., LENGTH is 3 and the scalar type is short, i.e., VF is 8.
5689    The input is 3 vectors each containing 8 elements.  We assign a
5690    number to each element, the input sequence is:
5691
5692    1st vec:   0  1  2  3  4  5  6  7
5693    2nd vec:   8  9 10 11 12 13 14 15
5694    3rd vec:  16 17 18 19 20 21 22 23
5695
5696    The output sequence should be:
5697
5698    1st vec:  0 3 6  9 12 15 18 21
5699    2nd vec:  1 4 7 10 13 16 19 22
5700    3rd vec:  2 5 8 11 14 17 20 23
5701
5702    We use 3 shuffle instructions and 3 * 3 - 1 shifts to create such output.
5703
5704    First we shuffle all 3 vectors to get correct elements order:
5705
5706    1st vec:  ( 0  3  6) ( 1  4  7) ( 2  5)
5707    2nd vec:  ( 8 11 14) ( 9 12 15) (10 13)
5708    3rd vec:  (16 19 22) (17 20 23) (18 21)
5709
5710    Next we unite and shift vector 3 times:
5711
5712    1st step:
5713      shift right by 6 the concatenation of:
5714      "1st vec" and  "2nd vec"
5715        ( 0  3  6) ( 1  4  7) |( 2  5) _ ( 8 11 14) ( 9 12 15)| (10 13)
5716      "2nd vec" and  "3rd vec"
5717        ( 8 11 14) ( 9 12 15) |(10 13) _ (16 19 22) (17 20 23)| (18 21)
5718      "3rd vec" and  "1st vec"
5719        (16 19 22) (17 20 23) |(18 21) _ ( 0  3  6) ( 1  4  7)| ( 2  5)
5720                              | New vectors                   |
5721
5722      So that now new vectors are:
5723
5724      1st vec:  ( 2  5) ( 8 11 14) ( 9 12 15)
5725      2nd vec:  (10 13) (16 19 22) (17 20 23)
5726      3rd vec:  (18 21) ( 0  3  6) ( 1  4  7)
5727
5728    2nd step:
5729      shift right by 5 the concatenation of:
5730      "1st vec" and  "3rd vec"
5731        ( 2  5) ( 8 11 14) |( 9 12 15) _ (18 21) ( 0  3  6)| ( 1  4  7)
5732      "2nd vec" and  "1st vec"
5733        (10 13) (16 19 22) |(17 20 23) _ ( 2  5) ( 8 11 14)| ( 9 12 15)
5734      "3rd vec" and  "2nd vec"
5735        (18 21) ( 0  3  6) |( 1  4  7) _ (10 13) (16 19 22)| (17 20 23)
5736                           | New vectors                   |
5737
5738      So that now new vectors are:
5739
5740      1st vec:  ( 9 12 15) (18 21) ( 0  3  6)
5741      2nd vec:  (17 20 23) ( 2  5) ( 8 11 14)
5742      3rd vec:  ( 1  4  7) (10 13) (16 19 22) READY
5743
5744    3rd step:
5745      shift right by 5 the concatenation of:
5746      "1st vec" and  "1st vec"
5747        ( 9 12 15) (18 21) |( 0  3  6) _ ( 9 12 15) (18 21)| ( 0  3  6)
5748      shift right by 3 the concatenation of:
5749      "2nd vec" and  "2nd vec"
5750                (17 20 23) |( 2  5) ( 8 11 14) _ (17 20 23)| ( 2  5) ( 8 11 14)
5751                           | New vectors                   |
5752
5753      So that now all vectors are READY:
5754      1st vec:  ( 0  3  6) ( 9 12 15) (18 21)
5755      2nd vec:  ( 2  5) ( 8 11 14) (17 20 23)
5756      3rd vec:  ( 1  4  7) (10 13) (16 19 22)
5757
5758    This algorithm is faster than one in vect_permute_load_chain if:
5759      1.  "shift of a concatination" is faster than general permutation.
5760          This is usually so.
5761      2.  The TARGET machine can't execute vector instructions in parallel.
5762          This is because each step of the algorithm depends on previous.
5763          The algorithm in vect_permute_load_chain is much more parallel.
5764
5765    The algorithm is applicable only for LOAD CHAIN LENGTH less than VF.
5766 */
5767
5768 static bool
5769 vect_shift_permute_load_chain (vec<tree> dr_chain,
5770                                unsigned int length,
5771                                gimple *stmt,
5772                                gimple_stmt_iterator *gsi,
5773                                vec<tree> *result_chain)
5774 {
5775   tree vect[3], vect_shift[3], data_ref, first_vect, second_vect;
5776   tree perm2_mask1, perm2_mask2, perm3_mask;
5777   tree select_mask, shift1_mask, shift2_mask, shift3_mask, shift4_mask;
5778   gimple *perm_stmt;
5779
5780   tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
5781   unsigned int i;
5782   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5783   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5784
5785   unsigned HOST_WIDE_INT nelt, vf;
5786   if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nelt)
5787       || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
5788     /* Not supported for variable-length vectors.  */
5789     return false;
5790
5791   vec_perm_builder sel (nelt, nelt, 1);
5792   sel.quick_grow (nelt);
5793
5794   result_chain->quick_grow (length);
5795   memcpy (result_chain->address (), dr_chain.address (),
5796           length * sizeof (tree));
5797
5798   if (pow2p_hwi (length) && vf > 4)
5799     {
5800       unsigned int j, log_length = exact_log2 (length);
5801       for (i = 0; i < nelt / 2; ++i)
5802         sel[i] = i * 2;
5803       for (i = 0; i < nelt / 2; ++i)
5804         sel[nelt / 2 + i] = i * 2 + 1;
5805       vec_perm_indices indices (sel, 2, nelt);
5806       if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
5807         {
5808           if (dump_enabled_p ())
5809             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5810                              "shuffle of 2 fields structure is not \
5811                               supported by target\n");
5812           return false;
5813         }
5814       perm2_mask1 = vect_gen_perm_mask_checked (vectype, indices);
5815
5816       for (i = 0; i < nelt / 2; ++i)
5817         sel[i] = i * 2 + 1;
5818       for (i = 0; i < nelt / 2; ++i)
5819         sel[nelt / 2 + i] = i * 2;
5820       indices.new_vector (sel, 2, nelt);
5821       if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
5822         {
5823           if (dump_enabled_p ())
5824             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5825                              "shuffle of 2 fields structure is not \
5826                               supported by target\n");
5827           return false;
5828         }
5829       perm2_mask2 = vect_gen_perm_mask_checked (vectype, indices);
5830
5831       /* Generating permutation constant to shift all elements.
5832          For vector length 8 it is {4 5 6 7 8 9 10 11}.  */
5833       for (i = 0; i < nelt; i++)
5834         sel[i] = nelt / 2 + i;
5835       indices.new_vector (sel, 2, nelt);
5836       if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
5837         {
5838           if (dump_enabled_p ())
5839             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5840                              "shift permutation is not supported by target\n");
5841           return false;
5842         }
5843       shift1_mask = vect_gen_perm_mask_checked (vectype, indices);
5844
5845       /* Generating permutation constant to select vector from 2.
5846          For vector length 8 it is {0 1 2 3 12 13 14 15}.  */
5847       for (i = 0; i < nelt / 2; i++)
5848         sel[i] = i;
5849       for (i = nelt / 2; i < nelt; i++)
5850         sel[i] = nelt + i;
5851       indices.new_vector (sel, 2, nelt);
5852       if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
5853         {
5854           if (dump_enabled_p ())
5855             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5856                              "select is not supported by target\n");
5857           return false;
5858         }
5859       select_mask = vect_gen_perm_mask_checked (vectype, indices);
5860
5861       for (i = 0; i < log_length; i++)
5862         {
5863           for (j = 0; j < length; j += 2)
5864             {
5865               first_vect = dr_chain[j];
5866               second_vect = dr_chain[j + 1];
5867
5868               data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle2");
5869               perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
5870                                                first_vect, first_vect,
5871                                                perm2_mask1);
5872               vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5873               vect[0] = data_ref;
5874
5875               data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle2");
5876               perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
5877                                                second_vect, second_vect,
5878                                                perm2_mask2);
5879               vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5880               vect[1] = data_ref;
5881
5882               data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift");
5883               perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
5884                                                vect[0], vect[1], shift1_mask);
5885               vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5886               (*result_chain)[j/2 + length/2] = data_ref;
5887
5888               data_ref = make_temp_ssa_name (vectype, NULL, "vect_select");
5889               perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
5890                                                vect[0], vect[1], select_mask);
5891               vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5892               (*result_chain)[j/2] = data_ref;
5893             }
5894           memcpy (dr_chain.address (), result_chain->address (),
5895                   length * sizeof (tree));
5896         }
5897       return true;
5898     }
5899   if (length == 3 && vf > 2)
5900     {
5901       unsigned int k = 0, l = 0;
5902
5903       /* Generating permutation constant to get all elements in rigth order.
5904          For vector length 8 it is {0 3 6 1 4 7 2 5}.  */
5905       for (i = 0; i < nelt; i++)
5906         {
5907           if (3 * k + (l % 3) >= nelt)
5908             {
5909               k = 0;
5910               l += (3 - (nelt % 3));
5911             }
5912           sel[i] = 3 * k + (l % 3);
5913           k++;
5914         }
5915       vec_perm_indices indices (sel, 2, nelt);
5916       if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
5917         {
5918           if (dump_enabled_p ())
5919             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5920                              "shuffle of 3 fields structure is not \
5921                               supported by target\n");
5922           return false;
5923         }
5924       perm3_mask = vect_gen_perm_mask_checked (vectype, indices);
5925
5926       /* Generating permutation constant to shift all elements.
5927          For vector length 8 it is {6 7 8 9 10 11 12 13}.  */
5928       for (i = 0; i < nelt; i++)
5929         sel[i] = 2 * (nelt / 3) + (nelt % 3) + i;
5930       indices.new_vector (sel, 2, nelt);
5931       if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
5932         {
5933           if (dump_enabled_p ())
5934             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5935                              "shift permutation is not supported by target\n");
5936           return false;
5937         }
5938       shift1_mask = vect_gen_perm_mask_checked (vectype, indices);
5939
5940       /* Generating permutation constant to shift all elements.
5941          For vector length 8 it is {5 6 7 8 9 10 11 12}.  */
5942       for (i = 0; i < nelt; i++)
5943         sel[i] = 2 * (nelt / 3) + 1 + i;
5944       indices.new_vector (sel, 2, nelt);
5945       if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
5946         {
5947           if (dump_enabled_p ())
5948             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5949                              "shift permutation is not supported by target\n");
5950           return false;
5951         }
5952       shift2_mask = vect_gen_perm_mask_checked (vectype, indices);
5953
5954       /* Generating permutation constant to shift all elements.
5955          For vector length 8 it is {3 4 5 6 7 8 9 10}.  */
5956       for (i = 0; i < nelt; i++)
5957         sel[i] = (nelt / 3) + (nelt % 3) / 2 + i;
5958       indices.new_vector (sel, 2, nelt);
5959       if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
5960         {
5961           if (dump_enabled_p ())
5962             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5963                              "shift permutation is not supported by target\n");
5964           return false;
5965         }
5966       shift3_mask = vect_gen_perm_mask_checked (vectype, indices);
5967
5968       /* Generating permutation constant to shift all elements.
5969          For vector length 8 it is {5 6 7 8 9 10 11 12}.  */
5970       for (i = 0; i < nelt; i++)
5971         sel[i] = 2 * (nelt / 3) + (nelt % 3) / 2 + i;
5972       indices.new_vector (sel, 2, nelt);
5973       if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
5974         {
5975           if (dump_enabled_p ())
5976             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5977                              "shift permutation is not supported by target\n");
5978           return false;
5979         }
5980       shift4_mask = vect_gen_perm_mask_checked (vectype, indices);
5981
5982       for (k = 0; k < 3; k++)
5983         {
5984           data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3");
5985           perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
5986                                            dr_chain[k], dr_chain[k],
5987                                            perm3_mask);
5988           vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5989           vect[k] = data_ref;
5990         }
5991
5992       for (k = 0; k < 3; k++)
5993         {
5994           data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift1");
5995           perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
5996                                            vect[k % 3], vect[(k + 1) % 3],
5997                                            shift1_mask);
5998           vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5999           vect_shift[k] = data_ref;
6000         }
6001
6002       for (k = 0; k < 3; k++)
6003         {
6004           data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift2");
6005           perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
6006                                            vect_shift[(4 - k) % 3],
6007                                            vect_shift[(3 - k) % 3],
6008                                            shift2_mask);
6009           vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6010           vect[k] = data_ref;
6011         }
6012
6013       (*result_chain)[3 - (nelt % 3)] = vect[2];
6014
6015       data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift3");
6016       perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect[0],
6017                                        vect[0], shift3_mask);
6018       vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6019       (*result_chain)[nelt % 3] = data_ref;
6020
6021       data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift4");
6022       perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect[1],
6023                                        vect[1], shift4_mask);
6024       vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6025       (*result_chain)[0] = data_ref;
6026       return true;
6027     }
6028   return false;
6029 }
6030
6031 /* Function vect_transform_grouped_load.
6032
6033    Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
6034    to perform their permutation and ascribe the result vectorized statements to
6035    the scalar statements.
6036 */
6037
6038 void
6039 vect_transform_grouped_load (gimple *stmt, vec<tree> dr_chain, int size,
6040                              gimple_stmt_iterator *gsi)
6041 {
6042   machine_mode mode;
6043   vec<tree> result_chain = vNULL;
6044
6045   /* DR_CHAIN contains input data-refs that are a part of the interleaving.
6046      RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted
6047      vectors, that are ready for vector computation.  */
6048   result_chain.create (size);
6049
6050   /* If reassociation width for vector type is 2 or greater target machine can
6051      execute 2 or more vector instructions in parallel.  Otherwise try to
6052      get chain for loads group using vect_shift_permute_load_chain.  */
6053   mode = TYPE_MODE (STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt)));
6054   if (targetm.sched.reassociation_width (VEC_PERM_EXPR, mode) > 1
6055       || pow2p_hwi (size)
6056       || !vect_shift_permute_load_chain (dr_chain, size, stmt,
6057                                          gsi, &result_chain))
6058     vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain);
6059   vect_record_grouped_load_vectors (stmt, result_chain);
6060   result_chain.release ();
6061 }
6062
6063 /* RESULT_CHAIN contains the output of a group of grouped loads that were
6064    generated as part of the vectorization of STMT.  Assign the statement
6065    for each vector to the associated scalar statement.  */
6066
6067 void
6068 vect_record_grouped_load_vectors (gimple *stmt, vec<tree> result_chain)
6069 {
6070   gimple *first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
6071   gimple *next_stmt, *new_stmt;
6072   unsigned int i, gap_count;
6073   tree tmp_data_ref;
6074
6075   /* Put a permuted data-ref in the VECTORIZED_STMT field.
6076      Since we scan the chain starting from it's first node, their order
6077      corresponds the order of data-refs in RESULT_CHAIN.  */
6078   next_stmt = first_stmt;
6079   gap_count = 1;
6080   FOR_EACH_VEC_ELT (result_chain, i, tmp_data_ref)
6081     {
6082       if (!next_stmt)
6083         break;
6084
6085       /* Skip the gaps.  Loads created for the gaps will be removed by dead
6086        code elimination pass later.  No need to check for the first stmt in
6087        the group, since it always exists.
6088        GROUP_GAP is the number of steps in elements from the previous
6089        access (if there is no gap GROUP_GAP is 1).  We skip loads that
6090        correspond to the gaps.  */
6091       if (next_stmt != first_stmt
6092           && gap_count < GROUP_GAP (vinfo_for_stmt (next_stmt)))
6093       {
6094         gap_count++;
6095         continue;
6096       }
6097
6098       while (next_stmt)
6099         {
6100           new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
6101           /* We assume that if VEC_STMT is not NULL, this is a case of multiple
6102              copies, and we put the new vector statement in the first available
6103              RELATED_STMT.  */
6104           if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
6105             STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
6106           else
6107             {
6108               if (!GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
6109                 {
6110                   gimple *prev_stmt =
6111                     STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
6112                   gimple *rel_stmt =
6113                     STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
6114                   while (rel_stmt)
6115                     {
6116                       prev_stmt = rel_stmt;
6117                       rel_stmt =
6118                         STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
6119                     }
6120
6121                   STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) =
6122                     new_stmt;
6123                 }
6124             }
6125
6126           next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6127           gap_count = 1;
6128           /* If NEXT_STMT accesses the same DR as the previous statement,
6129              put the same TMP_DATA_REF as its vectorized statement; otherwise
6130              get the next data-ref from RESULT_CHAIN.  */
6131           if (!next_stmt || !GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
6132             break;
6133         }
6134     }
6135 }
6136
6137 /* Function vect_force_dr_alignment_p.
6138
6139    Returns whether the alignment of a DECL can be forced to be aligned
6140    on ALIGNMENT bit boundary.  */
6141
6142 bool
6143 vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
6144 {
6145   if (!VAR_P (decl))
6146     return false;
6147
6148   if (decl_in_symtab_p (decl)
6149       && !symtab_node::get (decl)->can_increase_alignment_p ())
6150     return false;
6151
6152   if (TREE_STATIC (decl))
6153     return (alignment <= MAX_OFILE_ALIGNMENT);
6154   else
6155     return (alignment <= MAX_STACK_ALIGNMENT);
6156 }
6157
6158
6159 /* Return whether the data reference DR is supported with respect to its
6160    alignment.
6161    If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even
6162    it is aligned, i.e., check if it is possible to vectorize it with different
6163    alignment.  */
6164
6165 enum dr_alignment_support
6166 vect_supportable_dr_alignment (struct data_reference *dr,
6167                                bool check_aligned_accesses)
6168 {
6169   gimple *stmt = DR_STMT (dr);
6170   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6171   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6172   machine_mode mode = TYPE_MODE (vectype);
6173   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6174   struct loop *vect_loop = NULL;
6175   bool nested_in_vect_loop = false;
6176
6177   if (aligned_access_p (dr) && !check_aligned_accesses)
6178     return dr_aligned;
6179
6180   /* For now assume all conditional loads/stores support unaligned
6181      access without any special code.  */
6182   if (is_gimple_call (stmt)
6183       && gimple_call_internal_p (stmt)
6184       && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
6185           || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
6186     return dr_unaligned_supported;
6187
6188   if (loop_vinfo)
6189     {
6190       vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
6191       nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
6192     }
6193
6194   /* Possibly unaligned access.  */
6195
6196   /* We can choose between using the implicit realignment scheme (generating
6197      a misaligned_move stmt) and the explicit realignment scheme (generating
6198      aligned loads with a REALIGN_LOAD).  There are two variants to the
6199      explicit realignment scheme: optimized, and unoptimized.
6200      We can optimize the realignment only if the step between consecutive
6201      vector loads is equal to the vector size.  Since the vector memory
6202      accesses advance in steps of VS (Vector Size) in the vectorized loop, it
6203      is guaranteed that the misalignment amount remains the same throughout the
6204      execution of the vectorized loop.  Therefore, we can create the
6205      "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
6206      at the loop preheader.
6207
6208      However, in the case of outer-loop vectorization, when vectorizing a
6209      memory access in the inner-loop nested within the LOOP that is now being
6210      vectorized, while it is guaranteed that the misalignment of the
6211      vectorized memory access will remain the same in different outer-loop
6212      iterations, it is *not* guaranteed that is will remain the same throughout
6213      the execution of the inner-loop.  This is because the inner-loop advances
6214      with the original scalar step (and not in steps of VS).  If the inner-loop
6215      step happens to be a multiple of VS, then the misalignment remains fixed
6216      and we can use the optimized realignment scheme.  For example:
6217
6218       for (i=0; i<N; i++)
6219         for (j=0; j<M; j++)
6220           s += a[i+j];
6221
6222      When vectorizing the i-loop in the above example, the step between
6223      consecutive vector loads is 1, and so the misalignment does not remain
6224      fixed across the execution of the inner-loop, and the realignment cannot
6225      be optimized (as illustrated in the following pseudo vectorized loop):
6226
6227       for (i=0; i<N; i+=4)
6228         for (j=0; j<M; j++){
6229           vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
6230                          // when j is {0,1,2,3,4,5,6,7,...} respectively.
6231                          // (assuming that we start from an aligned address).
6232           }
6233
6234      We therefore have to use the unoptimized realignment scheme:
6235
6236       for (i=0; i<N; i+=4)
6237           for (j=k; j<M; j+=4)
6238           vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
6239                            // that the misalignment of the initial address is
6240                            // 0).
6241
6242      The loop can then be vectorized as follows:
6243
6244       for (k=0; k<4; k++){
6245         rt = get_realignment_token (&vp[k]);
6246         for (i=0; i<N; i+=4){
6247           v1 = vp[i+k];
6248           for (j=k; j<M; j+=4){
6249             v2 = vp[i+j+VS-1];
6250             va = REALIGN_LOAD <v1,v2,rt>;
6251             vs += va;
6252             v1 = v2;
6253           }
6254         }
6255     } */
6256
6257   if (DR_IS_READ (dr))
6258     {
6259       bool is_packed = false;
6260       tree type = (TREE_TYPE (DR_REF (dr)));
6261
6262       if (optab_handler (vec_realign_load_optab, mode) != CODE_FOR_nothing
6263           && (!targetm.vectorize.builtin_mask_for_load
6264               || targetm.vectorize.builtin_mask_for_load ()))
6265         {
6266           tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6267
6268           /* If we are doing SLP then the accesses need not have the
6269              same alignment, instead it depends on the SLP group size.  */
6270           if (loop_vinfo
6271               && STMT_SLP_TYPE (stmt_info)
6272               && !multiple_p (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6273                               * GROUP_SIZE (vinfo_for_stmt
6274                                             (GROUP_FIRST_ELEMENT (stmt_info))),
6275                               TYPE_VECTOR_SUBPARTS (vectype)))
6276             ;
6277           else if (!loop_vinfo
6278                    || (nested_in_vect_loop
6279                        && maybe_ne (TREE_INT_CST_LOW (DR_STEP (dr)),
6280                                     GET_MODE_SIZE (TYPE_MODE (vectype)))))
6281             return dr_explicit_realign;
6282           else
6283             return dr_explicit_realign_optimized;
6284         }
6285       if (!known_alignment_for_access_p (dr))
6286         is_packed = not_size_aligned (DR_REF (dr));
6287
6288       if (targetm.vectorize.support_vector_misalignment
6289             (mode, type, DR_MISALIGNMENT (dr), is_packed))
6290         /* Can't software pipeline the loads, but can at least do them.  */
6291         return dr_unaligned_supported;
6292     }
6293   else
6294     {
6295       bool is_packed = false;
6296       tree type = (TREE_TYPE (DR_REF (dr)));
6297
6298       if (!known_alignment_for_access_p (dr))
6299         is_packed = not_size_aligned (DR_REF (dr));
6300
6301      if (targetm.vectorize.support_vector_misalignment
6302            (mode, type, DR_MISALIGNMENT (dr), is_packed))
6303        return dr_unaligned_supported;
6304     }
6305
6306   /* Unsupported.  */
6307   return dr_unaligned_unsupported;
6308 }