gcc/tree-vect-data-refs.c

   1 /* Data References Analysis and Manipulation Utilities for Vectorization.
   2    Copyright (C) 2003-2021 Free Software Foundation, Inc.
   3    Contributed by Dorit Naishlos <dorit@il.ibm.com>
   4    and Ira Rosen <irar@il.ibm.com>
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify it under
   9 the terms of the GNU General Public License as published by the Free
  10 Software Foundation; either version 3, or (at your option) any later
  11 version.
  12
  13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16 for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING3.  If not see
  20 <http://www.gnu.org/licenses/>.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "backend.h"
  26 #include "target.h"
  27 #include "rtl.h"
  28 #include "tree.h"
  29 #include "gimple.h"
  30 #include "predict.h"
  31 #include "memmodel.h"
  32 #include "tm_p.h"
  33 #include "ssa.h"
  34 #include "optabs-tree.h"
  35 #include "cgraph.h"
  36 #include "dumpfile.h"
  37 #include "alias.h"
  38 #include "fold-const.h"
  39 #include "stor-layout.h"
  40 #include "tree-eh.h"
  41 #include "gimplify.h"
  42 #include "gimple-iterator.h"
  43 #include "gimplify-me.h"
  44 #include "tree-ssa-loop-ivopts.h"
  45 #include "tree-ssa-loop-manip.h"
  46 #include "tree-ssa-loop.h"
  47 #include "cfgloop.h"
  48 #include "tree-scalar-evolution.h"
  49 #include "tree-vectorizer.h"
  50 #include "expr.h"
  51 #include "builtins.h"
  52 #include "tree-cfg.h"
  53 #include "tree-hash-traits.h"
  54 #include "vec-perm-indices.h"
  55 #include "internal-fn.h"
  56 #include "gimple-fold.h"
  57
  58 /* Return true if load- or store-lanes optab OPTAB is implemented for
  59    COUNT vectors of type VECTYPE.  NAME is the name of OPTAB; it is used only in dump messages.  */
  60
  61 static bool
  62 vect_lanes_optab_supported_p (const char *name, convert_optab optab,
  63                               tree vectype, unsigned HOST_WIDE_INT count)
  64 {
  65   machine_mode mode, array_mode;
  66   bool limit_p;
  67
  68   mode = TYPE_MODE (vectype);
  69   if (!targetm.array_mode (mode, count).exists (&array_mode))  /* Ask the target for an array mode first.  */
  70     {  /* The target has none: try an integer mode of COUNT * GET_MODE_BITSIZE (MODE) bits.  */
  71       poly_uint64 bits = count * GET_MODE_BITSIZE (mode);
  72       limit_p = !targetm.array_mode_supported_p (mode, count);
  73       if (!int_mode_for_size (bits, limit_p).exists (&array_mode))
  74         {
  75           if (dump_enabled_p ())
  76             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
  77                              "no array mode for %s[%wu]\n",
  78                              GET_MODE_NAME (mode), count);
  79           return false;
  80         }
  81     }
  82   /* ARRAY_MODE is set here; it is usable only if OPTAB has a handler for it.  */
  83   if (convert_optab_handler (optab, array_mode, mode) == CODE_FOR_nothing)
  84     {
  85       if (dump_enabled_p ())
  86         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
  87                          "cannot use %s<%s><%s>\n", name,
  88                          GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
  89       return false;
  90     }
  91
  92   if (dump_enabled_p ())
  93     dump_printf_loc (MSG_NOTE, vect_location,
  94                      "can use %s<%s><%s>\n", name, GET_MODE_NAME (array_mode),
  95                      GET_MODE_NAME (mode));
  96
  97   return true;
  98 }
  99
 100
 101 /* Return the smallest scalar part of STMT_INFO.
 102    This is used to determine the vectype of the stmt.  We generally set the
 103    vectype according to the type of the result (lhs).  For stmts whose
 104    result-type is different than the type of the arguments (e.g., demotion,
 105    promotion), vectype will be reset appropriately (later).  Note that we have
 106    to visit the smallest datatype in this function, because that determines the
 107    VF.  If the smallest datatype in the loop is present only as the rhs of a
 108    promotion operation - we'd miss it.
 109    Such a case, where a variable of this datatype does not appear in the lhs
 110    anywhere in the loop, can only occur if it's an invariant: e.g.:
 111    'int_x = (int) short_inv', which we'd expect to have been optimized away by
 112    invariant motion.  However, we cannot rely on invariant motion to always
 113    take invariants out of the loop, and so in the case of promotion we also
 114    have to check the rhs.
 115    SCALAR_TYPE is the caller's candidate type; it is returned unchanged if
 116    its TYPE_SIZE_UNIT does not fit an unsigned HOST_WIDE_INT.  */
 117
 118 tree
 119 vect_get_smallest_scalar_type (stmt_vec_info stmt_info, tree scalar_type)
 120 {
 121   HOST_WIDE_INT lhs, rhs;
 122
 123   /* During the analysis phase, this function is called on arbitrary
 124      statements that might not have scalar results.  */
 125   if (!tree_fits_uhwi_p (TYPE_SIZE_UNIT (scalar_type)))
 126     return scalar_type;
 127
 128   lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
 129   /* LHS keeps the size of the incoming SCALAR_TYPE even when SCALAR_TYPE is replaced below.  */
 130   gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
 131   if (assign)
 132     {
 133       scalar_type = TREE_TYPE (gimple_assign_lhs (assign));
 134       if (gimple_assign_cast_p (assign)
 135           || gimple_assign_rhs_code (assign) == DOT_PROD_EXPR
 136           || gimple_assign_rhs_code (assign) == WIDEN_SUM_EXPR
 137           || gimple_assign_rhs_code (assign) == WIDEN_MULT_EXPR
 138           || gimple_assign_rhs_code (assign) == WIDEN_LSHIFT_EXPR
 139           || gimple_assign_rhs_code (assign) == WIDEN_PLUS_EXPR
 140           || gimple_assign_rhs_code (assign) == WIDEN_MINUS_EXPR
 141           || gimple_assign_rhs_code (assign) == FLOAT_EXPR)
 142         {
 143           tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (assign));
 144
 145           rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
 146           if (rhs < lhs)
 147             scalar_type = rhs_type;
 148         }
 149     }
 150   else if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
 151     {
 152       unsigned int i = 0;  /* Index of the call argument to inspect; ~0U means inspect none.  */
 153       if (gimple_call_internal_p (call))
 154         {
 155           internal_fn ifn = gimple_call_internal_fn (call);
 156           if (internal_load_fn_p (ifn))
 157             /* For loads the LHS type does the trick.  */
 158             i = ~0U;
 159           else if (internal_store_fn_p (ifn))
 160             {
 161               /* For stores use the type of the stored value.  */
 162               i = internal_fn_stored_value_index (ifn);
 163               scalar_type = TREE_TYPE (gimple_call_arg (call, i));
 164               i = ~0U;
 165             }
 166           else if (internal_fn_mask_index (ifn) == 0)  /* The mask index is 0, so inspect argument 1 instead.  */
 167             i = 1;
 168         }
 169       if (i < gimple_call_num_args (call))
 170         {
 171           tree rhs_type = TREE_TYPE (gimple_call_arg (call, i));
 172           if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (rhs_type)))
 173             {
 174               rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
 175               if (rhs < lhs)
 176                 scalar_type = rhs_type;
 177             }
 178         }
 179     }
 180
 181   return scalar_type;
 182 }
 183
 184
 185 /* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
 186    tested at run-time.  Return success if DDR was inserted.  Return a failure if the
 187    vect-max-version-for-alias-checks param is 0 or if runtime_alias_check_p rejects DDR.  */
 188
 189 static opt_result
 190 vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
 191 {
 192   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
 193
 194   if ((unsigned) param_vect_max_version_for_alias_checks == 0)
 195     return opt_result::failure_at (vect_location,
 196                                    "will not create alias checks, as"
 197                                    " --param vect-max-version-for-alias-checks"
 198                                    " == 0\n");
 199
 200   opt_result res
 201     = runtime_alias_check_p (ddr, loop,
 202                              optimize_loop_nest_for_speed_p (loop));
 203   if (!res)  /* Hand the failure, including its message, back to the caller.  */
 204     return res;
 205
 206   LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).safe_push (ddr);
 207   return opt_result::success ();
 208 }
 209
 210 /* Record that loop LOOP_VINFO needs to check that VALUE is nonzero.  A VALUE that is already recorded is not added again.  */
 211
 212 static void
 213 vect_check_nonzero_value (loop_vec_info loop_vinfo, tree value)
 214 {
 215   const vec<tree> &checks = LOOP_VINFO_CHECK_NONZERO (loop_vinfo);
 216   for (unsigned int i = 0; i < checks.length(); ++i)
 217     if (checks[i] == value)  /* Already recorded (entries are compared with ==).  */
 218       return;
 219
 220   if (dump_enabled_p ())
 221     dump_printf_loc (MSG_NOTE, vect_location,
 222                      "need run-time check that %T is nonzero\n",
 223                      value);
 224   LOOP_VINFO_CHECK_NONZERO (loop_vinfo).safe_push (value);
 225 }
 226
 227 /* Return true if we know that the order of vectorized DR_INFO_A and
 228    vectorized DR_INFO_B will be the same as the order of DR_INFO_A and
 229    DR_INFO_B.  At least one of the accesses is a write.  */
 230
 231 static bool
 232 vect_preserves_scalar_order_p (dr_vec_info *dr_info_a, dr_vec_info *dr_info_b)
 233 {
 234   stmt_vec_info stmtinfo_a = dr_info_a->stmt;
 235   stmt_vec_info stmtinfo_b = dr_info_b->stmt;
 236
 237   /* Single statements are always kept in their original order.  */
 238   if (!STMT_VINFO_GROUPED_ACCESS (stmtinfo_a)
 239       && !STMT_VINFO_GROUPED_ACCESS (stmtinfo_b))
 240     return true;
 241
 242   /* STMT_A and STMT_B belong to overlapping groups.  All loads are
 243      emitted at the position of the first scalar load.
 244      Stores in a group are emitted at the position of the last scalar store.
 245      Compute that position and check whether the resulting order matches
 246      the current one.  */
 247   stmt_vec_info il_a = DR_GROUP_FIRST_ELEMENT (stmtinfo_a);
 248   if (il_a)
 249     {  /* Grouped access: walk the group to find the stmt at which it is emitted.  */
 250       if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmtinfo_a)))
 251         for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_a); s;
 252              s = DR_GROUP_NEXT_ELEMENT (s))
 253           il_a = get_later_stmt (il_a, s);  /* Keep the latest store seen so far.  */
 254       else /* DR_IS_READ */
 255         for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_a); s;
 256              s = DR_GROUP_NEXT_ELEMENT (s))
 257           if (get_later_stmt (il_a, s) == il_a)  /* S is not later than IL_A: keep S as the earliest load.  */
 258             il_a = s;
 259     }
 260   else
 261     il_a = stmtinfo_a;  /* Not part of a group: the stmt itself is the emission position.  */
 262   stmt_vec_info il_b = DR_GROUP_FIRST_ELEMENT (stmtinfo_b);
 263   if (il_b)
 264     {  /* Same computation as above, for the group of STMTINFO_B.  */
 265       if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmtinfo_b)))
 266         for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_b); s;
 267              s = DR_GROUP_NEXT_ELEMENT (s))
 268           il_b = get_later_stmt (il_b, s);
 269       else /* DR_IS_READ */
 270         for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_b); s;
 271              s = DR_GROUP_NEXT_ELEMENT (s))
 272           if (get_later_stmt (il_b, s) == il_b)
 273             il_b = s;
 274     }
 275   else
 276     il_b = stmtinfo_b;
 277   bool a_after_b = (get_later_stmt (stmtinfo_a, stmtinfo_b) == stmtinfo_a);  /* Order of the scalar stmts.  */
 278   return (get_later_stmt (il_a, il_b) == il_a) == a_after_b;  /* True if the emission positions are ordered the same way.  */
 279 }
 280
 281 /* A subroutine of vect_analyze_data_ref_dependence.  Handle
 282    DDR_COULD_BE_INDEPENDENT_P ddr DDR that has a known set of dependence
 283    distances.  These distances are conservatively correct but they don't
 284    reflect a guaranteed dependence.
 285
 286    Return true if this function does all the work necessary to avoid
 287    an alias or false if the caller should use the dependence distances
 288    to limit the vectorization factor in the usual way.  LOOP_DEPTH is
 289    the depth of the loop described by LOOP_VINFO and the other arguments
 290    are as for vect_analyze_data_ref_dependence.  */
 291
 292 static bool
 293 vect_analyze_possibly_independent_ddr (data_dependence_relation *ddr,
 294                                        loop_vec_info loop_vinfo,
 295                                        int loop_depth, unsigned int *max_vf)
 296 {
 297   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
 298   for (lambda_vector &dist_v : DDR_DIST_VECTS (ddr))
 299     {
 300       int dist = dist_v[loop_depth];
 301       if (dist != 0 && !(dist > 0 && DDR_REVERSED_P (ddr)))  /* Zero and reversed positive distances need no action here.  */
 302         {
 303           /* If the user asserted safelen >= DIST consecutive iterations
 304              can be executed concurrently, assume independence.
 305
 306              ??? An alternative would be to add the alias check even
 307              in this case, and vectorize the fallback loop with the
 308              maximum VF set to safelen.  However, if the user has
 309              explicitly given a length, it's less likely that that
 310              would be a win.  */
 311           if (loop->safelen >= 2 && abs_hwi (dist) <= loop->safelen)
 312             {
 313               if ((unsigned int) loop->safelen < *max_vf)
 314                 *max_vf = loop->safelen;
 315               LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = false;
 316               continue;
 317             }
 318
 319           /* For dependence distances of 2 or more, we have the option
 320              of limiting VF or checking for an alias at runtime.
 321              Prefer to check at runtime if we can, to avoid limiting
 322              the VF unnecessarily when the bases are in fact independent.
 323
 324              Note that the alias checks will be removed if the VF ends up
 325              being small enough.  */
 326           dr_vec_info *dr_info_a = loop_vinfo->lookup_dr (DDR_A (ddr));
 327           dr_vec_info *dr_info_b = loop_vinfo->lookup_dr (DDR_B (ddr));
 328           return (!STMT_VINFO_GATHER_SCATTER_P (dr_info_a->stmt)
 329                   && !STMT_VINFO_GATHER_SCATTER_P (dr_info_b->stmt)
 330                   && vect_mark_for_runtime_alias_test (ddr, loop_vinfo));  /* The first distance reaching this point decides the result.  */
 331         }
 332     }
 333   return true;  /* Every distance was either skipped or covered by safelen.  */
 334 }
 335
 336
 337 /* Function vect_analyze_data_ref_dependence.
 338
 339    FIXME: I needed to change the sense of the returned flag.
 340
 341    Return a failure if there (might) exist a dependence between a memory-reference
 342    DRA and a memory-reference DRB (the two references of DDR).  When versioning for
 343    alias may check a dependence at run-time, return the result of
 344    vect_mark_for_runtime_alias_test.  Adjust *MAX_VF according to the data dependence.  */
 345
 346 static opt_result
 347 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
 348                                   loop_vec_info loop_vinfo,
 349                                   unsigned int *max_vf)
 350 {
 351   unsigned int i;
 352   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
 353   struct data_reference *dra = DDR_A (ddr);
 354   struct data_reference *drb = DDR_B (ddr);
 355   dr_vec_info *dr_info_a = loop_vinfo->lookup_dr (dra);
 356   dr_vec_info *dr_info_b = loop_vinfo->lookup_dr (drb);
 357   stmt_vec_info stmtinfo_a = dr_info_a->stmt;
 358   stmt_vec_info stmtinfo_b = dr_info_b->stmt;
 359   lambda_vector dist_v;
 360   unsigned int loop_depth;
 361
 362   /* If user asserted safelen consecutive iterations can be
 363      executed concurrently, assume independence.  Returns true (after capping
 364      *MAX_VF at safelen) only when safelen is at least 2.  */
 365     {
 366       if (loop->safelen >= 2)
 367         {
 368           if ((unsigned int) loop->safelen < *max_vf)
 369             *max_vf = loop->safelen;
 370           LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = false;
 371           return true;
 372         }
 373       return false;
 374     };
 375
 376   /* In loop analysis all data references should be vectorizable.  */
 377   if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
 378       || !STMT_VINFO_VECTORIZABLE (stmtinfo_b))
 379     gcc_unreachable ();
 380
 381   /* Independent data accesses.  */
 382   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
 383     return opt_result::success ();
 384
 385   if (dra == drb
 386       || (DR_IS_READ (dra) && DR_IS_READ (drb)))
 387     return opt_result::success ();
 388
 389   /* We do not have to consider dependences between accesses that belong
 390      to the same group, unless the stride could be smaller than the
 391      group size.  */
 392   if (DR_GROUP_FIRST_ELEMENT (stmtinfo_a)
 393       && (DR_GROUP_FIRST_ELEMENT (stmtinfo_a)
 394           == DR_GROUP_FIRST_ELEMENT (stmtinfo_b))
 395       && !STMT_VINFO_STRIDED_P (stmtinfo_a))
 396     return opt_result::success ();
 397
 398   /* Even if we have an anti-dependence then, as the vectorized loop covers at
 399      least two scalar iterations, there is always also a true dependence.
 400      As the vectorizer does not re-order loads and stores we can ignore
 401      the anti-dependence if TBAA can disambiguate both DRs similar to the
 402      case with known negative distance anti-dependences (positive
 403      distance anti-dependences would violate TBAA constraints).  */
 404   if (((DR_IS_READ (dra) && DR_IS_WRITE (drb))
 405        || (DR_IS_WRITE (dra) && DR_IS_READ (drb)))
 406       && !alias_sets_conflict_p (get_alias_set (DR_REF (dra)),
 407                                  get_alias_set (DR_REF (drb))))
 408     return opt_result::success ();
 409   /* Gather/scatter accesses: only a safelen assertion lets us proceed.  */
 410   if (STMT_VINFO_GATHER_SCATTER_P (stmtinfo_a)
 411       || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_b))
 412     {
 413       if (apply_safelen ())
 414         return opt_result::success ();
 415
 416       return opt_result::failure_at
 417         (stmtinfo_a->stmt,
 418          "possible alias involving gather/scatter between %T and %T\n",
 419          DR_REF (dra), DR_REF (drb));
 420     }
 421
 422   /* Unknown data dependence.  */
 423   if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
 424     {
 425       if (apply_safelen ())
 426         return opt_result::success ();
 427
 428       if (dump_enabled_p ())
 429         dump_printf_loc (MSG_MISSED_OPTIMIZATION, stmtinfo_a->stmt,
 430                          "versioning for alias required: "
 431                          "can't determine dependence between %T and %T\n",
 432                          DR_REF (dra), DR_REF (drb));
 433
 434       /* Add to list of ddrs that need to be tested at run-time.  */
 435       return vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
 436     }
 437
 438   /* Known data dependence, but without any distance vector.  */
 439   if (DDR_NUM_DIST_VECTS (ddr) == 0)
 440     {
 441       if (apply_safelen ())
 442         return opt_result::success ();
 443
 444       if (dump_enabled_p ())
 445         dump_printf_loc (MSG_MISSED_OPTIMIZATION, stmtinfo_a->stmt,
 446                          "versioning for alias required: "
 447                          "bad dist vector for %T and %T\n",
 448                          DR_REF (dra), DR_REF (drb));
 449       /* Add to list of ddrs that need to be tested at run-time.  */
 450       return vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
 451     }
 452   /* LOOP_DEPTH is the index used below to read this loop's entry of each distance vector.  */
 453   loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
 454
 455   if (DDR_COULD_BE_INDEPENDENT_P (ddr)
 456       && vect_analyze_possibly_independent_ddr (ddr, loop_vinfo,
 457                                                 loop_depth, max_vf))
 458     return opt_result::success ();
 459   /* Otherwise examine the distance of every distance vector at LOOP_DEPTH.  */
 460   FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
 461     {
 462       int dist = dist_v[loop_depth];
 463
 464       if (dump_enabled_p ())
 465         dump_printf_loc (MSG_NOTE, vect_location,
 466                          "dependence distance  = %d.\n", dist);
 467
 468       if (dist == 0)
 469         {
 470           if (dump_enabled_p ())
 471             dump_printf_loc (MSG_NOTE, vect_location,
 472                              "dependence distance == 0 between %T and %T\n",
 473                              DR_REF (dra), DR_REF (drb));
 474
 475           /* When we perform grouped accesses and perform implicit CSE
 476              by detecting equal accesses and doing disambiguation with
 477              runtime alias tests like for
 478                 .. = a[i];
 479                 .. = a[i+1];
 480                 a[i] = ..;
 481                 a[i+1] = ..;
 482                 *p = ..;
 483                 .. = a[i];
 484                 .. = a[i+1];
 485              where we will end up loading { a[i], a[i+1] } once, make
 486              sure that inserting group loads before the first load and
 487              stores after the last store will do the right thing.
 488              Similar for groups like
 489                 a[i] = ...;
 490                 ... = a[i];
 491                 a[i+1] = ...;
 492              where loads from the group interleave with the store.  */
 493           if (!vect_preserves_scalar_order_p (dr_info_a, dr_info_b))
 494             return opt_result::failure_at (stmtinfo_a->stmt,
 495                                            "READ_WRITE dependence"
 496                                            " in interleaving.\n");
 497
 498           if (loop->safelen < 2)
 499             {  /* Without a safelen assertion the step of DRA must be known or checked to be nonzero.  */
 500               tree indicator = dr_zero_step_indicator (dra);
 501               if (!indicator || integer_zerop (indicator))
 502                 return opt_result::failure_at (stmtinfo_a->stmt,
 503                                                "access also has a zero step\n");
 504               else if (TREE_CODE (indicator) != INTEGER_CST)
 505                 vect_check_nonzero_value (loop_vinfo, indicator);
 506             }
 507           continue;
 508         }
 509
 510       if (dist > 0 && DDR_REVERSED_P (ddr))
 511         {
 512           /* If DDR_REVERSED_P the order of the data-refs in DDR was
 513              reversed (to make distance vector positive), and the actual
 514              distance is negative.  */
 515           if (dump_enabled_p ())
 516             dump_printf_loc (MSG_NOTE, vect_location,
 517                              "dependence distance negative.\n");
 518           /* When doing outer loop vectorization, we need to check if there is
 519              a backward dependence at the inner loop level if the dependence
 520              at the outer loop is reversed.  See PR81740.  */
 521           if (nested_in_vect_loop_p (loop, stmtinfo_a)
 522               || nested_in_vect_loop_p (loop, stmtinfo_b))
 523             {
 524               unsigned inner_depth = index_in_loop_nest (loop->inner->num,
 525                                                          DDR_LOOP_NEST (ddr));
 526               if (dist_v[inner_depth] < 0)
 527                 return opt_result::failure_at (stmtinfo_a->stmt,
 528                                                "not vectorized, dependence "
 529                                                "between data-refs %T and %T\n",
 530                                                DR_REF (dra), DR_REF (drb));
 531             }
 532           /* Record a negative dependence distance to later limit the
 533              amount of stmt copying / unrolling we can perform.
 534              Only need to handle read-after-write dependence.  */
 535           if (DR_IS_READ (drb)
 536               && (STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) == 0
 537                   || STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) > (unsigned)dist))
 538             STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) = dist;
 539           continue;
 540         }
 541
 542       unsigned int abs_dist = abs (dist);
 543       if (abs_dist >= 2 && abs_dist < *max_vf)
 544         {
 545           /* The dependence distance requires reduction of the maximal
 546              vectorization factor.  */
 547           *max_vf = abs_dist;
 548           if (dump_enabled_p ())
 549             dump_printf_loc (MSG_NOTE, vect_location,
 550                              "adjusting maximal vectorization factor to %i\n",
 551                              *max_vf);
 552         }
 553
 554       if (abs_dist >= *max_vf)
 555         {
 556           /* Dependence distance does not create dependence, as far as
 557              vectorization is concerned, in this case.  */
 558           if (dump_enabled_p ())
 559             dump_printf_loc (MSG_NOTE, vect_location,
 560                              "dependence distance >= VF.\n");
 561           continue;
 562         }
 563       /* Only reached when the distance has magnitude 1 and *MAX_VF is larger than 1.  */
 564       return opt_result::failure_at (stmtinfo_a->stmt,
 565                                      "not vectorized, possible dependence "
 566                                      "between data-refs %T and %T\n",
 567                                      DR_REF (dra), DR_REF (drb));
 568     }
 569
 570   return opt_result::success ();
 571 }
 572
 573 /* Function vect_analyze_data_ref_dependences.
 574
 575    Examine all the data references in the loop, and make sure there do not
 576    exist any data dependences between them.  Set *MAX_VF according to
 577    the maximum vectorization factor the data dependences allow.  Return the first failure found, else success.  */
 578
 579 opt_result
 580 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
 581                                    unsigned int *max_vf)
 582 {
 583   unsigned int i;
 584   struct data_dependence_relation *ddr;
 585
 586   DUMP_VECT_SCOPE ("vect_analyze_data_ref_dependences");
 587   /* Compute the dependence relations only if LOOP_VINFO does not have them yet.  */
 588   if (!LOOP_VINFO_DDRS (loop_vinfo).exists ())
 589     {
 590       LOOP_VINFO_DDRS (loop_vinfo)
 591         .create (LOOP_VINFO_DATAREFS (loop_vinfo).length ()
 592                  * LOOP_VINFO_DATAREFS (loop_vinfo).length ());
 593       /* We do not need read-read dependences.  */
 594       bool res = compute_all_dependences (LOOP_VINFO_DATAREFS (loop_vinfo),
 595                                           &LOOP_VINFO_DDRS (loop_vinfo),
 596                                           LOOP_VINFO_LOOP_NEST (loop_vinfo),
 597                                           false);
 598       gcc_assert (res);
 599     }
 600   /* Start optimistic; the per-DDR analysis clears this flag when it relies on safelen.  */
 601   LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = true;
 602
 603   /* For epilogues we either have no aliases or alias versioning
 604      was applied to original loop.  Therefore we may just get max_vf
 605      using VF of original loop.  */
 606   if (LOOP_VINFO_EPILOGUE_P (loop_vinfo))
 607     *max_vf = LOOP_VINFO_ORIG_MAX_VECT_FACTOR (loop_vinfo);
 608   else
 609     FOR_EACH_VEC_ELT (LOOP_VINFO_DDRS (loop_vinfo), i, ddr)
 610       {
 611         opt_result res
 612           = vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf);
 613         if (!res)  /* Stop at the first DDR that prevents vectorization.  */
 614           return res;
 615       }
 616
 617   return opt_result::success ();
 618 }
 619
 620
 621 /* Function vect_slp_analyze_data_ref_dependence.
 622
 623    Return TRUE if there (might) exist a dependence between a memory-reference
 624    DRA and a memory-reference DRB for VINFO, where DRA and DRB are the two
 625    references of DDR.  Return FALSE if they are known to be independent, are the
 626    same reference, are both reads or belong to the same interleaving chain.  */
 627
 628 static bool
 629 vect_slp_analyze_data_ref_dependence (vec_info *vinfo,
 630                                       struct data_dependence_relation *ddr)
 631 {
 632   struct data_reference *dra = DDR_A (ddr);
 633   struct data_reference *drb = DDR_B (ddr);
 634   dr_vec_info *dr_info_a = vinfo->lookup_dr (dra);
 635   dr_vec_info *dr_info_b = vinfo->lookup_dr (drb);
 636
 637   /* We need to check dependences of statements marked as unvectorizable
 638      as well, they still can prohibit vectorization.  */
 639
 640   /* Independent data accesses.  */
 641   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
 642     return false;
 643
 644   if (dra == drb)
 645     return false;
 646
 647   /* Read-read is OK.  */
 648   if (DR_IS_READ (dra) && DR_IS_READ (drb))
 649     return false;
 650
 651   /* If dra and drb are part of the same interleaving chain consider
 652      them independent.  */
 653   if (STMT_VINFO_GROUPED_ACCESS (dr_info_a->stmt)
 654       && (DR_GROUP_FIRST_ELEMENT (dr_info_a->stmt)
 655           == DR_GROUP_FIRST_ELEMENT (dr_info_b->stmt)))
 656     return false;
 657
 658   /* Unknown data dependence.  From here on the result is always true; the
 659      two branches below differ only in the dump message.  */
 660     {
 661       if  (dump_enabled_p ())
 662         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 663                          "can't determine dependence between %T and %T\n",
 664                          DR_REF (dra), DR_REF (drb));
 665     }
 666   else if (dump_enabled_p ())
 667     dump_printf_loc (MSG_NOTE, vect_location,
 668                      "determined dependence between %T and %T\n",
 669                      DR_REF (dra), DR_REF (drb));
 670
 671   return true;
 672 }
 673
 674
 675 /* Analyze dependences involved in the transform of SLP NODE.  STORES
 676    contain the vector of scalar stores of this instance if we are
 677    disambiguating the loads.  */
 678
 679 static bool
 680 vect_slp_analyze_node_dependences (vec_info *vinfo, slp_tree node,
 681                                    vec<stmt_vec_info> stores,
 682                                    stmt_vec_info last_store_info)
 683 {
 684   /* This walks over all stmts involved in the SLP load/store done
 685      in NODE verifying we can sink them up to the last stmt in the
 686      group.  */
 687   if (DR_IS_WRITE (STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE (node))))
 688     {
 689       stmt_vec_info last_access_info = vect_find_last_scalar_stmt_in_slp (node);
 690       for (unsigned k = 0; k < SLP_TREE_SCALAR_STMTS (node).length (); ++k)
 691         {
 692           stmt_vec_info access_info
 693             = vect_orig_stmt (SLP_TREE_SCALAR_STMTS (node)[k]);
 694           if (access_info == last_access_info)
 695             continue;
 696           data_reference *dr_a = STMT_VINFO_DATA_REF (access_info);
 697           ao_ref ref;
 698           bool ref_initialized_p = false;
 699           for (gimple_stmt_iterator gsi = gsi_for_stmt (access_info->stmt);
 700                gsi_stmt (gsi) != last_access_info->stmt; gsi_next (&gsi))
 701             {
 702               gimple *stmt = gsi_stmt (gsi);
 703               if (! gimple_vuse (stmt))
 704                 continue;
 705
 706               /* If we couldn't record a (single) data reference for this
 707                  stmt we have to resort to the alias oracle.  */
 708               stmt_vec_info stmt_info = vinfo->lookup_stmt (stmt);
 709               data_reference *dr_b = STMT_VINFO_DATA_REF (stmt_info);
 710               if (!dr_b)
 711                 {
 712                   /* We are moving a store - this means
 713                      we cannot use TBAA for disambiguation.  */
 714                   if (!ref_initialized_p)
 715                     ao_ref_init (&ref, DR_REF (dr_a));
 716                   if (stmt_may_clobber_ref_p_1 (stmt, &ref, false)
 717                       || ref_maybe_used_by_stmt_p (stmt, &ref, false))
 718                     return false;
 719                   continue;
 720                 }
 721
 722               bool dependent = false;
 723               /* If we run into a store of this same instance (we've just
 724                  marked those) then delay dependence checking until we run
 725                  into the last store because this is where it will have
 726                  been sunk to (and we verify if we can do that as well).  */
 727               if (gimple_visited_p (stmt))
 728                 {
 729                   if (stmt_info != last_store_info)
 730                     continue;
 731
 732                   for (stmt_vec_info &store_info : stores)
 733                     {
 734                       data_reference *store_dr
 735                         = STMT_VINFO_DATA_REF (store_info);
 736                       ddr_p ddr = initialize_data_dependence_relation
 737                                     (dr_a, store_dr, vNULL);
 738                       dependent
 739                         = vect_slp_analyze_data_ref_dependence (vinfo, ddr);
 740                       free_dependence_relation (ddr);
 741                       if (dependent)
 742                         break;
 743                     }
 744                 }
 745               else
 746                 {
 747                   ddr_p ddr = initialize_data_dependence_relation (dr_a,
 748                                                                    dr_b, vNULL);
 749                   dependent = vect_slp_analyze_data_ref_dependence (vinfo, ddr);
 750                   free_dependence_relation (ddr);
 751                 }
 752               if (dependent)
 753                 return false;
 754             }
 755         }
 756     }
 757   else /* DR_IS_READ */
 758     {
 759       stmt_vec_info first_access_info
 760         = vect_find_first_scalar_stmt_in_slp (node);
 761       for (unsigned k = 0; k < SLP_TREE_SCALAR_STMTS (node).length (); ++k)
 762         {
 763           stmt_vec_info access_info
 764             = vect_orig_stmt (SLP_TREE_SCALAR_STMTS (node)[k]);
 765           if (access_info == first_access_info)
 766             continue;
 767           data_reference *dr_a = STMT_VINFO_DATA_REF (access_info);
 768           ao_ref ref;
 769           bool ref_initialized_p = false;
 770           for (gimple_stmt_iterator gsi = gsi_for_stmt (access_info->stmt);
 771                gsi_stmt (gsi) != first_access_info->stmt; gsi_prev (&gsi))
 772             {
 773               gimple *stmt = gsi_stmt (gsi);
 774               if (! gimple_vdef (stmt))
 775                 continue;
 776
 777               /* If we couldn't record a (single) data reference for this
 778                  stmt we have to resort to the alias oracle.  */
 779               stmt_vec_info stmt_info = vinfo->lookup_stmt (stmt);
 780               data_reference *dr_b = STMT_VINFO_DATA_REF (stmt_info);
 781
 782               /* We are hoisting a load - this means we can use
 783                  TBAA for disambiguation.  */
 784               if (!ref_initialized_p)
 785                 ao_ref_init (&ref, DR_REF (dr_a));
 786               if (stmt_may_clobber_ref_p_1 (stmt, &ref, true))
 787                 {
 788                   if (!dr_b)
 789                     return false;
 790                   /* Resort to dependence checking below.  */
 791                 }
 792               else
 793                 /* No dependence.  */
 794                 continue;
 795
 796               bool dependent = false;
 797               /* If we run into a store of this same instance (we've just
 798                  marked those) then delay dependence checking until we run
 799                  into the last store because this is where it will have
 800                  been sunk to (and we verify if we can do that as well).  */
 801               if (gimple_visited_p (stmt))
 802                 {
 803                   if (stmt_info != last_store_info)
 804                     continue;
 805
 806                   for (stmt_vec_info &store_info : stores)
 807                     {
 808                       data_reference *store_dr
 809                         = STMT_VINFO_DATA_REF (store_info);
 810                       ddr_p ddr = initialize_data_dependence_relation
 811                                     (dr_a, store_dr, vNULL);
 812                       dependent
 813                         = vect_slp_analyze_data_ref_dependence (vinfo, ddr);
 814                       free_dependence_relation (ddr);
 815                       if (dependent)
 816                         break;
 817                     }
 818                 }
 819               else
 820                 {
 821                   ddr_p ddr = initialize_data_dependence_relation (dr_a,
 822                                                                    dr_b, vNULL);
 823                   dependent = vect_slp_analyze_data_ref_dependence (vinfo, ddr);
 824                   free_dependence_relation (ddr);
 825                 }
 826               if (dependent)
 827                 return false;
 828             }
 829         }
 830     }
 831   return true;
 832 }
 833
 834
 835 /* Function vect_slp_analyze_instance_dependence.
 836
 837    Check whether the scalar loads and stores of the SLP instance INSTANCE
 838    can be moved to the vectorized stmt insert location.  The stores of a
 839    store instance are analyzed first and are then marked as visited while
        each node in SLP_INSTANCE_LOADS is analyzed against them.  Return true
        if none of the node analyses failed and false otherwise.  */
 840
 841 bool
 842 vect_slp_analyze_instance_dependence (vec_info *vinfo, slp_instance instance)
 843 {
 844   DUMP_VECT_SCOPE ("vect_slp_analyze_instance_dependence");
 845
 846   /* The stores of this instance are at the root of the SLP tree.  */
 847   slp_tree store = NULL;
 848   if (SLP_INSTANCE_KIND (instance) == slp_inst_kind_store)
 849     store = SLP_INSTANCE_TREE (instance);
 850
 851   /* Verify we can sink stores to the vectorized stmt insert location.  */
 852   stmt_vec_info last_store_info = NULL;
 853   if (store)
 854     {
 855       if (! vect_slp_analyze_node_dependences (vinfo, store, vNULL, NULL))
 856         return false;
 857
 858       /* Mark stores in this instance and remember the last one.  */
 859       last_store_info = vect_find_last_scalar_stmt_in_slp (store);
 860       for (unsigned k = 0; k < SLP_TREE_SCALAR_STMTS (store).length (); ++k)
 861         gimple_set_visited (SLP_TREE_SCALAR_STMTS (store)[k]->stmt, true);
 862     }
 863
 864   bool res = true;
 865
 866   /* Verify we can hoist loads to the vectorized stmt insert location,
 867      special-casing stores of this instance.  */
 868   for (slp_tree &load : SLP_INSTANCE_LOADS (instance))
 869     if (! vect_slp_analyze_node_dependences (vinfo, load,
 870                                              store
 871                                              ? SLP_TREE_SCALAR_STMTS (store)
 872                                              : vNULL, last_store_info))
 873       {
             /* Do not return from here: the visited flags set above still
                have to be cleared below.  */
 874         res = false;
 875         break;
 876       }
 877
 878   /* Unset the visited flag.  */
 879   if (store)
 880     for (unsigned k = 0; k < SLP_TREE_SCALAR_STMTS (store).length (); ++k)
 881       gimple_set_visited (SLP_TREE_SCALAR_STMTS (store)[k]->stmt, false);
 882
 883   return res;
 884 }
 885
 886 /* Return the misalignment of DR_INFO accessed in VECTYPE with OFFSET
 887    applied.  For a grouped access the misalignment recorded for the first
        element of the group is used, adjusted by the difference between the
        DR_INIT of DR_INFO and that of the first element.
        DR_MISALIGNMENT_UNKNOWN is returned if the recorded misalignment is
        unknown, if the recorded target alignment may be smaller than the
        preferred vector alignment of VECTYPE, or if known_misalignment
        cannot compute the adjusted misalignment.  The recorded misalignment
        must have been initialized.  */
 888
 889 int
 890 dr_misalignment (dr_vec_info *dr_info, tree vectype, poly_int64 offset)
 891 {
 892   HOST_WIDE_INT diff = 0;
 893   /* Alignment is only analyzed for the first element of a DR group,
 894      use that but adjust misalignment by the offset of the access.  */
 895   if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt))
 896     {
 897       dr_vec_info *first_dr
 898         = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt));
 899       /* vect_analyze_data_ref_accesses guarantees that DR_INIT are
 900          INTEGER_CSTs and the first element in the group has the lowest
 901          address.  */
 902       diff = (TREE_INT_CST_LOW (DR_INIT (dr_info->dr))
 903               - TREE_INT_CST_LOW (DR_INIT (first_dr->dr)));
 904       gcc_assert (diff >= 0);
           /* From here on all alignment info is taken from the group leader.  */
 905       dr_info = first_dr;
 906     }
 907
 908   int misalign = dr_info->misalignment;
 909   gcc_assert (misalign != DR_MISALIGNMENT_UNINITIALIZED);
 910   if (misalign == DR_MISALIGNMENT_UNKNOWN)
 911     return misalign;
 912
 913   /* If the access is only aligned for a vector type with smaller alignment
 914      requirement the access has unknown misalignment.  */
 915   if (maybe_lt (dr_info->target_alignment * BITS_PER_UNIT,
 916                 targetm.vectorize.preferred_vector_alignment (vectype)))
 917     return DR_MISALIGNMENT_UNKNOWN;
 918
 919   /* Apply the offset from the DR group start and the externally supplied
 920      offset which can for example result from a negative stride access.  */
 921   poly_int64 misalignment = misalign + diff + offset;
 922
 923   /* vect_compute_data_ref_alignment will have ensured that target_alignment
 924      is constant and otherwise set misalign to DR_MISALIGNMENT_UNKNOWN.  */
 925   unsigned HOST_WIDE_INT target_alignment_c
 926     = dr_info->target_alignment.to_constant ();
 927   if (!known_misalignment (misalignment, target_alignment_c, &misalign))
 928     return DR_MISALIGNMENT_UNKNOWN;
 929   return misalign;
 930 }
 931
 932 /* Record the base alignment guarantee given by DRB, which occurs
 933    in STMT_INFO.  The entry for DRB's base address in
        VINFO->base_alignments is created if there is none yet and is
        replaced if the existing entry has a smaller base alignment; an
        existing entry with an equal or larger base alignment is kept.  */
 934
 935 static void
 936 vect_record_base_alignment (vec_info *vinfo, stmt_vec_info stmt_info,
 937                             innermost_loop_behavior *drb)
 938 {
 939   bool existed;
 940   std::pair<stmt_vec_info, innermost_loop_behavior *> &entry
 941     = vinfo->base_alignments.get_or_insert (drb->base_address, &existed);
       /* ENTRY.second is only inspected when the entry already existed.  */
 942   if (!existed || entry.second->base_alignment < drb->base_alignment)
 943     {
 944       entry = std::make_pair (stmt_info, drb);
 945       if (dump_enabled_p ())
 946         dump_printf_loc (MSG_NOTE, vect_location,
 947                          "recording new base alignment for %T\n"
 948                          "  alignment:    %d\n"
 949                          "  misalignment: %d\n"
 950                          "  based on:     %G",
 951                          drb->base_address,
 952                          drb->base_alignment,
 953                          drb->base_misalignment,
 954                          stmt_info->stmt);
 955     }
 956 }
 957
 958 /* If the region we're going to vectorize is reached, all unconditional
 959    data references occur at least once.  We can therefore pool the base
 960    alignment guarantees from each unconditional reference.  Do this by
 961    going through all the data references in VINFO and checking whether
 962    the containing statement makes the reference unconditionally.  If so,
 963    record the alignment of the base address in VINFO so that it can be
 964    used for all other references with the same base.
        References whose statement is not marked vectorizable and
        gather/scatter accesses are skipped as well.  */
 965
 966 void
 967 vect_record_base_alignments (vec_info *vinfo)
 968 {
 969   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
       /* LOOP stays NULL when VINFO is not a loop_vec_info.  */
 970   class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
 971   for (data_reference *dr : vinfo->shared->datarefs)
 972     {
 973       dr_vec_info *dr_info = vinfo->lookup_dr (dr);
 974       stmt_vec_info stmt_info = dr_info->stmt;
 975       if (!DR_IS_CONDITIONAL_IN_STMT (dr)
 976           && STMT_VINFO_VECTORIZABLE (stmt_info)
 977           && !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
 978         {
 979           vect_record_base_alignment (vinfo, stmt_info, &DR_INNERMOST (dr));
 980
 981           /* If DR is nested in the loop that is being vectorized, we can also
 982              record the alignment of the base wrt the outer loop.  */
 983           if (loop && nested_in_vect_loop_p (loop, stmt_info))
 984             vect_record_base_alignment
 985               (vinfo, stmt_info, &STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info));
 986         }
 987     }
 988 }
 989
 990 /* Function vect_compute_data_ref_alignment
 991
 992    Compute the misalignment of the data reference DR_INFO when vectorizing
 993    with VECTYPE.
 994
 995    Output:
 996    1. initialized misalignment info for DR_INFO; it is left at
           DR_MISALIGNMENT_UNKNOWN on every early return
        2. the target alignment of DR_INFO, taken from the target's preferred
           vector alignment for VECTYPE (not set for gather/scatter accesses)
        3. DR_INFO's base_decl and base_misaligned, if the alignment of the
           base is to be forced
 997
 998    FOR NOW: No analysis is actually performed. Misalignment is calculated
 999    only for trivial cases. TODO.  */
1000
1001 static void
1002 vect_compute_data_ref_alignment (vec_info *vinfo, dr_vec_info *dr_info,
1003                                  tree vectype)
1004 {
1005   stmt_vec_info stmt_info = dr_info->stmt;
1006   vec_base_alignments *base_alignments = &vinfo->base_alignments;
1007   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1008   class loop *loop = NULL;
1009   tree ref = DR_REF (dr_info->dr);
1010
1011   if (dump_enabled_p ())
1012     dump_printf_loc (MSG_NOTE, vect_location,
1013                      "vect_compute_data_ref_alignment:\n");
1014
1015   if (loop_vinfo)
1016     loop = LOOP_VINFO_LOOP (loop_vinfo);
1017
1018   /* Initialize misalignment to unknown.  */
1019   SET_DR_MISALIGNMENT (dr_info, DR_MISALIGNMENT_UNKNOWN);
1020
1021   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
1022     return;
1023
1024   innermost_loop_behavior *drb = vect_dr_behavior (vinfo, dr_info);
1025   bool step_preserves_misalignment_p;
1026
       /* The target hook reports bits; the target alignment is kept in
          units of BITS_PER_UNIT.  */
1027   poly_uint64 vector_alignment
1028     = exact_div (targetm.vectorize.preferred_vector_alignment (vectype),
1029                  BITS_PER_UNIT);
1030   SET_DR_TARGET_ALIGNMENT (dr_info, vector_alignment);
1031
1032   /* If the main loop has peeled for alignment we have no way of knowing
1033      whether the data accesses in the epilogues are aligned.  We can't at
1034      compile time answer the question whether we have entered the main loop or
1035      not.  Fixes PR 92351.  */
1036   if (loop_vinfo)
1037     {
1038       loop_vec_info orig_loop_vinfo = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo);
1039       if (orig_loop_vinfo
1040           && LOOP_VINFO_PEELING_FOR_ALIGNMENT (orig_loop_vinfo) != 0)
1041         return;
1042     }
1043
       /* Everything below needs a compile-time constant alignment; otherwise
          the misalignment stays unknown.  */
1044   unsigned HOST_WIDE_INT vect_align_c;
1045   if (!vector_alignment.is_constant (&vect_align_c))
1046     return;
1047
1048   /* No step for BB vectorization.  */
1049   if (!loop)
1050     {
1051       gcc_assert (integer_zerop (drb->step));
1052       step_preserves_misalignment_p = true;
1053     }
1054
1055   /* In case the dataref is in an inner-loop of the loop that is being
1056      vectorized (LOOP), we use the base and misalignment information
1057      relative to the outer-loop (LOOP).  This is ok only if the misalignment
1058      stays the same throughout the execution of the inner-loop, which is why
1059      we have to check that the stride of the dataref in the inner-loop evenly
1060      divides by the vector alignment.  */
1061   else if (nested_in_vect_loop_p (loop, stmt_info))
1062     {
1063       step_preserves_misalignment_p
1064         = (DR_STEP_ALIGNMENT (dr_info->dr) % vect_align_c) == 0;
1065
1066       if (dump_enabled_p ())
1067         {
1068           if (step_preserves_misalignment_p)
1069             dump_printf_loc (MSG_NOTE, vect_location,
1070                              "inner step divides the vector alignment.\n");
1071           else
1072             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1073                              "inner step doesn't divide the vector"
1074                              " alignment.\n");
1075         }
1076     }
1077
1078   /* Similarly we can only use base and misalignment information relative to
1079      an innermost loop if the misalignment stays the same throughout the
1080      execution of the loop.  As above, this is the case if the stride of
1081      the dataref evenly divides by the alignment.  */
1082   else
1083     {
1084       poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1085       step_preserves_misalignment_p
1086         = multiple_p (DR_STEP_ALIGNMENT (dr_info->dr) * vf, vect_align_c);
1087
1088       if (!step_preserves_misalignment_p && dump_enabled_p ())
1089         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1090                          "step doesn't divide the vector alignment.\n");
1091     }
1092
1093   unsigned int base_alignment = drb->base_alignment;
1094   unsigned int base_misalignment = drb->base_misalignment;
1095
1096   /* Calculate the maximum of the pooled base address alignment and the
1097      alignment that we can compute for DR itself.  When VINFO is not a
          loop_vec_info the pooled entry is only used if the block of the
          statement that recorded it dominates the block of this statement
          and, when both are in the same block, if the recording DR's group
          number is not greater than the group number of DR_INFO.  */
1098   std::pair<stmt_vec_info, innermost_loop_behavior *> *entry
1099     = base_alignments->get (drb->base_address);
1100   if (entry
1101       && base_alignment < (*entry).second->base_alignment
1102       && (loop_vinfo
1103           || (dominated_by_p (CDI_DOMINATORS, gimple_bb (stmt_info->stmt),
1104                               gimple_bb (entry->first->stmt))
1105               && (gimple_bb (stmt_info->stmt) != gimple_bb (entry->first->stmt)
1106                   || (entry->first->dr_aux.group <= dr_info->group)))))
1107     {
1108       base_alignment = entry->second->base_alignment;
1109       base_misalignment = entry->second->base_misalignment;
1110     }
1111
1112   if (drb->offset_alignment < vect_align_c
1113       || !step_preserves_misalignment_p
1114       /* We need to know whether the step wrt the vectorized loop is
1115          negative when computing the starting misalignment below.  */
1116       || TREE_CODE (drb->step) != INTEGER_CST)
1117     {
1118       if (dump_enabled_p ())
1119         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1120                          "Unknown alignment for access: %T\n", ref);
1121       return;
1122     }
1123
       /* The base is not known to be sufficiently aligned; see whether its
          alignment can be forced instead.  */
1124   if (base_alignment < vect_align_c)
1125     {
1126       unsigned int max_alignment;
1127       tree base = get_base_for_alignment (drb->base_address, &max_alignment);
1128       if (max_alignment < vect_align_c
1129           || !vect_can_force_dr_alignment_p (base,
1130                                              vect_align_c * BITS_PER_UNIT))
1131         {
1132           if (dump_enabled_p ())
1133             dump_printf_loc (MSG_NOTE, vect_location,
1134                              "can't force alignment of ref: %T\n", ref);
1135           return;
1136         }
1137
1138       /* Force the alignment of the decl.
1139          NOTE: This is the only change to the code we make during
1140          the analysis phase, before deciding to vectorize the loop.  */
1141       if (dump_enabled_p ())
1142         dump_printf_loc (MSG_NOTE, vect_location,
1143                          "force alignment of %T\n", ref);
1144
           /* Only the request is recorded here; with the base treated as
              aligned its misalignment becomes zero.  */
1145       dr_info->base_decl = base;
1146       dr_info->base_misaligned = true;
1147       base_misalignment = 0;
1148     }
1149   poly_int64 misalignment
1150     = base_misalignment + wi::to_poly_offset (drb->init).force_shwi ();
1151
1152   unsigned int const_misalignment;
1153   if (!known_misalignment (misalignment, vect_align_c, &const_misalignment))
1154     {
1155       if (dump_enabled_p ())
1156         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1157                          "Non-constant misalignment for access: %T\n", ref);
1158       return;
1159     }
1160
1161   SET_DR_MISALIGNMENT (dr_info, const_misalignment);
1162
       /* NOTE(review): this is the success path but the message is emitted
          under MSG_MISSED_OPTIMIZATION rather than MSG_NOTE - confirm this
          is intended.  */
1163   if (dump_enabled_p ())
1164     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1165                      "misalign = %d bytes of ref %T\n",
1166                      const_misalignment, ref);
1167
1168   return;
1169 }
1170
1171 /* Return whether DR_INFO, which is related to DR_PEEL_INFO in
1172    that it only differs in DR_INIT, is aligned if DR_PEEL_INFO
1173    is made aligned via peeling.  This requires the target alignment of
        DR_PEEL_INFO to be a multiple of the target alignment of DR_INFO and
        the difference of the two DR_INITs to be zero or a multiple of the
        target alignment of DR_INFO.  */
1174
1175 static bool
1176 vect_dr_aligned_if_related_peeled_dr_is (dr_vec_info *dr_info,
1177                                          dr_vec_info *dr_peel_info)
1178 {
1179   if (multiple_p (DR_TARGET_ALIGNMENT (dr_peel_info),
1180                   DR_TARGET_ALIGNMENT (dr_info)))
1181     {
1182       poly_offset_int diff
1183         = (wi::to_poly_offset (DR_INIT (dr_peel_info->dr))
1184            - wi::to_poly_offset (DR_INIT (dr_info->dr)));
1185       if (known_eq (diff, 0)
1186           || multiple_p (diff, DR_TARGET_ALIGNMENT (dr_info)))
1187         return true;
1188     }
1189   return false;
1190 }
1191
1192 /* Return whether DR_INFO is aligned if DR_PEEL_INFO is made
1193    aligned via peeling.  This is only ever the case if both references
        have equal DR_BASE_ADDRESS, DR_OFFSET and DR_STEP; the remaining check
        is done by vect_dr_aligned_if_related_peeled_dr_is.  */
1194
1195 static bool
1196 vect_dr_aligned_if_peeled_dr_is (dr_vec_info *dr_info,
1197                                  dr_vec_info *dr_peel_info)
1198 {
1199   if (!operand_equal_p (DR_BASE_ADDRESS (dr_info->dr),
1200                         DR_BASE_ADDRESS (dr_peel_info->dr), 0)
1201       || !operand_equal_p (DR_OFFSET (dr_info->dr),
1202                            DR_OFFSET (dr_peel_info->dr), 0)
1203       || !operand_equal_p (DR_STEP (dr_info->dr),
1204                            DR_STEP (dr_peel_info->dr), 0))
1205     return false;
1206
1207   return vect_dr_aligned_if_related_peeled_dr_is (dr_info, dr_peel_info);
1208 }
1209
1210 /* Compute the value for dr_info->misalignment so that the access appears
1211    aligned.  This is used by peeling to compensate for dr_misalignment
1212    applying the offset for negative step.  The result is 0 if DR_STEP is
        not negative.  Otherwise it is the misalignment, relative to the
        target alignment of DR_INFO, of the product of the element size and
        the number of vector elements minus one, or DR_MISALIGNMENT_UNKNOWN
        if the target alignment is not constant or known_misalignment
        fails.  */
1213
1214 int
1215 vect_dr_misalign_for_aligned_access (dr_vec_info *dr_info)
1216 {
1217   if (tree_int_cst_sgn (DR_STEP (dr_info->dr)) >= 0)
1218     return 0;
1219
1220   tree vectype = STMT_VINFO_VECTYPE (dr_info->stmt);
1221   poly_int64 misalignment
1222     = ((TYPE_VECTOR_SUBPARTS (vectype) - 1)
1223        * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
1224
1225   unsigned HOST_WIDE_INT target_alignment_c;
1226   int misalign;
1227   if (!dr_info->target_alignment.is_constant (&target_alignment_c)
1228       || !known_misalignment (misalignment, target_alignment_c, &misalign))
1229     return DR_MISALIGNMENT_UNKNOWN;
1230   return misalign;
1231 }
1232
1233 /* Function vect_update_misalignment_for_peel.
1234    Sets DR_INFO's misalignment
1235    - to the value of vect_dr_misalign_for_aligned_access for DR_PEEL_INFO
          if DR_INFO is aligned whenever DR_PEEL_INFO is,
1236    - to the misalignment computed using NPEEL if the alignment of both
          DR_INFO and DR_PEEL_INFO is known and DR_INFO's target alignment is
          constant,
1237    - to -1 (unknown) otherwise.
1238
1239    DR_INFO - the data reference whose misalignment is to be adjusted.
1240    DR_PEEL_INFO - the data reference whose misalignment is being made
1241                   zero in the vector loop by the peel.
1242    NPEEL - the number of iterations in the peel loop if the misalignment
1243            of DR_PEEL_INFO is known at compile time.  */
1244
1245 static void
1246 vect_update_misalignment_for_peel (dr_vec_info *dr_info,
1247                                    dr_vec_info *dr_peel_info, int npeel)
1248 {
1249   /* If dr_info is aligned if dr_peel_info is, then mark it so.  */
1250   if (vect_dr_aligned_if_peeled_dr_is (dr_info, dr_peel_info))
1251     {
1252       SET_DR_MISALIGNMENT (dr_info,
1253                            vect_dr_misalign_for_aligned_access (dr_peel_info));
1254       return;
1255     }
1256
1257   unsigned HOST_WIDE_INT alignment;
1258   if (DR_TARGET_ALIGNMENT (dr_info).is_constant (&alignment)
1259       && known_alignment_for_access_p (dr_info,
1260                                        STMT_VINFO_VECTYPE (dr_info->stmt))
1261       && known_alignment_for_access_p (dr_peel_info,
1262                                        STMT_VINFO_VECTYPE (dr_peel_info->stmt)))
1263     {
           /* Advance the recorded misalignment by NPEEL steps.  The mask
              below assumes ALIGNMENT is a power of two - TODO confirm.  */
1264       int misal = dr_info->misalignment;
1265       misal += npeel * TREE_INT_CST_LOW (DR_STEP (dr_info->dr));
1266       misal &= alignment - 1;
1267       set_dr_misalignment (dr_info, misal);
1268       return;
1269     }
1270
1271   if (dump_enabled_p ())
1272     dump_printf_loc (MSG_NOTE, vect_location, "Setting misalignment " \
1273                      "to unknown (-1).\n");
1274   SET_DR_MISALIGNMENT (dr_info, DR_MISALIGNMENT_UNKNOWN);
1275 }
1276
1277 /* Return true if alignment is relevant for DR_INFO.  It is not relevant
        for statements that are not relevant for vectorization, for
        non-leading members of an access group, for gather/scatter accesses,
        for accesses with a zero DR_STEP and for strided accesses that are not
        grouped.  */
1278
1279 static bool
1280 vect_relevant_for_alignment_p (dr_vec_info *dr_info)
1281 {
1282   stmt_vec_info stmt_info = dr_info->stmt;
1283
1284   if (!STMT_VINFO_RELEVANT_P (stmt_info))
1285     return false;
1286
1287   /* For interleaving, only the alignment of the first access matters.  */
1288   if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1289       && DR_GROUP_FIRST_ELEMENT (stmt_info) != stmt_info)
1290     return false;
1291
1292   /* Scatter-gather and invariant accesses continue to address individual
1293      scalars, so vector-level alignment is irrelevant.  */
1294   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)
1295       || integer_zerop (DR_STEP (dr_info->dr)))
1296     return false;
1297
1298   /* Strided accesses perform only component accesses, alignment is
1299      irrelevant for them.  */
1300   if (STMT_VINFO_STRIDED_P (stmt_info)
1301       && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
1302     return false;
1303
1304   return true;
1305 }
1306
1307 /* Given a memory reference EXP return whether its alignment is less
1308    than its size.  Also return true if the size of EXP's type does not
        fit an unsigned HOST_WIDE_INT.  */
1309
1310 static bool
1311 not_size_aligned (tree exp)
1312 {
1313   if (!tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (exp))))
1314     return true;
1315
1316   return (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (exp)))
1317           > get_object_alignment (exp));
1318 }
1319
1320 /* Function vector_alignment_reachable_p
1321
1322    Return true if vector alignment for DR_INFO is reachable by peeling
1323    a few loop iterations.  Return false otherwise.
        Grouped accesses with unknown misalignment are rejected.  If the
        misalignment is unknown for a non-grouped access the decision is left
        to the target's vector_alignment_reachable hook.  */
1324
1325 static bool
1326 vector_alignment_reachable_p (dr_vec_info *dr_info)
1327 {
1328   stmt_vec_info stmt_info = dr_info->stmt;
1329   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1330
1331   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1332     {
1333       /* For interleaved access we peel only if number of iterations in
1334          the prolog loop ({VF - misalignment}), is a multiple of the
1335          number of the interleaved accesses.  */
1336       int elem_size, mis_in_elements;
1337
1338       /* FORNOW: handle only known alignment.  */
1339       if (!known_alignment_for_access_p (dr_info, vectype))
1340         return false;
1341
1342       poly_uint64 nelements = TYPE_VECTOR_SUBPARTS (vectype);
1343       poly_uint64 vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
1344       elem_size = vector_element_size (vector_size, nelements);
1345       mis_in_elements = dr_misalignment (dr_info, vectype) / elem_size;
1346
1347       if (!multiple_p (nelements - mis_in_elements, DR_GROUP_SIZE (stmt_info)))
1348         return false;
1349     }
1350
1351   /* If misalignment is known at the compile time then allow peeling
1352      only if natural alignment is reachable through peeling.  */
1353   if (known_alignment_for_access_p (dr_info, vectype)
1354       && !aligned_access_p (dr_info, vectype))
1355     {
1356       HOST_WIDE_INT elmsize =
1357                 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1358       if (dump_enabled_p ())
1359         {
1360           dump_printf_loc (MSG_NOTE, vect_location,
1361                            "data size = %wd. misalignment = %d.\n", elmsize,
1362                            dr_misalignment (dr_info, vectype));
1363         }
           /* Peeling moves the access by whole elements only, so a
              misalignment that is not a multiple of the element size cannot
              be removed.  */
1364       if (dr_misalignment (dr_info, vectype) % elmsize)
1365         {
1366           if (dump_enabled_p ())
1367             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1368                              "data size does not divide the misalignment.\n");
1369           return false;
1370         }
1371     }
1372
       /* With unknown misalignment let the target decide, telling it whether
          the reference is less aligned than its size.  */
1373   if (!known_alignment_for_access_p (dr_info, vectype))
1374     {
1375       tree type = TREE_TYPE (DR_REF (dr_info->dr));
1376       bool is_packed = not_size_aligned (DR_REF (dr_info->dr));
1377       if (dump_enabled_p ())
1378         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1379                          "Unknown misalignment, %snaturally aligned\n",
1380                          is_packed ? "not " : "");
1381       return targetm.vectorize.vector_alignment_reachable (type, is_packed);
1382     }
1383
1384   return true;
1385 }
1386
1387
1388 /* Calculate the cost of the memory access represented by DR_INFO.
        ALIGNMENT_SUPPORT_SCHEME and MISALIGNMENT are passed on to
        vect_get_load_cost for a read and to vect_get_store_cost otherwise,
        together with INSIDE_COST and BODY_COST_VEC.  OUTSIDE_COST and
        PROLOGUE_COST_VEC are only passed on for reads.  The number of copies
        is 1 for a pure SLP statement and is otherwise obtained from
        vect_get_num_copies.  */
1389
1390 static void
1391 vect_get_data_access_cost (vec_info *vinfo, dr_vec_info *dr_info,
1392                            dr_alignment_support alignment_support_scheme,
1393                            int misalignment,
1394                            unsigned int *inside_cost,
1395                            unsigned int *outside_cost,
1396                            stmt_vector_for_cost *body_cost_vec,
1397                            stmt_vector_for_cost *prologue_cost_vec)
1398 {
1399   stmt_vec_info stmt_info = dr_info->stmt;
1400   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1401   int ncopies;
1402
1403   if (PURE_SLP_STMT (stmt_info))
1404     ncopies = 1;
1405   else
1406     ncopies = vect_get_num_copies (loop_vinfo, STMT_VINFO_VECTYPE (stmt_info));
1407
1408   if (DR_IS_READ (dr_info->dr))
1409     vect_get_load_cost (vinfo, stmt_info, ncopies, alignment_support_scheme,
1410                         misalignment, true, inside_cost,
1411                         outside_cost, prologue_cost_vec, body_cost_vec, false);
1412   else
1413     vect_get_store_cost (vinfo,stmt_info, ncopies, alignment_support_scheme,
1414                          misalignment, inside_cost, body_cost_vec);
1415
1416   if (dump_enabled_p ())
1417     dump_printf_loc (MSG_NOTE, vect_location,
1418                      "vect_get_data_access_cost: inside_cost = %d, "
1419                      "outside_cost = %d.\n", *inside_cost, *outside_cost);
1420 }
1421
1422
     /* An entry of the peeling hash table, keyed by NPEEL.  */
1423 typedef struct _vect_peel_info
1424 {
       /* The data reference recorded when the entry was created.  */
1425   dr_vec_info *dr_info;
       /* The number of peeled iterations this entry stands for.  */
1426   int npeel;
       /* How often NPEEL was inserted; vect_peeling_hash_insert may
          additionally bias this by VECT_MAX_COST.  */
1427   unsigned int count;
1428 } *vect_peel_info;
1429
     /* A peeling option together with cost fields.  NOTE(review): presumably
        INSIDE_COST and OUTSIDE_COST are the costs of peeling as described by
        PEEL_INFO; the code filling them in is outside this part of the
        file - confirm.  */
1430 typedef struct _vect_peel_extended_info
1431 {
1432   vec_info *vinfo;
1433   struct _vect_peel_info peel_info;
1434   unsigned int inside_cost;
1435   unsigned int outside_cost;
1436 } *vect_peel_extended_info;
1437
1438
1439 /* Peeling hashtable helpers.  Entries are hashed and compared by their
        NPEEL value only.  */
1440
1441 struct peel_info_hasher : free_ptr_hash <_vect_peel_info>
1442 {
1443   static inline hashval_t hash (const _vect_peel_info *);
1444   static inline bool equal (const _vect_peel_info *, const _vect_peel_info *);
1445 };
1446
     /* Return the hash of PEEL_INFO, which is its NPEEL converted to
        hashval_t.  */
1447 inline hashval_t
1448 peel_info_hasher::hash (const _vect_peel_info *peel_info)
1449 {
1450   return (hashval_t) peel_info->npeel;
1451 }
1452
     /* Return true if A and B have the same NPEEL.  The other fields are
        not compared.  */
1453 inline bool
1454 peel_info_hasher::equal (const _vect_peel_info *a, const _vect_peel_info *b)
1455 {
1456   return (a->npeel == b->npeel);
1457 }
1458
1459
1460 /* Insert DR_INFO into peeling hash table with NPEEL as key.  If
        PEELING_HTAB already has an entry for NPEEL only its count is
        incremented and the data reference recorded in it is kept; otherwise
        a new entry recording DR_INFO with a count of 1 is allocated and
        inserted.  If SUPPORTABLE_IF_NOT_ALIGNED is false and the cost model
        of LOOP_VINFO's loop is unlimited, the count of the entry is
        additionally increased by VECT_MAX_COST.  */
1461
1462 static void
1463 vect_peeling_hash_insert (hash_table<peel_info_hasher> *peeling_htab,
1464                           loop_vec_info loop_vinfo, dr_vec_info *dr_info,
1465                           int npeel, bool supportable_if_not_aligned)
1466 {
1467   struct _vect_peel_info elem, *slot;
1468   _vect_peel_info **new_slot;
1469
       /* Only NPEEL takes part in hashing and comparison, so the other
          fields of ELEM can stay uninitialized for the lookup.  */
1470   elem.npeel = npeel;
1471   slot = peeling_htab->find (&elem);
1472   if (slot)
1473     slot->count++;
1474   else
1475     {
1476       slot = XNEW (struct _vect_peel_info);
1477       slot->npeel = npeel;
1478       slot->dr_info = dr_info;
1479       slot->count = 1;
1480       new_slot = peeling_htab->find_slot (slot, INSERT);
1481       *new_slot = slot;
1482     }
1483
1484   /* If this DR is not supported with unknown misalignment then bias
1485      this slot when the cost model is disabled.  */
1486   if (!supportable_if_not_aligned
1487       && unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
1488     slot->count += VECT_MAX_COST;
1489 }
1490
1491
1492 /* Traverse peeling hash table to find peeling option that aligns maximum
1493    number of data accesses.  SLOT is the entry being visited and MAX the
        best option found so far.  MAX is updated if the entry has a larger
        count or, for equal counts, a smaller NPEEL.  Always returns 1,
        presumably so that the traversal continues with the next entry.  */
1494
1495 int
1496 vect_peeling_hash_get_most_frequent (_vect_peel_info **slot,
1497                                      _vect_peel_extended_info *max)
1498 {
1499   vect_peel_info elem = *slot;
1500
1501   if (elem->count > max->peel_info.count
1502       || (elem->count == max->peel_info.count
1503           && max->peel_info.npeel > elem->npeel))
1504     {
1505       max->peel_info.npeel = elem->npeel;
1506       max->peel_info.count = elem->count;
1507       max->peel_info.dr_info = elem->dr_info;
1508     }
1509
1510   return 1;
1511 }
1512
1513 /* Get the costs of peeling NPEEL iterations for LOOP_VINFO, checking
1514    data access costs for all data refs that are relevant for alignment.
1515    DR0_INFO is the data reference being peeled for; it may be NULL, in
1516    which case its alignment is treated as not known.
        With NPEEL == 0 every data ref is costed with its current
        misalignment.  Otherwise DR0_INFO itself and every ref for which
        vect_dr_aligned_if_peeled_dr_is holds are costed as aligned
        (misalignment 0); the remaining refs get DR_MISALIGNMENT_UNKNOWN
        unless DR0_INFO's alignment, the ref's own alignment and a constant
        target alignment are all known, in which case the ref's misalignment
        is advanced by NPEEL steps.
        INSIDE_COST, OUTSIDE_COST, BODY_COST_VEC and PROLOGUE_COST_VEC are
        handed on unchanged to vect_get_data_access_cost for each ref.  */
1517
1518 static void
1519 vect_get_peeling_costs_all_drs (loop_vec_info loop_vinfo,
1520                                 dr_vec_info *dr0_info,
1521                                 unsigned int *inside_cost,
1522                                 unsigned int *outside_cost,
1523                                 stmt_vector_for_cost *body_cost_vec,
1524                                 stmt_vector_for_cost *prologue_cost_vec,
1525                                 unsigned int npeel)
1526 {
1527   vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1528
       /* False when DR0_INFO is NULL.  */
1529   bool dr0_alignment_known_p
1530     = (dr0_info
1531        && known_alignment_for_access_p (dr0_info,
1532                                         STMT_VINFO_VECTYPE (dr0_info->stmt)));
1533
1534   for (data_reference *dr : datarefs)
1535     {
1536       dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
1537       if (!vect_relevant_for_alignment_p (dr_info))
1538         continue;
1539
1540       tree vectype = STMT_VINFO_VECTYPE (dr_info->stmt);
1541       dr_alignment_support alignment_support_scheme;
1542       int misalignment;
1543       unsigned HOST_WIDE_INT alignment;
1544
           /* For a negative step, query the misalignment at an offset of
              -(number of vector elements - 1) * element size.  */
1545       bool negative = tree_int_cst_compare (DR_STEP (dr_info->dr),
1546                                             size_zero_node) < 0;
1547       poly_int64 off = 0;
1548       if (negative)
1549         off = ((TYPE_VECTOR_SUBPARTS (vectype) - 1)
1550                * -TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
1551
1552       if (npeel == 0)
1553         misalignment = dr_misalignment (dr_info, vectype, off);
1554       else if (dr_info == dr0_info
1555                || vect_dr_aligned_if_peeled_dr_is (dr_info, dr0_info))
1556         misalignment = 0;
1557       else if (!dr0_alignment_known_p
1558                || !known_alignment_for_access_p (dr_info, vectype)
1559                || !DR_TARGET_ALIGNMENT (dr_info).is_constant (&alignment))
1560         misalignment = DR_MISALIGNMENT_UNKNOWN;
1561       else
1562         {
               /* Advance the known misalignment by NPEEL steps and wrap it
                  to the target alignment (the mask presumes ALIGNMENT is a
                  power of two).  */
1563           misalignment = dr_misalignment (dr_info, vectype, off);
1564           misalignment += npeel * TREE_INT_CST_LOW (DR_STEP (dr_info->dr));
1565           misalignment &= alignment - 1;
1566         }
1567       alignment_support_scheme
1568         = vect_supportable_dr_alignment (loop_vinfo, dr_info, vectype,
1569                                          misalignment);
1570
1571       vect_get_data_access_cost (loop_vinfo, dr_info,
1572                                  alignment_support_scheme, misalignment,
1573                                  inside_cost, outside_cost,
1574                                  body_cost_vec, prologue_cost_vec);
1575     }
1576 }
1577
1578 /* Traverse peeling hash table and calculate cost for each peeling option.
1579    Find the one with the lowest cost.
        SLOT points at the table entry being visited.  MIN carries the
        vec_info to cost against (expected to be a loop_vec_info) and the
        cheapest option seen so far; it is overwritten when this entry has a
        lower inside cost, or an equal inside cost and a lower outside cost.
        Always returns 1 (presumably so that the traversal keeps visiting
        entries -- confirm against hash_table::traverse).  */
1580
1581 int
1582 vect_peeling_hash_get_lowest_cost (_vect_peel_info **slot,
1583                                    _vect_peel_extended_info *min)
1584 {
1585   vect_peel_info elem = *slot;
1586   int dummy;
1587   unsigned int inside_cost = 0, outside_cost = 0;
1588   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (min->vinfo);
1589   stmt_vector_for_cost prologue_cost_vec, body_cost_vec,
1590                        epilogue_cost_vec;
1591
1592   prologue_cost_vec.create (2);
1593   body_cost_vec.create (2);
1594   epilogue_cost_vec.create (2);
1595
       /* Cost all data refs under the assumption that ELEM's data ref is
          peeled for with ELEM's npeel.  */
1596   vect_get_peeling_costs_all_drs (loop_vinfo, elem->dr_info, &inside_cost,
1597                                   &outside_cost, &body_cost_vec,
1598                                   &prologue_cost_vec, elem->npeel);
1599
       /* Only the scalar totals are used below, not the per-statement
          body costs.  */
1600   body_cost_vec.release ();
1601
1602   outside_cost += vect_get_known_peeling_cost
1603     (loop_vinfo, elem->npeel, &dummy,
1604      &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
1605      &prologue_cost_vec, &epilogue_cost_vec);
1606
1607   /* Prologue and epilogue costs are added to the target model later.
1608      These costs depend only on the scalar iteration cost, the
1609      number of peeling iterations finally chosen, and the number of
1610      misaligned statements.  So discard the information found here.  */
1611   prologue_cost_vec.release ();
1612   epilogue_cost_vec.release ();
1613
       /* Inside cost is the primary key, outside cost the tie-breaker.  */
1614   if (inside_cost < min->inside_cost
1615       || (inside_cost == min->inside_cost
1616           && outside_cost < min->outside_cost))
1617     {
1618       min->inside_cost = inside_cost;
1619       min->outside_cost = outside_cost;
1620       min->peel_info.dr_info = elem->dr_info;
1621       min->peel_info.npeel = elem->npeel;
1622       min->peel_info.count = elem->count;
1623     }
1624
1625   return 1;
1626 }
1627
1628
1629 /* Choose best peeling option by traversing peeling hash table and either
1630    choosing an option with the lowest cost (if cost model is enabled) or the
1631    option that aligns as many accesses as possible.
        PEELING_HTAB is the table of candidate peelings and LOOP_VINFO the
        loop being analyzed.  The returned record has peel_info.dr_info ==
        NULL when no traversal callback selected an entry.  With the cost
        model enabled the costs start at INT_MAX and are filled in by
        vect_peeling_hash_get_lowest_cost; otherwise both costs are 0.
        NOTE(review): peel_info.npeel is never initialized here, and
        peel_info.count only on the unlimited-cost path, so both are
        indeterminate unless a callback stores into them -- callers should
        test peel_info.dr_info first.  */
1632
1633 static struct _vect_peel_extended_info
1634 vect_peeling_hash_choose_best_peeling (hash_table<peel_info_hasher> *peeling_htab,
1635                                        loop_vec_info loop_vinfo)
1636 {
1637    struct _vect_peel_extended_info res;
1638
1639    res.peel_info.dr_info = NULL;
1640    res.vinfo = loop_vinfo;
1641
1642    if (!unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
1643      {
            /* Cost model enabled: start from the worst cost so that any
               entry compares as cheaper.  */
1644        res.inside_cost = INT_MAX;
1645        res.outside_cost = INT_MAX;
1646        peeling_htab->traverse <_vect_peel_extended_info *,
1647                                vect_peeling_hash_get_lowest_cost> (&res);
1648      }
1649    else
1650      {
            /* No cost model: pick the entry with the highest count.  */
1651        res.peel_info.count = 0;
1652        peeling_htab->traverse <_vect_peel_extended_info *,
1653                                vect_peeling_hash_get_most_frequent> (&res);
1654        res.inside_cost = 0;
1655        res.outside_cost = 0;
1656      }
1657
1658    return res;
1659 }
1660
1661 /* Return true if the new peeling NPEEL is supported.
        DR0_INFO is the data reference being peeled for and must not be NULL
        (it is dereferenced unconditionally).  Every other data ref of
        LOOP_VINFO that is relevant for alignment and is not known to become
        aligned together with DR0_INFO is checked with its predicted
        misalignment after peeling; the result is false as soon as
        vect_supportable_dr_alignment reports dr_unaligned_unsupported for
        one of them.  */
1662
1663 static bool
1664 vect_peeling_supportable (loop_vec_info loop_vinfo, dr_vec_info *dr0_info,
1665                           unsigned npeel)
1666 {
1667   vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1668   enum dr_alignment_support supportable_dr_alignment;
1669
1670   bool dr0_alignment_known_p
1671     = known_alignment_for_access_p (dr0_info,
1672                                     STMT_VINFO_VECTYPE (dr0_info->stmt));
1673
1674   /* Ensure that all data refs can be vectorized after the peel.  */
1675   for (data_reference *dr : datarefs)
1676     {
1677       if (dr == dr0_info->dr)
1678         continue;
1679
1680       dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
1681       if (!vect_relevant_for_alignment_p (dr_info)
1682           || vect_dr_aligned_if_peeled_dr_is (dr_info, dr0_info))
1683         continue;
1684
1685       tree vectype = STMT_VINFO_VECTYPE (dr_info->stmt);
1686       int misalignment;
1687       unsigned HOST_WIDE_INT alignment;
1688       if (!dr0_alignment_known_p
1689           || !known_alignment_for_access_p (dr_info, vectype)
1690           || !DR_TARGET_ALIGNMENT (dr_info).is_constant (&alignment))
1691         misalignment = DR_MISALIGNMENT_UNKNOWN;
1692       else
1693         {
               /* Advance the known misalignment by NPEEL steps and wrap it
                  to the target alignment.
                  NOTE(review): no negative-step offset is passed to
                  dr_misalignment here, unlike in
                  vect_get_peeling_costs_all_drs -- confirm this is
                  intended.  */
1694           misalignment = dr_misalignment (dr_info, vectype);
1695           misalignment += npeel * TREE_INT_CST_LOW (DR_STEP (dr_info->dr));
1696           misalignment &= alignment - 1;
1697         }
1698       supportable_dr_alignment
1699         = vect_supportable_dr_alignment (loop_vinfo, dr_info, vectype,
1700                                          misalignment);
1701       if (supportable_dr_alignment == dr_unaligned_unsupported)
1702         return false;
1703     }
1704
1705   return true;
1706 }
1707
1708 /* Compare two data-references DRA and DRB to group them into chunks
1709    with related alignment.
        qsort-style comparator: DRA_ and DRB_ each point to a
        data_reference_p element.  The sort keys are, in order,
        DR_BASE_ADDRESS, DR_OFFSET, DR_STEP and DR_INIT, each compared with
        data_ref_compare_tree; data refs equal on all four are ordered by
        the gimple UID of their statements.  Returns 0 only when both
        elements are the same data reference.  */
1710
1711 static int
1712 dr_align_group_sort_cmp (const void *dra_, const void *drb_)
1713 {
1714   data_reference_p dra = *(data_reference_p *)const_cast<void *>(dra_);
1715   data_reference_p drb = *(data_reference_p *)const_cast<void *>(drb_);
1716   int cmp;
1717
1718   /* Stabilize sort.  */
1719   if (dra == drb)
1720     return 0;
1721
1722   /* Ordering of DRs according to base.  */
1723   cmp = data_ref_compare_tree (DR_BASE_ADDRESS (dra),
1724                                DR_BASE_ADDRESS (drb));
1725   if (cmp != 0)
1726     return cmp;
1727
1728   /* And according to DR_OFFSET.  */
1729   cmp = data_ref_compare_tree (DR_OFFSET (dra), DR_OFFSET (drb));
1730   if (cmp != 0)
1731     return cmp;
1732
1733   /* And after step.  */
1734   cmp = data_ref_compare_tree (DR_STEP (dra), DR_STEP (drb));
1735   if (cmp != 0)
1736     return cmp;
1737
1738   /* Then sort after DR_INIT.  In case of identical DRs sort after stmt UID.  */
1739   cmp = data_ref_compare_tree (DR_INIT (dra), DR_INIT (drb));
1740   if (cmp == 0)
1741     return gimple_uid (DR_STMT (dra)) < gimple_uid (DR_STMT (drb)) ? -1 : 1;
1742   return cmp;
1743 }
1744
1745 /* Function vect_enhance_data_refs_alignment
1746
1747    This pass will use loop versioning and loop peeling in order to enhance
1748    the alignment of data references in the loop.
1749
1750    FOR NOW: we assume that whatever versioning/peeling takes place, only the
1751    original loop is to be vectorized.  Any other loops that are created by
1752    the transformations performed in this pass - are not supposed to be
1753    vectorized.  This restriction will be relaxed.
1754
1755    This pass will require a cost model to guide it whether to apply peeling
1756    or versioning or a combination of the two.  For example, the scheme that
1757    Intel uses when given a loop with several memory accesses, is as follows:
1758    choose one memory access ('p') whose alignment you want to force by doing
1759    peeling.  Then, either (1) generate a loop in which 'p' is aligned and all
1760    other accesses are not necessarily aligned, or (2) use loop versioning to
1761    generate one loop in which all accesses are aligned, and another loop in
1762    which only 'p' is necessarily aligned.
1763
1764    ("Automatic Intra-Register Vectorization for the Intel Architecture",
1765    Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
1766    Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1767
1768    Devising a cost model is the most critical aspect of this work.  It will
1769    guide us on which access to peel for, whether to use loop versioning, how
1770    many versions to create, etc.  The cost model will probably consist of
1771    generic considerations as well as target specific considerations (on
1772    powerpc for example, misaligned stores are more painful than misaligned
1773    loads).
1774
1775    Here are the general steps involved in alignment enhancements:
1776
1777      -- original loop, before alignment analysis:
1778         for (i=0; i<N; i++){
1779           x = q[i];                     # DR_MISALIGNMENT(q) = unknown
1780           p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
1781         }
1782
1783      -- After vect_compute_data_refs_alignment:
1784         for (i=0; i<N; i++){
1785           x = q[i];                     # DR_MISALIGNMENT(q) = 3
1786           p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
1787         }
1788
1789      -- Possibility 1: we do loop versioning:
1790      if (p is aligned) {
1791         for (i=0; i<N; i++){    # loop 1A
1792           x = q[i];                     # DR_MISALIGNMENT(q) = 3
1793           p[i] = y;                     # DR_MISALIGNMENT(p) = 0
1794         }
1795      }
1796      else {
1797         for (i=0; i<N; i++){    # loop 1B
1798           x = q[i];                     # DR_MISALIGNMENT(q) = 3
1799           p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
1800         }
1801      }
1802
1803      -- Possibility 2: we do loop peeling:
1804      for (i = 0; i < 3; i++){   # (scalar loop, not to be vectorized).
1805         x = q[i];
1806         p[i] = y;
1807      }
1808      for (i = 3; i < N; i++){   # loop 2A
1809         x = q[i];                       # DR_MISALIGNMENT(q) = 0
1810         p[i] = y;                       # DR_MISALIGNMENT(p) = unknown
1811      }
1812
1813      -- Possibility 3: combination of loop peeling and versioning:
1814      for (i = 0; i < 3; i++){   # (scalar loop, not to be vectorized).
1815         x = q[i];
1816         p[i] = y;
1817      }
1818      if (p is aligned) {
1819         for (i = 3; i<N; i++){  # loop 3A
1820           x = q[i];                     # DR_MISALIGNMENT(q) = 0
1821           p[i] = y;                     # DR_MISALIGNMENT(p) = 0
1822         }
1823      }
1824      else {
1825         for (i = 3; i<N; i++){  # loop 3B
1826           x = q[i];                     # DR_MISALIGNMENT(q) = 0
1827           p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
1828         }
1829      }
1830
1831      These loops are later passed to loop_transform to be vectorized.  The
1832      vectorizer will use the alignment information to guide the transformation
1833      (whether to generate regular loads/stores, or with special handling for
1834      misalignment).  */
1835
1836 opt_result
1837 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1838 {
1839   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1840   dr_vec_info *first_store = NULL;
1841   dr_vec_info *dr0_info = NULL;
1842   struct data_reference *dr;
1843   unsigned int i;
1844   bool do_peeling = false;
1845   bool do_versioning = false;
1846   unsigned int npeel = 0;
1847   bool one_misalignment_known = false;
1848   bool one_misalignment_unknown = false;
1849   bool one_dr_unsupportable = false;
1850   dr_vec_info *unsupportable_dr_info = NULL;
1851   unsigned int dr0_same_align_drs = 0, first_store_same_align_drs = 0;
1852   hash_table<peel_info_hasher> peeling_htab (1);
1853
1854   DUMP_VECT_SCOPE ("vect_enhance_data_refs_alignment");
1855
1856   /* Reset data so we can safely be called multiple times.  */
1857   LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).truncate (0);
1858   LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = 0;
1859
1860   if (LOOP_VINFO_DATAREFS (loop_vinfo).is_empty ())
1861     return opt_result::success ();
1862
1863   /* Sort the vector of datarefs so DRs that have the same or dependent
1864      alignment are next to each other.  */
1865   auto_vec<data_reference_p> datarefs
1866     = LOOP_VINFO_DATAREFS (loop_vinfo).copy ();
1867   datarefs.qsort (dr_align_group_sort_cmp);
1868
1869   /* Compute the number of DRs that become aligned when we peel
1870      a dataref so it becomes aligned.  */
1871   auto_vec<unsigned> n_same_align_refs (datarefs.length ());
1872   n_same_align_refs.quick_grow_cleared (datarefs.length ());
1873   unsigned i0;
1874   for (i0 = 0; i0 < datarefs.length (); ++i0)
1875     if (DR_BASE_ADDRESS (datarefs[i0]))
1876       break;
1877   for (i = i0 + 1; i <= datarefs.length (); ++i)
1878     {
1879       if (i == datarefs.length ()
1880           || !operand_equal_p (DR_BASE_ADDRESS (datarefs[i0]),
1881                                DR_BASE_ADDRESS (datarefs[i]), 0)
1882           || !operand_equal_p (DR_OFFSET (datarefs[i0]),
1883                                DR_OFFSET (datarefs[i]), 0)
1884           || !operand_equal_p (DR_STEP (datarefs[i0]),
1885                                DR_STEP (datarefs[i]), 0))
1886         {
1887           /* The subgroup [i0, i-1] now only differs in DR_INIT and
1888              possibly DR_TARGET_ALIGNMENT.  Still the whole subgroup
1889              will get known misalignment if we align one of the refs
1890              with the largest DR_TARGET_ALIGNMENT.  */
1891           for (unsigned j = i0; j < i; ++j)
1892             {
1893               dr_vec_info *dr_infoj = loop_vinfo->lookup_dr (datarefs[j]);
1894               for (unsigned k = i0; k < i; ++k)
1895                 {
1896                   if (k == j)
1897                     continue;
1898                   dr_vec_info *dr_infok = loop_vinfo->lookup_dr (datarefs[k]);
1899                   if (vect_dr_aligned_if_related_peeled_dr_is (dr_infok,
1900                                                                dr_infoj))
1901                     n_same_align_refs[j]++;
1902                 }
1903             }
1904           i0 = i;
1905         }
1906     }
1907
1908   /* While cost model enhancements are expected in the future, the high level
1909      view of the code at this time is as follows:
1910
1911      A) If there is a misaligned access then see if peeling to align
1912         this access can make all data references satisfy
1913         vect_supportable_dr_alignment.  If so, update data structures
1914         as needed and return true.
1915
1916      B) If peeling wasn't possible and there is a data reference with an
1917         unknown misalignment that does not satisfy vect_supportable_dr_alignment
1918         then see if loop versioning checks can be used to make all data
1919         references satisfy vect_supportable_dr_alignment.  If so, update
1920         data structures as needed and return true.
1921
1922      C) If neither peeling nor versioning were successful then return false if
1923         any data reference does not satisfy vect_supportable_dr_alignment.
1924
1925      D) Return true (all data references satisfy vect_supportable_dr_alignment).
1926
1927      Note, Possibility 3 above (which is peeling and versioning together) is not
1928      being done at this time.  */
1929
1930   /* (1) Peeling to force alignment.  */
1931
1932   /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1933      Considerations:
1934      + How many accesses will become aligned due to the peeling
1935      - How many accesses will become unaligned due to the peeling,
1936        and the cost of misaligned accesses.
1937      - The cost of peeling (the extra runtime checks, the increase
1938        in code size).  */
1939
1940   FOR_EACH_VEC_ELT (datarefs, i, dr)
1941     {
1942       dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
1943       if (!vect_relevant_for_alignment_p (dr_info))
1944         continue;
1945
1946       stmt_vec_info stmt_info = dr_info->stmt;
1947       tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1948       do_peeling = vector_alignment_reachable_p (dr_info);
1949       if (do_peeling)
1950         {
1951           if (known_alignment_for_access_p (dr_info, vectype))
1952             {
1953               unsigned int npeel_tmp = 0;
1954               bool negative = tree_int_cst_compare (DR_STEP (dr),
1955                                                     size_zero_node) < 0;
1956
1957               /* If known_alignment_for_access_p then we have set
1958                  DR_MISALIGNMENT which is only done if we know it at compiler
1959                  time, so it is safe to assume target alignment is constant.
1960                */
1961               unsigned int target_align =
1962                 DR_TARGET_ALIGNMENT (dr_info).to_constant ();
1963               unsigned HOST_WIDE_INT dr_size = vect_get_scalar_dr_size (dr_info);
1964               poly_int64 off = 0;
1965               if (negative)
1966                 off = (TYPE_VECTOR_SUBPARTS (vectype) - 1) * -dr_size;
1967               unsigned int mis = dr_misalignment (dr_info, vectype, off);
1968               mis = negative ? mis : -mis;
1969               if (mis != 0)
1970                 npeel_tmp = (mis & (target_align - 1)) / dr_size;
1971
1972               /* For multiple types, it is possible that the bigger type access
1973                  will have more than one peeling option.  E.g., a loop with two
1974                  types: one of size (vector size / 4), and the other one of
1975                  size (vector size / 8).  Vectorization factor will 8.  If both
1976                  accesses are misaligned by 3, the first one needs one scalar
1977                  iteration to be aligned, and the second one needs 5.  But the
1978                  first one will be aligned also by peeling 5 scalar
1979                  iterations, and in that case both accesses will be aligned.
1980                  Hence, except for the immediate peeling amount, we also want
1981                  to try to add full vector size, while we don't exceed
1982                  vectorization factor.
1983                  We do this automatically for cost model, since we calculate
1984                  cost for every peeling option.  */
1985               poly_uint64 nscalars = npeel_tmp;
1986               if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
1987                 {
1988                   poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1989                   nscalars = (STMT_SLP_TYPE (stmt_info)
1990                               ? vf * DR_GROUP_SIZE (stmt_info) : vf);
1991                 }
1992
1993               /* Save info about DR in the hash table.  Also include peeling
1994                  amounts according to the explanation above.  Indicate
1995                  the alignment status when the ref is not aligned.
1996                  ???  Rather than using unknown alignment here we should
1997                  prune all entries from the peeling hashtable which cause
1998                  DRs to be not supported.  */
1999               bool supportable_if_not_aligned
2000                 = vect_supportable_dr_alignment
2001                     (loop_vinfo, dr_info, vectype, DR_MISALIGNMENT_UNKNOWN);
2002               while (known_le (npeel_tmp, nscalars))
2003                 {
2004                   vect_peeling_hash_insert (&peeling_htab, loop_vinfo,
2005                                             dr_info, npeel_tmp,
2006                                             supportable_if_not_aligned);
2007                   npeel_tmp += MAX (1, target_align / dr_size);
2008                 }
2009
2010               one_misalignment_known = true;
2011             }
2012           else
2013             {
2014               /* If we don't know any misalignment values, we prefer
2015                  peeling for data-ref that has the maximum number of data-refs
2016                  with the same alignment, unless the target prefers to align
2017                  stores over load.  */
2018               unsigned same_align_drs = n_same_align_refs[i];
2019               if (!dr0_info
2020                   || dr0_same_align_drs < same_align_drs)
2021                 {
2022                   dr0_same_align_drs = same_align_drs;
2023                   dr0_info = dr_info;
2024                 }
2025               /* For data-refs with the same number of related
2026                  accesses prefer the one where the misalign
2027                  computation will be invariant in the outermost loop.  */
2028               else if (dr0_same_align_drs == same_align_drs)
2029                 {
2030                   class loop *ivloop0, *ivloop;
2031                   ivloop0 = outermost_invariant_loop_for_expr
2032                     (loop, DR_BASE_ADDRESS (dr0_info->dr));
2033                   ivloop = outermost_invariant_loop_for_expr
2034                     (loop, DR_BASE_ADDRESS (dr));
2035                   if ((ivloop && !ivloop0)
2036                       || (ivloop && ivloop0
2037                           && flow_loop_nested_p (ivloop, ivloop0)))
2038                     dr0_info = dr_info;
2039                 }
2040
2041               one_misalignment_unknown = true;
2042
2043               /* Check for data refs with unsupportable alignment that
2044                  can be peeled.  */
2045               enum dr_alignment_support supportable_dr_alignment
2046                 = vect_supportable_dr_alignment (loop_vinfo, dr_info, vectype,
2047                                                  DR_MISALIGNMENT_UNKNOWN);
2048               if (supportable_dr_alignment == dr_unaligned_unsupported)
2049                 {
2050                   one_dr_unsupportable = true;
2051                   unsupportable_dr_info = dr_info;
2052                 }
2053
2054               if (!first_store && DR_IS_WRITE (dr))
2055                 {
2056                   first_store = dr_info;
2057                   first_store_same_align_drs = same_align_drs;
2058                 }
2059             }
2060         }
2061       else
2062         {
2063           if (!aligned_access_p (dr_info, vectype))
2064             {
2065               if (dump_enabled_p ())
2066                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2067                                  "vector alignment may not be reachable\n");
2068               break;
2069             }
2070         }
2071     }
2072
2073   /* Check if we can possibly peel the loop.  */
2074   if (!vect_can_advance_ivs_p (loop_vinfo)
2075       || !slpeel_can_duplicate_loop_p (loop, single_exit (loop))
2076       || loop->inner)
2077     do_peeling = false;
2078
2079   struct _vect_peel_extended_info peel_for_known_alignment;
2080   struct _vect_peel_extended_info peel_for_unknown_alignment;
2081   struct _vect_peel_extended_info best_peel;
2082
2083   peel_for_unknown_alignment.inside_cost = INT_MAX;
2084   peel_for_unknown_alignment.outside_cost = INT_MAX;
2085   peel_for_unknown_alignment.peel_info.count = 0;
2086
2087   if (do_peeling
2088       && one_misalignment_unknown)
2089     {
2090       /* Check if the target requires to prefer stores over loads, i.e., if
2091          misaligned stores are more expensive than misaligned loads (taking
2092          drs with same alignment into account).  */
2093       unsigned int load_inside_cost = 0;
2094       unsigned int load_outside_cost = 0;
2095       unsigned int store_inside_cost = 0;
2096       unsigned int store_outside_cost = 0;
2097       unsigned int estimated_npeels = vect_vf_for_cost (loop_vinfo) / 2;
2098
2099       stmt_vector_for_cost dummy;
2100       dummy.create (2);
2101       vect_get_peeling_costs_all_drs (loop_vinfo, dr0_info,
2102                                       &load_inside_cost,
2103                                       &load_outside_cost,
2104                                       &dummy, &dummy, estimated_npeels);
2105       dummy.release ();
2106
2107       if (first_store)
2108         {
2109           dummy.create (2);
2110           vect_get_peeling_costs_all_drs (loop_vinfo, first_store,
2111                                           &store_inside_cost,
2112                                           &store_outside_cost,
2113                                           &dummy, &dummy,
2114                                           estimated_npeels);
2115           dummy.release ();
2116         }
2117       else
2118         {
2119           store_inside_cost = INT_MAX;
2120           store_outside_cost = INT_MAX;
2121         }
2122
2123       if (load_inside_cost > store_inside_cost
2124           || (load_inside_cost == store_inside_cost
2125               && load_outside_cost > store_outside_cost))
2126         {
2127           dr0_info = first_store;
2128           dr0_same_align_drs = first_store_same_align_drs;
2129           peel_for_unknown_alignment.inside_cost = store_inside_cost;
2130           peel_for_unknown_alignment.outside_cost = store_outside_cost;
2131         }
2132       else
2133         {
2134           peel_for_unknown_alignment.inside_cost = load_inside_cost;
2135           peel_for_unknown_alignment.outside_cost = load_outside_cost;
2136         }
2137
2138       stmt_vector_for_cost prologue_cost_vec, epilogue_cost_vec;
2139       prologue_cost_vec.create (2);
2140       epilogue_cost_vec.create (2);
2141
2142       int dummy2;
2143       peel_for_unknown_alignment.outside_cost += vect_get_known_peeling_cost
2144         (loop_vinfo, estimated_npeels, &dummy2,
2145          &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
2146          &prologue_cost_vec, &epilogue_cost_vec);
2147
2148       prologue_cost_vec.release ();
2149       epilogue_cost_vec.release ();
2150
2151       peel_for_unknown_alignment.peel_info.count = dr0_same_align_drs + 1;
2152     }
2153
2154   peel_for_unknown_alignment.peel_info.npeel = 0;
2155   peel_for_unknown_alignment.peel_info.dr_info = dr0_info;
2156
2157   best_peel = peel_for_unknown_alignment;
2158
2159   peel_for_known_alignment.inside_cost = INT_MAX;
2160   peel_for_known_alignment.outside_cost = INT_MAX;
2161   peel_for_known_alignment.peel_info.count = 0;
2162   peel_for_known_alignment.peel_info.dr_info = NULL;
2163
2164   if (do_peeling && one_misalignment_known)
2165     {
2166       /* Peeling is possible, but there is no data access that is not supported
2167          unless aligned.  So we try to choose the best possible peeling from
2168          the hash table.  */
2169       peel_for_known_alignment = vect_peeling_hash_choose_best_peeling
2170         (&peeling_htab, loop_vinfo);
2171     }
2172
2173   /* Compare costs of peeling for known and unknown alignment. */
2174   if (peel_for_known_alignment.peel_info.dr_info != NULL
2175       && peel_for_unknown_alignment.inside_cost
2176       >= peel_for_known_alignment.inside_cost)
2177     {
2178       best_peel = peel_for_known_alignment;
2179
2180       /* If the best peeling for known alignment has NPEEL == 0, perform no
2181          peeling at all except if there is an unsupportable dr that we can
2182          align.  */
2183       if (best_peel.peel_info.npeel == 0 && !one_dr_unsupportable)
2184         do_peeling = false;
2185     }
2186
2187   /* If there is an unsupportable data ref, prefer this over all choices so far
2188      since we'd have to discard a chosen peeling except when it accidentally
2189      aligned the unsupportable data ref.  */
2190   if (one_dr_unsupportable)
2191     dr0_info = unsupportable_dr_info;
2192   else if (do_peeling)
2193     {
2194       /* Calculate the penalty for no peeling, i.e. leaving everything as-is.
2195          TODO: Use nopeel_outside_cost or get rid of it?  */
2196       unsigned nopeel_inside_cost = 0;
2197       unsigned nopeel_outside_cost = 0;
2198
2199       stmt_vector_for_cost dummy;
2200       dummy.create (2);
2201       vect_get_peeling_costs_all_drs (loop_vinfo, NULL, &nopeel_inside_cost,
2202                                       &nopeel_outside_cost, &dummy, &dummy, 0);
2203       dummy.release ();
2204
2205       /* Add epilogue costs.  As we do not peel for alignment here, no prologue
2206          costs will be recorded.  */
2207       stmt_vector_for_cost prologue_cost_vec, epilogue_cost_vec;
2208       prologue_cost_vec.create (2);
2209       epilogue_cost_vec.create (2);
2210
2211       int dummy2;
2212       nopeel_outside_cost += vect_get_known_peeling_cost
2213         (loop_vinfo, 0, &dummy2,
2214          &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
2215          &prologue_cost_vec, &epilogue_cost_vec);
2216
2217       prologue_cost_vec.release ();
2218       epilogue_cost_vec.release ();
2219
2220       npeel = best_peel.peel_info.npeel;
2221       dr0_info = best_peel.peel_info.dr_info;
2222
2223       /* If no peeling is not more expensive than the best peeling we
2224          have so far, don't perform any peeling.  */
2225       if (nopeel_inside_cost <= best_peel.inside_cost)
2226         do_peeling = false;
2227     }
2228
2229   if (do_peeling)
2230     {
2231       stmt_vec_info stmt_info = dr0_info->stmt;
2232       if (known_alignment_for_access_p (dr0_info,
2233                                         STMT_VINFO_VECTYPE (stmt_info)))
2234         {
2235           bool negative = tree_int_cst_compare (DR_STEP (dr0_info->dr),
2236                                                 size_zero_node) < 0;
2237           if (!npeel)
2238             {
2239               /* Since it's known at compile time, compute the number of
2240                  iterations in the peeled loop (the peeling factor) for use in
2241                  updating DR_MISALIGNMENT values.  The peeling factor is the
2242                  vectorization factor minus the misalignment as an element
2243                  count.  */
2244               tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2245               poly_int64 off = 0;
2246               if (negative)
2247                 off = ((TYPE_VECTOR_SUBPARTS (vectype) - 1)
2248                        * -TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
2249               unsigned int mis
2250                 = dr_misalignment (dr0_info, vectype, off);
2251               mis = negative ? mis : -mis;
2252               /* If known_alignment_for_access_p then we have set
2253                  DR_MISALIGNMENT which is only done if we know it at compiler
2254                  time, so it is safe to assume target alignment is constant.
2255                */
2256               unsigned int target_align =
2257                 DR_TARGET_ALIGNMENT (dr0_info).to_constant ();
2258               npeel = ((mis & (target_align - 1))
2259                        / vect_get_scalar_dr_size (dr0_info));
2260             }
2261
2262           /* For interleaved data access every iteration accesses all the
2263              members of the group, therefore we divide the number of iterations
2264              by the group size.  */
2265           if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2266             npeel /= DR_GROUP_SIZE (stmt_info);
2267
2268           if (dump_enabled_p ())
2269             dump_printf_loc (MSG_NOTE, vect_location,
2270                              "Try peeling by %d\n", npeel);
2271         }
2272
2273       /* Ensure that all datarefs can be vectorized after the peel.  */
2274       if (!vect_peeling_supportable (loop_vinfo, dr0_info, npeel))
2275         do_peeling = false;
2276
2277       /* Check if all datarefs are supportable and log.  */
2278       if (do_peeling
2279           && npeel == 0
2280           && known_alignment_for_access_p (dr0_info,
2281                                            STMT_VINFO_VECTYPE (stmt_info)))
2282         return opt_result::success ();
2283
2284       /* Cost model #1 - honor --param vect-max-peeling-for-alignment.  */
2285       if (do_peeling)
2286         {
2287           unsigned max_allowed_peel
2288             = param_vect_max_peeling_for_alignment;
2289           if (loop_cost_model (loop) <= VECT_COST_MODEL_CHEAP)
2290             max_allowed_peel = 0;
2291           if (max_allowed_peel != (unsigned)-1)
2292             {
2293               unsigned max_peel = npeel;
2294               if (max_peel == 0)
2295                 {
2296                   poly_uint64 target_align = DR_TARGET_ALIGNMENT (dr0_info);
2297                   unsigned HOST_WIDE_INT target_align_c;
2298                   if (target_align.is_constant (&target_align_c))
2299                     max_peel =
2300                       target_align_c / vect_get_scalar_dr_size (dr0_info) - 1;
2301                   else
2302                     {
2303                       do_peeling = false;
2304                       if (dump_enabled_p ())
2305                         dump_printf_loc (MSG_NOTE, vect_location,
2306                           "Disable peeling, max peels set and vector"
2307                           " alignment unknown\n");
2308                     }
2309                 }
2310               if (max_peel > max_allowed_peel)
2311                 {
2312                   do_peeling = false;
2313                   if (dump_enabled_p ())
2314                     dump_printf_loc (MSG_NOTE, vect_location,
2315                         "Disable peeling, max peels reached: %d\n", max_peel);
2316                 }
2317             }
2318         }
2319
2320       /* Cost model #2 - if peeling may result in a remaining loop not
2321          iterating enough to be vectorized then do not peel.  Since this
2322          is a cost heuristic rather than a correctness decision, use the
2323          most likely runtime value for variable vectorization factors.  */
2324       if (do_peeling
2325           && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
2326         {
2327           unsigned int assumed_vf = vect_vf_for_cost (loop_vinfo);
2328           unsigned int max_peel = npeel == 0 ? assumed_vf - 1 : npeel;
2329           if ((unsigned HOST_WIDE_INT) LOOP_VINFO_INT_NITERS (loop_vinfo)
2330               < assumed_vf + max_peel)
2331             do_peeling = false;
2332         }
2333
2334       if (do_peeling)
2335         {
2336           /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
2337              If the misalignment of DR_i is identical to that of dr0 then set
2338              DR_MISALIGNMENT (DR_i) to zero.  If the misalignment of DR_i and
2339              dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
2340              by the peeling factor times the element size of DR_i (MOD the
2341              vectorization factor times the size).  Otherwise, the
2342              misalignment of DR_i must be set to unknown.  */
2343           FOR_EACH_VEC_ELT (datarefs, i, dr)
2344             if (dr != dr0_info->dr)
2345               {
2346                 dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
2347                 if (!vect_relevant_for_alignment_p (dr_info))
2348                   continue;
2349
2350                 vect_update_misalignment_for_peel (dr_info, dr0_info, npeel);
2351               }
2352
2353           LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0_info;
2354           if (npeel)
2355             LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
2356           else
2357             LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = -1;
2358           SET_DR_MISALIGNMENT (dr0_info,
2359                                vect_dr_misalign_for_aligned_access (dr0_info));
2360           if (dump_enabled_p ())
2361             {
2362               dump_printf_loc (MSG_NOTE, vect_location,
2363                                "Alignment of access forced using peeling.\n");
2364               dump_printf_loc (MSG_NOTE, vect_location,
2365                                "Peeling for alignment will be applied.\n");
2366             }
2367
2368           /* The inside-loop cost will be accounted for in vectorizable_load
2369              and vectorizable_store correctly with adjusted alignments.
2370              Drop the body_cst_vec on the floor here.  */
2371           return opt_result::success ();
2372         }
2373     }
2374
2375   /* (2) Versioning to force alignment.  */
2376
2377   /* Try versioning if:
2378      1) optimize loop for speed and the cost-model is not cheap
2379      2) there is at least one unsupported misaligned data ref with an unknown
2380         misalignment, and
2381      3) all misaligned data refs with a known misalignment are supported, and
2382      4) the number of runtime alignment checks is within reason.  */
2383
2384   do_versioning
2385     = (optimize_loop_nest_for_speed_p (loop)
2386        && !loop->inner /* FORNOW */
2387        && loop_cost_model (loop) > VECT_COST_MODEL_CHEAP);
2388
2389   if (do_versioning)
2390     {
2391       FOR_EACH_VEC_ELT (datarefs, i, dr)
2392         {
2393           dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
2394           if (!vect_relevant_for_alignment_p (dr_info))
2395             continue;
2396
2397           stmt_vec_info stmt_info = dr_info->stmt;
2398           if (STMT_VINFO_STRIDED_P (stmt_info))
2399             {
2400               do_versioning = false;
2401               break;
2402             }
2403
2404           tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2405           bool negative = tree_int_cst_compare (DR_STEP (dr),
2406                                                 size_zero_node) < 0;
2407           poly_int64 off = 0;
2408           if (negative)
2409             off = ((TYPE_VECTOR_SUBPARTS (vectype) - 1)
2410                    * -TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
2411           int misalignment;
2412           if ((misalignment = dr_misalignment (dr_info, vectype, off)) == 0)
2413             continue;
2414
2415           enum dr_alignment_support supportable_dr_alignment
2416             = vect_supportable_dr_alignment (loop_vinfo, dr_info, vectype,
2417                                              misalignment);
2418           if (supportable_dr_alignment == dr_unaligned_unsupported)
2419             {
2420               if (misalignment != DR_MISALIGNMENT_UNKNOWN
2421                   || (LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length ()
2422                       >= (unsigned) param_vect_max_version_for_alignment_checks))
2423                 {
2424                   do_versioning = false;
2425                   break;
2426                 }
2427
2428               /* At present we don't support versioning for alignment
2429                  with variable VF, since there's no guarantee that the
2430                  VF is a power of two.  We could relax this if we added
2431                  a way of enforcing a power-of-two size.  */
2432               unsigned HOST_WIDE_INT size;
2433               if (!GET_MODE_SIZE (TYPE_MODE (vectype)).is_constant (&size))
2434                 {
2435                   do_versioning = false;
2436                   break;
2437                 }
2438
2439               /* Forcing alignment in the first iteration is no good if
2440                  we don't keep it across iterations.  For now, just disable
2441                  versioning in this case.
2442                  ?? We could actually unroll the loop to achieve the required
2443                  overall step alignment, and forcing the alignment could be
2444                  done by doing some iterations of the non-vectorized loop.  */
2445               if (!multiple_p (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2446                                * DR_STEP_ALIGNMENT (dr),
2447                                DR_TARGET_ALIGNMENT (dr_info)))
2448                 {
2449                   do_versioning = false;
2450                   break;
2451                 }
2452
2453               /* The rightmost bits of an aligned address must be zeros.
2454                  Construct the mask needed for this test.  For example,
2455                  GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
2456                  mask must be 15 = 0xf. */
2457               int mask = size - 1;
2458
2459               /* FORNOW: use the same mask to test all potentially unaligned
2460                  references in the loop.  */
2461               if (LOOP_VINFO_PTR_MASK (loop_vinfo)
2462                   && LOOP_VINFO_PTR_MASK (loop_vinfo) != mask)
2463                 {
2464                   do_versioning = false;
2465                   break;
2466                 }
2467
2468               LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
2469               LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).safe_push (stmt_info);
2470             }
2471         }
2472
2473       /* Versioning requires at least one misaligned data reference.  */
2474       if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
2475         do_versioning = false;
2476       else if (!do_versioning)
2477         LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).truncate (0);
2478     }
2479
2480   if (do_versioning)
2481     {
2482       const vec<stmt_vec_info> &may_misalign_stmts
2483         = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2484       stmt_vec_info stmt_info;
2485
2486       /* It can now be assumed that the data references in the statements
2487          in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
2488          of the loop being vectorized.  */
2489       FOR_EACH_VEC_ELT (may_misalign_stmts, i, stmt_info)
2490         {
2491           dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2492           SET_DR_MISALIGNMENT (dr_info,
2493                                vect_dr_misalign_for_aligned_access (dr_info));
2494           if (dump_enabled_p ())
2495             dump_printf_loc (MSG_NOTE, vect_location,
2496                              "Alignment of access forced using versioning.\n");
2497         }
2498
2499       if (dump_enabled_p ())
2500         dump_printf_loc (MSG_NOTE, vect_location,
2501                          "Versioning for alignment will be applied.\n");
2502
2503       /* Peeling and versioning can't be done together at this time.  */
2504       gcc_assert (! (do_peeling && do_versioning));
2505
2506       return opt_result::success ();
2507     }
2508
2509   /* This point is reached if neither peeling nor versioning is being done.  */
2510   gcc_assert (! (do_peeling || do_versioning));
2511
2512   return opt_result::success ();
2513 }
2514
2515
2516 /* Function vect_analyze_data_refs_alignment
2517
2518    Analyze the alignment of the data-references in the loop.
2519    Return FALSE if a data reference is found that cannot be vectorized.  */
2520
2521 opt_result
2522 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
2523 {
2524   DUMP_VECT_SCOPE ("vect_analyze_data_refs_alignment");
2525
2526   vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2527   struct data_reference *dr;
2528   unsigned int i;
2529
2530   vect_record_base_alignments (loop_vinfo);
2531   FOR_EACH_VEC_ELT (datarefs, i, dr)
2532     {
2533       dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
2534       if (STMT_VINFO_VECTORIZABLE (dr_info->stmt))
2535         {
2536           if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt)
2537               && DR_GROUP_FIRST_ELEMENT (dr_info->stmt) != dr_info->stmt)
2538             continue;
2539           vect_compute_data_ref_alignment (loop_vinfo, dr_info,
2540                                            STMT_VINFO_VECTYPE (dr_info->stmt));
2541         }
2542     }
2543
2544   return opt_result::success ();
2545 }
2546
2547
2548 /* Analyze alignment of DRs of stmts in NODE.  */
2549
2550 static bool
2551 vect_slp_analyze_node_alignment (vec_info *vinfo, slp_tree node)
2552 {
2553   /* Alignment is maintained in the first element of the group.  */
2554   stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
2555   first_stmt_info = DR_GROUP_FIRST_ELEMENT (first_stmt_info);
2556   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2557   tree vectype = SLP_TREE_VECTYPE (node);
2558   poly_uint64 vector_alignment
2559     = exact_div (targetm.vectorize.preferred_vector_alignment (vectype),
2560                  BITS_PER_UNIT);
2561   if (dr_info->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
2562     vect_compute_data_ref_alignment (vinfo, dr_info, SLP_TREE_VECTYPE (node));
2563   /* Re-analyze alignment when we're facing a vectorization with a bigger
2564      alignment requirement.  */
2565   else if (known_lt (dr_info->target_alignment, vector_alignment))
2566     {
2567       poly_uint64 old_target_alignment = dr_info->target_alignment;
2568       int old_misalignment = dr_info->misalignment;
2569       vect_compute_data_ref_alignment (vinfo, dr_info, SLP_TREE_VECTYPE (node));
2570       /* But keep knowledge about a smaller alignment.  */
2571       if (old_misalignment != DR_MISALIGNMENT_UNKNOWN
2572           && dr_info->misalignment == DR_MISALIGNMENT_UNKNOWN)
2573         {
2574           dr_info->target_alignment = old_target_alignment;
2575           dr_info->misalignment = old_misalignment;
2576         }
2577     }
2578   /* When we ever face unordered target alignments the first one wins in terms
2579      of analyzing and the other will become unknown in dr_misalignment.  */
2580   return true;
2581 }
2582
2583 /* Function vect_slp_analyze_instance_alignment
2584
2585    Analyze the alignment of the data-references in the SLP instance.
2586    Return FALSE if a data reference is found that cannot be vectorized.  */
2587
2588 bool
2589 vect_slp_analyze_instance_alignment (vec_info *vinfo,
2590                                                 slp_instance instance)
2591 {
2592   DUMP_VECT_SCOPE ("vect_slp_analyze_instance_alignment");
2593
2594   slp_tree node;
2595   unsigned i;
2596   FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, node)
2597     if (! vect_slp_analyze_node_alignment (vinfo, node))
2598       return false;
2599
2600   if (SLP_INSTANCE_KIND (instance) == slp_inst_kind_store
2601       && ! vect_slp_analyze_node_alignment
2602              (vinfo, SLP_INSTANCE_TREE (instance)))
2603     return false;
2604
2605   return true;
2606 }
2607
2608
2609 /* Analyze groups of accesses: check that DR_INFO belongs to a group of
2610    accesses of legal size, step, etc.  Detect gaps, single element
2611    interleaving, and other special cases. Set grouped access info.
2612    Collect groups of strided stores for further use in SLP analysis.
2613    Worker for vect_analyze_group_access.  */
2614
2615 static bool
2616 vect_analyze_group_access_1 (vec_info *vinfo, dr_vec_info *dr_info)
2617 {
2618   data_reference *dr = dr_info->dr;
2619   tree step = DR_STEP (dr);
2620   tree scalar_type = TREE_TYPE (DR_REF (dr));
2621   HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
2622   stmt_vec_info stmt_info = dr_info->stmt;
2623   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2624   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
2625   HOST_WIDE_INT dr_step = -1;
2626   HOST_WIDE_INT groupsize, last_accessed_element = 1;
2627   bool slp_impossible = false;
2628
2629   /* For interleaving, GROUPSIZE is STEP counted in elements, i.e., the
2630      size of the interleaving group (including gaps).  */
2631   if (tree_fits_shwi_p (step))
2632     {
2633       dr_step = tree_to_shwi (step);
2634       /* Check that STEP is a multiple of type size.  Otherwise there is
2635          a non-element-sized gap at the end of the group which we
2636          cannot represent in DR_GROUP_GAP or DR_GROUP_SIZE.
2637          ???  As we can handle non-constant step fine here we should
2638          simply remove uses of DR_GROUP_GAP between the last and first
2639          element and instead rely on DR_STEP.  DR_GROUP_SIZE then would
2640          simply not include that gap.  */
2641       if ((dr_step % type_size) != 0)
2642         {
2643           if (dump_enabled_p ())
2644             dump_printf_loc (MSG_NOTE, vect_location,
2645                              "Step %T is not a multiple of the element size"
2646                              " for %T\n",
2647                              step, DR_REF (dr));
2648           return false;
2649         }
2650       groupsize = absu_hwi (dr_step) / type_size;
2651     }
2652   else
2653     groupsize = 0;
2654
2655   /* Not consecutive access is possible only if it is a part of interleaving.  */
2656   if (!DR_GROUP_FIRST_ELEMENT (stmt_info))
2657     {
2658       /* Check if it this DR is a part of interleaving, and is a single
2659          element of the group that is accessed in the loop.  */
2660
2661       /* Gaps are supported only for loads. STEP must be a multiple of the type
2662          size.  */
2663       if (DR_IS_READ (dr)
2664           && (dr_step % type_size) == 0
2665           && groupsize > 0
2666           /* This could be UINT_MAX but as we are generating code in a very
2667              inefficient way we have to cap earlier.
2668              See PR91403 for example.  */
2669           && groupsize <= 4096)
2670         {
2671           DR_GROUP_FIRST_ELEMENT (stmt_info) = stmt_info;
2672           DR_GROUP_SIZE (stmt_info) = groupsize;
2673           DR_GROUP_GAP (stmt_info) = groupsize - 1;
2674           if (dump_enabled_p ())
2675             dump_printf_loc (MSG_NOTE, vect_location,
2676                              "Detected single element interleaving %T"
2677                              " step %T\n",
2678                              DR_REF (dr), step);
2679
2680           return true;
2681         }
2682
2683       if (dump_enabled_p ())
2684         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2685                          "not consecutive access %G", stmt_info->stmt);
2686
2687       if (bb_vinfo)
2688         {
2689           /* Mark the statement as unvectorizable.  */
2690           STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2691           return true;
2692         }
2693
2694       if (dump_enabled_p ())
2695         dump_printf_loc (MSG_NOTE, vect_location, "using strided accesses\n");
2696       STMT_VINFO_STRIDED_P (stmt_info) = true;
2697       return true;
2698     }
2699
2700   if (DR_GROUP_FIRST_ELEMENT (stmt_info) == stmt_info)
2701     {
2702       /* First stmt in the interleaving chain. Check the chain.  */
2703       stmt_vec_info next = DR_GROUP_NEXT_ELEMENT (stmt_info);
2704       struct data_reference *data_ref = dr;
2705       unsigned int count = 1;
2706       tree prev_init = DR_INIT (data_ref);
2707       HOST_WIDE_INT diff, gaps = 0;
2708
2709       /* By construction, all group members have INTEGER_CST DR_INITs.  */
2710       while (next)
2711         {
2712           /* We never have the same DR multiple times.  */
2713           gcc_assert (tree_int_cst_compare (DR_INIT (data_ref),
2714                                 DR_INIT (STMT_VINFO_DATA_REF (next))) != 0);
2715
2716           data_ref = STMT_VINFO_DATA_REF (next);
2717
2718           /* All group members have the same STEP by construction.  */
2719           gcc_checking_assert (operand_equal_p (DR_STEP (data_ref), step, 0));
2720
2721           /* Check that the distance between two accesses is equal to the type
2722              size. Otherwise, we have gaps.  */
2723           diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2724                   - TREE_INT_CST_LOW (prev_init)) / type_size;
2725           if (diff != 1)
2726             {
2727               /* FORNOW: SLP of accesses with gaps is not supported.  */
2728               slp_impossible = true;
2729               if (DR_IS_WRITE (data_ref))
2730                 {
2731                   if (dump_enabled_p ())
2732                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2733                                      "interleaved store with gaps\n");
2734                   return false;
2735                 }
2736
2737               gaps += diff - 1;
2738             }
2739
2740           last_accessed_element += diff;
2741
2742           /* Store the gap from the previous member of the group. If there is no
2743              gap in the access, DR_GROUP_GAP is always 1.  */
2744           DR_GROUP_GAP (next) = diff;
2745
2746           prev_init = DR_INIT (data_ref);
2747           next = DR_GROUP_NEXT_ELEMENT (next);
2748           /* Count the number of data-refs in the chain.  */
2749           count++;
2750         }
2751
2752       if (groupsize == 0)
2753         groupsize = count + gaps;
2754
2755       /* This could be UINT_MAX but as we are generating code in a very
2756          inefficient way we have to cap earlier.  See PR78699 for example.  */
2757       if (groupsize > 4096)
2758         {
2759           if (dump_enabled_p ())
2760             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2761                              "group is too large\n");
2762           return false;
2763         }
2764
2765       /* Check that the size of the interleaving is equal to count for stores,
2766          i.e., that there are no gaps.  */
2767       if (groupsize != count
2768           && !DR_IS_READ (dr))
2769         {
2770           groupsize = count;
2771           STMT_VINFO_STRIDED_P (stmt_info) = true;
2772         }
2773
2774       /* If there is a gap after the last load in the group it is the
2775          difference between the groupsize and the last accessed
2776          element.
2777          When there is no gap, this difference should be 0.  */
2778       DR_GROUP_GAP (stmt_info) = groupsize - last_accessed_element;
2779
2780       DR_GROUP_SIZE (stmt_info) = groupsize;
2781       if (dump_enabled_p ())
2782         {
2783           dump_printf_loc (MSG_NOTE, vect_location,
2784                            "Detected interleaving ");
2785           if (DR_IS_READ (dr))
2786             dump_printf (MSG_NOTE, "load ");
2787           else if (STMT_VINFO_STRIDED_P (stmt_info))
2788             dump_printf (MSG_NOTE, "strided store ");
2789           else
2790             dump_printf (MSG_NOTE, "store ");
2791           dump_printf (MSG_NOTE, "of size %u\n",
2792                        (unsigned)groupsize);
2793           dump_printf_loc (MSG_NOTE, vect_location, "\t%G", stmt_info->stmt);
2794           next = DR_GROUP_NEXT_ELEMENT (stmt_info);
2795           while (next)
2796             {
2797               if (DR_GROUP_GAP (next) != 1)
2798                 dump_printf_loc (MSG_NOTE, vect_location,
2799                                  "\t<gap of %d elements>\n",
2800                                  DR_GROUP_GAP (next) - 1);
2801               dump_printf_loc (MSG_NOTE, vect_location, "\t%G", next->stmt);
2802               next = DR_GROUP_NEXT_ELEMENT (next);
2803             }
2804           if (DR_GROUP_GAP (stmt_info) != 0)
2805             dump_printf_loc (MSG_NOTE, vect_location,
2806                              "\t<gap of %d elements>\n",
2807                              DR_GROUP_GAP (stmt_info));
2808         }
2809
2810       /* SLP: create an SLP data structure for every interleaving group of
2811          stores for further analysis in vect_analyse_slp.  */
2812       if (DR_IS_WRITE (dr) && !slp_impossible)
2813         {
2814           if (loop_vinfo)
2815             LOOP_VINFO_GROUPED_STORES (loop_vinfo).safe_push (stmt_info);
2816           if (bb_vinfo)
2817             BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (stmt_info);
2818         }
2819     }
2820
2821   return true;
2822 }
2823
2824 /* Analyze groups of accesses: check that DR_INFO belongs to a group of
2825    accesses of legal size, step, etc.  Detect gaps, single element
2826    interleaving, and other special cases. Set grouped access info.
2827    Collect groups of strided stores for further use in SLP analysis.  */
2828
2829 static bool
2830 vect_analyze_group_access (vec_info *vinfo, dr_vec_info *dr_info)
2831 {
2832   if (!vect_analyze_group_access_1 (vinfo, dr_info))
2833     {
2834       /* Dissolve the group if present.  */
2835       stmt_vec_info stmt_info = DR_GROUP_FIRST_ELEMENT (dr_info->stmt);
2836       while (stmt_info)
2837         {
2838           stmt_vec_info next = DR_GROUP_NEXT_ELEMENT (stmt_info);
2839           DR_GROUP_FIRST_ELEMENT (stmt_info) = NULL;
2840           DR_GROUP_NEXT_ELEMENT (stmt_info) = NULL;
2841           stmt_info = next;
2842         }
2843       return false;
2844     }
2845   return true;
2846 }
2847
2848 /* Analyze the access pattern of the data-reference DR_INFO.
2849    In case of non-consecutive accesses call vect_analyze_group_access() to
2850    analyze groups of accesses.  */
2851
2852 static bool
2853 vect_analyze_data_ref_access (vec_info *vinfo, dr_vec_info *dr_info)
2854 {
2855   data_reference *dr = dr_info->dr;
2856   tree step = DR_STEP (dr);
2857   tree scalar_type = TREE_TYPE (DR_REF (dr));
2858   stmt_vec_info stmt_info = dr_info->stmt;
2859   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2860   class loop *loop = NULL;
2861
2862   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2863     return true;
2864
2865   if (loop_vinfo)
2866     loop = LOOP_VINFO_LOOP (loop_vinfo);
2867
2868   if (loop_vinfo && !step)
2869     {
2870       if (dump_enabled_p ())
2871         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2872                          "bad data-ref access in loop\n");
2873       return false;
2874     }
2875
2876   /* Allow loads with zero step in inner-loop vectorization.  */
2877   if (loop_vinfo && integer_zerop (step))
2878     {
2879       DR_GROUP_FIRST_ELEMENT (stmt_info) = NULL;
2880       if (!nested_in_vect_loop_p (loop, stmt_info))
2881         return DR_IS_READ (dr);
2882       /* Allow references with zero step for outer loops marked
2883          with pragma omp simd only - it guarantees absence of
2884          loop-carried dependencies between inner loop iterations.  */
2885       if (loop->safelen < 2)
2886         {
2887           if (dump_enabled_p ())
2888             dump_printf_loc (MSG_NOTE, vect_location,
2889                              "zero step in inner loop of nest\n");
2890           return false;
2891         }
2892     }
2893
2894   if (loop && nested_in_vect_loop_p (loop, stmt_info))
2895     {
2896       /* Interleaved accesses are not yet supported within outer-loop
2897         vectorization for references in the inner-loop.  */
2898       DR_GROUP_FIRST_ELEMENT (stmt_info) = NULL;
2899
2900       /* For the rest of the analysis we use the outer-loop step.  */
2901       step = STMT_VINFO_DR_STEP (stmt_info);
2902       if (integer_zerop (step))
2903         {
2904           if (dump_enabled_p ())
2905             dump_printf_loc (MSG_NOTE, vect_location,
2906                              "zero step in outer loop.\n");
2907           return DR_IS_READ (dr);
2908         }
2909     }
2910
2911   /* Consecutive?  */
2912   if (TREE_CODE (step) == INTEGER_CST)
2913     {
2914       HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2915       if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
2916           || (dr_step < 0
2917               && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
2918         {
2919           /* Mark that it is not interleaving.  */
2920           DR_GROUP_FIRST_ELEMENT (stmt_info) = NULL;
2921           return true;
2922         }
2923     }
2924
2925   if (loop && nested_in_vect_loop_p (loop, stmt_info))
2926     {
2927       if (dump_enabled_p ())
2928         dump_printf_loc (MSG_NOTE, vect_location,
2929                          "grouped access in outer loop.\n");
2930       return false;
2931     }
2932
2933
2934   /* Assume this is a DR handled by non-constant strided load case.  */
2935   if (TREE_CODE (step) != INTEGER_CST)
2936     return (STMT_VINFO_STRIDED_P (stmt_info)
2937             && (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
2938                 || vect_analyze_group_access (vinfo, dr_info)));
2939
2940   /* Not consecutive access - check if it's a part of interleaving group.  */
2941   return vect_analyze_group_access (vinfo, dr_info);
2942 }
2943
2944 /* Compare two data-references DRA and DRB to group them into chunks
2945    suitable for grouping.  */
2946
2947 static int
2948 dr_group_sort_cmp (const void *dra_, const void *drb_)
2949 {
2950   dr_vec_info *dra_info = *(dr_vec_info **)const_cast<void *>(dra_);
2951   dr_vec_info *drb_info = *(dr_vec_info **)const_cast<void *>(drb_);
2952   data_reference_p dra = dra_info->dr;
2953   data_reference_p drb = drb_info->dr;
2954   int cmp;
2955
2956   /* Stabilize sort.  */
2957   if (dra == drb)
2958     return 0;
2959
2960   /* Different group IDs lead never belong to the same group.  */
2961   if (dra_info->group != drb_info->group)
2962     return dra_info->group < drb_info->group ? -1 : 1;
2963
2964   /* Ordering of DRs according to base.  */
2965   cmp = data_ref_compare_tree (DR_BASE_ADDRESS (dra),
2966                                DR_BASE_ADDRESS (drb));
2967   if (cmp != 0)
2968     return cmp;
2969
2970   /* And according to DR_OFFSET.  */
2971   cmp = data_ref_compare_tree (DR_OFFSET (dra), DR_OFFSET (drb));
2972   if (cmp != 0)
2973     return cmp;
2974
2975   /* Put reads before writes.  */
2976   if (DR_IS_READ (dra) != DR_IS_READ (drb))
2977     return DR_IS_READ (dra) ? -1 : 1;
2978
2979   /* Then sort after access size.  */
2980   cmp = data_ref_compare_tree (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))),
2981                                TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
2982   if (cmp != 0)
2983     return cmp;
2984
2985   /* And after step.  */
2986   cmp = data_ref_compare_tree (DR_STEP (dra), DR_STEP (drb));
2987   if (cmp != 0)
2988     return cmp;
2989
2990   /* Then sort after DR_INIT.  In case of identical DRs sort after stmt UID.  */
2991   cmp = data_ref_compare_tree (DR_INIT (dra), DR_INIT (drb));
2992   if (cmp == 0)
2993     return gimple_uid (DR_STMT (dra)) < gimple_uid (DR_STMT (drb)) ? -1 : 1;
2994   return cmp;
2995 }
2996
2997 /* If OP is the result of a conversion, return the unconverted value,
2998    otherwise return null.  */
2999
3000 static tree
3001 strip_conversion (tree op)
3002 {
3003   if (TREE_CODE (op) != SSA_NAME)
3004     return NULL_TREE;
3005   gimple *stmt = SSA_NAME_DEF_STMT (op);
3006   if (!is_gimple_assign (stmt)
3007       || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt)))
3008     return NULL_TREE;
3009   return gimple_assign_rhs1 (stmt);
3010 }
3011
3012 /* Return true if vectorizable_* routines can handle statements STMT1_INFO
3013    and STMT2_INFO being in a single group.  When ALLOW_SLP_P, masked loads can
3014    be grouped in SLP mode.  */
3015
3016 static bool
3017 can_group_stmts_p (stmt_vec_info stmt1_info, stmt_vec_info stmt2_info,
3018                    bool allow_slp_p)
3019 {
3020   if (gimple_assign_single_p (stmt1_info->stmt))
3021     return gimple_assign_single_p (stmt2_info->stmt);
3022
3023   gcall *call1 = dyn_cast <gcall *> (stmt1_info->stmt);
3024   if (call1 && gimple_call_internal_p (call1))
3025     {
3026       /* Check for two masked loads or two masked stores.  */
3027       gcall *call2 = dyn_cast <gcall *> (stmt2_info->stmt);
3028       if (!call2 || !gimple_call_internal_p (call2))
3029         return false;
3030       internal_fn ifn = gimple_call_internal_fn (call1);
3031       if (ifn != IFN_MASK_LOAD && ifn != IFN_MASK_STORE)
3032         return false;
3033       if (ifn != gimple_call_internal_fn (call2))
3034         return false;
3035
3036       /* Check that the masks are the same.  Cope with casts of masks,
3037          like those created by build_mask_conversion.  */
3038       tree mask1 = gimple_call_arg (call1, 2);
3039       tree mask2 = gimple_call_arg (call2, 2);
3040       if (!operand_equal_p (mask1, mask2, 0)
3041           && (ifn == IFN_MASK_STORE || !allow_slp_p))
3042         {
3043           mask1 = strip_conversion (mask1);
3044           if (!mask1)
3045             return false;
3046           mask2 = strip_conversion (mask2);
3047           if (!mask2)
3048             return false;
3049           if (!operand_equal_p (mask1, mask2, 0))
3050             return false;
3051         }
3052       return true;
3053     }
3054
3055   return false;
3056 }
3057
3058 /* Function vect_analyze_data_ref_accesses.
3059
3060    Analyze the access pattern of all the data references in the loop.
3061
3062    FORNOW: the only access pattern that is considered vectorizable is a
3063            simple step 1 (consecutive) access.
3064
3065    FORNOW: handle only arrays and pointer accesses.

        The data references of VINFO are sorted with dr_group_sort_cmp and
        neighbouring references that agree in kind of access, base, offset,
        size, step and type and have constant DR_INITs are chained into
        interleaving groups through DR_GROUP_FIRST_ELEMENT and
        DR_GROUP_NEXT_ELEMENT.  Groups in which one DR_INIT occurs more than
        once are split afterwards, and finally every vectorizable reference
        is checked by vect_analyze_data_ref_access.

        If DATAREF_GROUPS is nonnull its element I is used as the group ID of
        data reference I, otherwise the index of the basic block containing
        the reference is used; references with different IDs are never
        chained together.

        Return success if there are no data references or all of them were
        accepted.  For a bb_vec_info a rejected statement is only marked as
        not vectorizable; for any other VINFO the first rejected access makes
        the function return failure.  */
3066
3067 opt_result
3068 vect_analyze_data_ref_accesses (vec_info *vinfo,
3069                                 vec<int> *dataref_groups)
3070 {
3071   unsigned int i;
3072   vec<data_reference_p> datarefs = vinfo->shared->datarefs;
3073
3074   DUMP_VECT_SCOPE ("vect_analyze_data_ref_accesses");
3075
3076   if (datarefs.is_empty ())
3077     return opt_result::success ();
3078
3079   /* Sort the array of datarefs to make building the interleaving chains
3080      linear.  Don't modify the original vector's order, it is needed for
3081      determining what dependencies are reversed.  */
3082   vec<dr_vec_info *> datarefs_copy;
3083   datarefs_copy.create (datarefs.length ());
       /* Note that the I of this loop shadows the function-level I.  */
3084   for (unsigned i = 0; i < datarefs.length (); i++)
3085     {
3086       dr_vec_info *dr_info = vinfo->lookup_dr (datarefs[i]);
3087       /* If the caller computed DR grouping use that, otherwise group by
3088          basic blocks.  */
3089       if (dataref_groups)
3090         dr_info->group = (*dataref_groups)[i];
3091       else
3092         dr_info->group = gimple_bb (DR_STMT (datarefs[i]))->index;
3093       datarefs_copy.quick_push (dr_info);
3094     }
3095   datarefs_copy.qsort (dr_group_sort_cmp);
       /* Heads of groups in which two consecutive members have the same
          DR_INIT; filled while chaining and drained by the fixup loop
          below.  */
3096   hash_set<stmt_vec_info> to_fixup;
3097
3098   /* Build the interleaving chains.  The outer loop has no increment
          expression: the inner loop advances the shared index I, so the
          reference at which the inner loop stops is examined next as a
          potential group head.  */
3099   for (i = 0; i < datarefs_copy.length () - 1;)
3100     {
3101       dr_vec_info *dr_info_a = datarefs_copy[i];
3102       data_reference_p dra = dr_info_a->dr;
3103       int dra_group_id = dr_info_a->group;
3104       stmt_vec_info stmtinfo_a = dr_info_a->stmt;
           /* Tail of the chain headed by STMTINFO_A; set when the first
              element is linked.  */
3105       stmt_vec_info lastinfo = NULL;
           /* Statements that are not vectorizable or are gather/scatter
              accesses never start a chain.  */
3106       if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
3107           || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_a))
3108         {
3109           ++i;
3110           continue;
3111         }
           /* Try to append the following references to the chain of DRA.
              Every BREAK below ends the chain at the current reference.  */
3112       for (i = i + 1; i < datarefs_copy.length (); ++i)
3113         {
3114           dr_vec_info *dr_info_b = datarefs_copy[i];
3115           data_reference_p drb = dr_info_b->dr;
3116           int drb_group_id = dr_info_b->group;
3117           stmt_vec_info stmtinfo_b = dr_info_b->stmt;
3118           if (!STMT_VINFO_VECTORIZABLE (stmtinfo_b)
3119               || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_b))
3120             break;
3121
3122           /* ???  Imperfect sorting (non-compatible types, non-modulo
3123              accesses, same accesses) can lead to a group to be artificially
3124              split here as we don't just skip over those.  If it really
3125              matters we can push those to a worklist and re-iterate
3126              over them.  Then we can just skip ahead to the next DR here.  */
3127
3128           /* DRs in a different DR group should not be put into the same
3129              interleaving group.  */
3130           if (dra_group_id != drb_group_id)
3131             break;
3132
3133           /* Check that the data-refs have same first location (except init)
3134              and they are both either store or load (not load and store,
3135              not masked loads or stores).  */
3136           if (DR_IS_READ (dra) != DR_IS_READ (drb)
3137               || data_ref_compare_tree (DR_BASE_ADDRESS (dra),
3138                                         DR_BASE_ADDRESS (drb)) != 0
3139               || data_ref_compare_tree (DR_OFFSET (dra), DR_OFFSET (drb)) != 0
3140               || !can_group_stmts_p (stmtinfo_a, stmtinfo_b, true))
3141             break;
3142
3143           /* Check that the data-refs have the same constant size.  */
3144           tree sza = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra)));
3145           tree szb = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb)));
3146           if (!tree_fits_uhwi_p (sza)
3147               || !tree_fits_uhwi_p (szb)
3148               || !tree_int_cst_equal (sza, szb))
3149             break;
3150
3151           /* Check that the data-refs have the same step.  */
3152           if (data_ref_compare_tree (DR_STEP (dra), DR_STEP (drb)) != 0)
3153             break;
3154
3155           /* Check the types are compatible.
3156              ???  We don't distinguish this during sorting.  */
3157           if (!types_compatible_p (TREE_TYPE (DR_REF (dra)),
3158                                    TREE_TYPE (DR_REF (drb))))
3159             break;
3160
3161           /* Check that the DR_INITs are compile-time constants.  */
3162           if (TREE_CODE (DR_INIT (dra)) != INTEGER_CST
3163               || TREE_CODE (DR_INIT (drb)) != INTEGER_CST)
3164             break;
3165
3166           /* Different .GOMP_SIMD_LANE calls still give the same lane,
3167              just hold extra information.  */
3168           if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmtinfo_a)
3169               && STMT_VINFO_SIMD_LANE_ACCESS_P (stmtinfo_b)
3170               && data_ref_compare_tree (DR_INIT (dra), DR_INIT (drb)) == 0)
3171             break;
3172
3173           /* Sorting has ensured that DR_INIT (dra) <= DR_INIT (drb).
                  DATAREFS_COPY[I-1] is the element visited just before DRB,
                  i.e. DRA itself or the element linked last.  */
3174           HOST_WIDE_INT init_a = TREE_INT_CST_LOW (DR_INIT (dra));
3175           HOST_WIDE_INT init_b = TREE_INT_CST_LOW (DR_INIT (drb));
3176           HOST_WIDE_INT init_prev
3177             = TREE_INT_CST_LOW (DR_INIT (datarefs_copy[i-1]->dr));
3178           gcc_assert (init_a <= init_b
3179                       && init_a <= init_prev
3180                       && init_prev <= init_b);
3181
3182           /* Do not place the same access in the interleaving chain twice.  */
3183           if (init_b == init_prev)
3184             {
3185               gcc_assert (gimple_uid (DR_STMT (datarefs_copy[i-1]->dr))
3186                           < gimple_uid (DR_STMT (drb)));
3187               /* Simply link in duplicates and fix up the chain below.  */
3188             }
3189           else
3190             {
3191               /* If init_b == init_a + the size of the type * k, we have an
3192                  interleaving, and DRA is accessed before DRB.  */
3193               HOST_WIDE_INT type_size_a = tree_to_uhwi (sza);
3194               if (type_size_a == 0
3195                   || (init_b - init_a) % type_size_a != 0)
3196                 break;
3197
3198               /* If we have a store, the accesses are adjacent.  This splits
3199                  groups into chunks we support (we don't support vectorization
3200                  of stores with gaps).  */
3201               if (!DR_IS_READ (dra) && init_b - init_prev != type_size_a)
3202                 break;
3203
3204               /* If the step (if not zero or non-constant) is smaller than the
3205                  difference between data-refs' inits this splits groups into
3206                  suitable sizes.  */
3207               if (tree_fits_shwi_p (DR_STEP (dra)))
3208                 {
3209                   unsigned HOST_WIDE_INT step
3210                     = absu_hwi (tree_to_shwi (DR_STEP (dra)));
3211                   if (step != 0
3212                       && step <= (unsigned HOST_WIDE_INT)(init_b - init_a))
3213                     break;
3214                 }
3215             }
3216
3217           if (dump_enabled_p ())
3218             dump_printf_loc (MSG_NOTE, vect_location,
3219                              DR_IS_READ (dra)
3220                              ? "Detected interleaving load %T and %T\n"
3221                              : "Detected interleaving store %T and %T\n",
3222                              DR_REF (dra), DR_REF (drb));
3223
3224           /* Link the found element into the group list.  The head points
                  to itself as first element.
                  NOTE(review): if DR_GROUP_FIRST_ELEMENT (stmtinfo_a) were
                  already set when the first element is linked, LASTINFO
                  would still be null below - presumably a chain head is
                  never a member of an earlier chain; confirm.  */
3225           if (!DR_GROUP_FIRST_ELEMENT (stmtinfo_a))
3226             {
3227               DR_GROUP_FIRST_ELEMENT (stmtinfo_a) = stmtinfo_a;
3228               lastinfo = stmtinfo_a;
3229             }
3230           DR_GROUP_FIRST_ELEMENT (stmtinfo_b) = stmtinfo_a;
3231           DR_GROUP_NEXT_ELEMENT (lastinfo) = stmtinfo_b;
3232           lastinfo = stmtinfo_b;
3233
               /* The pair was accepted above with ALLOW_SLP_P set; record on
                  the head whether it would have been rejected without it.
                  NOTE(review): the flag is assigned afresh for every linked
                  element, so it reflects only the most recent pair - confirm
                  this is intended.  */
3234           STMT_VINFO_SLP_VECT_ONLY (stmtinfo_a)
3235             = !can_group_stmts_p (stmtinfo_a, stmtinfo_b, false);
3236
3237           if (dump_enabled_p () && STMT_VINFO_SLP_VECT_ONLY (stmtinfo_a))
3238             dump_printf_loc (MSG_NOTE, vect_location,
3239                              "Load suitable for SLP vectorization only.\n");
3240
               /* Queue the group head if DRB duplicates the previous access.
                  The note is dumped only when to_fixup.add () returns false,
                  presumably meaning the head was not queued before.  */
3241           if (init_b == init_prev
3242               && !to_fixup.add (DR_GROUP_FIRST_ELEMENT (stmtinfo_a))
3243               && dump_enabled_p ())
3244             dump_printf_loc (MSG_NOTE, vect_location,
3245                              "Queuing group with duplicate access for fixup\n");
3246         }
3247     }
3248
3249   /* Fixup groups with duplicate entries by splitting it.  Each iteration
          takes one queued group out of TO_FIXUP; the loop ends when the set
          is empty.  */
3250   while (1)
3251     {
3252       hash_set<stmt_vec_info>::iterator it = to_fixup.begin ();
3253       if (!(it != to_fixup.end ()))
3254         break;
3255       stmt_vec_info grp = *it;
3256       to_fixup.remove (grp);
3257
3258       /* Find the earliest duplicate group member, i.e. the smallest UID
              among the members whose DR_INIT equals that of their
              predecessor in the chain.  -1u means none was found.  */
3259       unsigned first_duplicate = -1u;
3260       stmt_vec_info next, g = grp;
3261       while ((next = DR_GROUP_NEXT_ELEMENT (g)))
3262         {
3263           if (tree_int_cst_equal (DR_INIT (STMT_VINFO_DR_INFO (next)->dr),
3264                                   DR_INIT (STMT_VINFO_DR_INFO (g)->dr))
3265               && gimple_uid (STMT_VINFO_STMT (next)) < first_duplicate)
3266             first_duplicate = gimple_uid (STMT_VINFO_STMT (next));
3267           g = next;
3268         }
3269       if (first_duplicate == -1U)
3270         continue;
3271
3272       /* Then move all stmts after the first duplicate to a new group.
3273          Note this is a heuristic but one with the property that *it
3274          is fixed up completely.  G walks the members staying in the old
              group and only advances when NEXT is kept; NG is the tail of
              the new group, whose first moved member becomes NEWGROUP.  */
3275       g = grp;
3276       stmt_vec_info newgroup = NULL, ng = grp;
3277       while ((next = DR_GROUP_NEXT_ELEMENT (g)))
3278         {
3279           if (gimple_uid (STMT_VINFO_STMT (next)) >= first_duplicate)
3280             {
3281               DR_GROUP_NEXT_ELEMENT (g) = DR_GROUP_NEXT_ELEMENT (next);
3282               if (!newgroup)
3283                 newgroup = next;
3284               else
3285                 DR_GROUP_NEXT_ELEMENT (ng) = next;
3286               ng = next;
3287               DR_GROUP_FIRST_ELEMENT (ng) = newgroup;
3288             }
3289           else
3290             g = DR_GROUP_NEXT_ELEMENT (g);
3291         }
           /* At least the first duplicate itself was moved, so NG is the
              tail of the new group here; terminate its chain.  */
3292       DR_GROUP_NEXT_ELEMENT (ng) = NULL;
3293
3294       /* Fixup the new group which still may contain duplicates.  */
3295       to_fixup.add (newgroup);
3296     }
3297
       /* Finally analyze the access of every vectorizable reference.  */
3298   dr_vec_info *dr_info;
3299   FOR_EACH_VEC_ELT (datarefs_copy, i, dr_info)
3300     {
3301       if (STMT_VINFO_VECTORIZABLE (dr_info->stmt)
3302           && !vect_analyze_data_ref_access (vinfo, dr_info))
3303         {
3304           if (dump_enabled_p ())
3305             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3306                              "not vectorized: complicated access pattern.\n");
3307
3308           if (is_a <bb_vec_info> (vinfo))
3309             {
3310               /* Mark the statement as not vectorizable.  */
3311               STMT_VINFO_VECTORIZABLE (dr_info->stmt) = false;
3312               continue;
3313             }
3314           else
3315             {
                   /* DATAREFS_COPY is released by hand on every exit path
                      after its creation.  */
3316               datarefs_copy.release ();
3317               return opt_result::failure_at (dr_info->stmt->stmt,
3318                                              "not vectorized:"
3319                                              " complicated access pattern.\n");
3320             }
3321         }
3322     }
3323
3324   datarefs_copy.release ();
3325   return opt_result::success ();
3326 }
3327
3328 /* Function vect_vfa_segment_size.
3329
3330    Input:
3331      DR_INFO: The data reference.
3332      LENGTH_FACTOR: segment length to consider.
3333
3334    Return a value suitable for the dr_with_seg_len::seg_len field.
3335    This is the "distance travelled" by the pointer from the first
3336    iteration in the segment to the last.  Note that it does not include
3337    the size of the access; in effect it only describes the first byte.  */
3338
3339 static tree
3340 vect_vfa_segment_size (dr_vec_info *dr_info, tree length_factor)
3341 {
3342   length_factor = size_binop (MINUS_EXPR,
3343                               fold_convert (sizetype, length_factor),
3344                               size_one_node);
3345   return size_binop (MULT_EXPR, fold_convert (sizetype, DR_STEP (dr_info->dr)),
3346                      length_factor);
3347 }
3348
3349 /* Return a value that, when added to abs (vect_vfa_segment_size (DR_INFO)),
3350    gives the worst-case number of bytes covered by the segment.  */
3351
3352 static unsigned HOST_WIDE_INT
3353 vect_vfa_access_size (vec_info *vinfo, dr_vec_info *dr_info)
3354 {
3355   stmt_vec_info stmt_vinfo = dr_info->stmt;
3356   tree ref_type = TREE_TYPE (DR_REF (dr_info->dr));
3357   unsigned HOST_WIDE_INT ref_size = tree_to_uhwi (TYPE_SIZE_UNIT (ref_type));
3358   unsigned HOST_WIDE_INT access_size = ref_size;
3359   if (DR_GROUP_FIRST_ELEMENT (stmt_vinfo))
3360     {
3361       gcc_assert (DR_GROUP_FIRST_ELEMENT (stmt_vinfo) == stmt_vinfo);
3362       access_size *= DR_GROUP_SIZE (stmt_vinfo) - DR_GROUP_GAP (stmt_vinfo);
3363     }
3364   tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3365   int misalignment;
3366   if (STMT_VINFO_VEC_STMTS (stmt_vinfo).exists ()
3367       && ((misalignment = dr_misalignment (dr_info, vectype)), true)
3368       && (vect_supportable_dr_alignment (vinfo, dr_info, vectype, misalignment)
3369           == dr_explicit_realign_optimized))
3370     {
3371       /* We might access a full vector's worth.  */
3372       access_size += tree_to_uhwi (TYPE_SIZE_UNIT (vectype)) - ref_size;
3373     }
3374   return access_size;
3375 }
3376
3377 /* Get the minimum alignment for all the scalar accesses that DR_INFO
3378    describes.  */
3379
3380 static unsigned int
3381 vect_vfa_align (dr_vec_info *dr_info)
3382 {
3383   return dr_alignment (dr_info->dr);
3384 }
3385
3386 /* Function vect_no_alias_p.
3387
3388    Given data references A and B with equal base and offset, see whether
3389    the alias relation can be decided at compilation time.  Return 1 if
3390    it can and the references alias, 0 if it can and the references do
3391    not alias, and -1 if we cannot decide at compile time.  SEGMENT_LENGTH_A,
3392    SEGMENT_LENGTH_B, ACCESS_SIZE_A and ACCESS_SIZE_B are the equivalent
3393    of dr_with_seg_len::{seg_len,access_size} for A and B.  */
3394
3395 static int
3396 vect_compile_time_alias (dr_vec_info *a, dr_vec_info *b,
3397                          tree segment_length_a, tree segment_length_b,
3398                          unsigned HOST_WIDE_INT access_size_a,
3399                          unsigned HOST_WIDE_INT access_size_b)
3400 {
3401   poly_offset_int offset_a = wi::to_poly_offset (DR_INIT (a->dr));
3402   poly_offset_int offset_b = wi::to_poly_offset (DR_INIT (b->dr));
3403   poly_uint64 const_length_a;
3404   poly_uint64 const_length_b;
3405
3406   /* For negative step, we need to adjust address range by TYPE_SIZE_UNIT
3407      bytes, e.g., int a[3] -> a[1] range is [a+4, a+16) instead of
3408      [a, a+12) */
3409   if (tree_int_cst_compare (DR_STEP (a->dr), size_zero_node) < 0)
3410     {
3411       const_length_a = (-wi::to_poly_wide (segment_length_a)).force_uhwi ();
3412       offset_a -= const_length_a;
3413     }
3414   else
3415     const_length_a = tree_to_poly_uint64 (segment_length_a);
3416   if (tree_int_cst_compare (DR_STEP (b->dr), size_zero_node) < 0)
3417     {
3418       const_length_b = (-wi::to_poly_wide (segment_length_b)).force_uhwi ();
3419       offset_b -= const_length_b;
3420     }
3421   else
3422     const_length_b = tree_to_poly_uint64 (segment_length_b);
3423
3424   const_length_a += access_size_a;
3425   const_length_b += access_size_b;
3426
3427   if (ranges_known_overlap_p (offset_a, const_length_a,
3428                               offset_b, const_length_b))
3429     return 1;
3430
3431   if (!ranges_maybe_overlap_p (offset_a, const_length_a,
3432                                offset_b, const_length_b))
3433     return 0;
3434
3435   return -1;
3436 }
3437
3438 /* Return true if the minimum nonzero dependence distance for loop LOOP_DEPTH
3439    in DDR is >= VF.  */
3440
3441 static bool
3442 dependence_distance_ge_vf (data_dependence_relation *ddr,
3443                            unsigned int loop_depth, poly_uint64 vf)
3444 {
3445   if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE
3446       || DDR_NUM_DIST_VECTS (ddr) == 0)
3447     return false;
3448
3449   /* If the dependence is exact, we should have limited the VF instead.  */
3450   gcc_checking_assert (DDR_COULD_BE_INDEPENDENT_P (ddr));
3451
3452   unsigned int i;
3453   lambda_vector dist_v;
3454   FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
3455     {
3456       HOST_WIDE_INT dist = dist_v[loop_depth];
3457       if (dist != 0
3458           && !(dist > 0 && DDR_REVERSED_P (ddr))
3459           && maybe_lt ((unsigned HOST_WIDE_INT) abs_hwi (dist), vf))
3460         return false;
3461     }
3462
3463   if (dump_enabled_p ())
3464     dump_printf_loc (MSG_NOTE, vect_location,
3465                      "dependence distance between %T and %T is >= VF\n",
3466                      DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr)));
3467
3468   return true;
3469 }
3470
3471 /* Dump LOWER_BOUND using flags DUMP_KIND.  Dumps are known to be enabled.  */
3472
3473 static void
3474 dump_lower_bound (dump_flags_t dump_kind, const vec_lower_bound &lower_bound)
3475 {
3476   dump_printf (dump_kind, "%s (%T) >= ",
3477                lower_bound.unsigned_p ? "unsigned" : "abs",
3478                lower_bound.expr);
3479   dump_dec (dump_kind, lower_bound.min_value);
3480 }
3481
3482 /* Record that the vectorized loop requires the vec_lower_bound described
3483    by EXPR, UNSIGNED_P and MIN_VALUE.  */
3484
3485 static void
3486 vect_check_lower_bound (loop_vec_info loop_vinfo, tree expr, bool unsigned_p,
3487                         poly_uint64 min_value)
3488 {
3489   vec<vec_lower_bound> &lower_bounds
3490     = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo);
3491   for (unsigned int i = 0; i < lower_bounds.length (); ++i)
3492     if (operand_equal_p (lower_bounds[i].expr, expr, 0))
3493       {
3494         unsigned_p &= lower_bounds[i].unsigned_p;
3495         min_value = upper_bound (lower_bounds[i].min_value, min_value);
3496         if (lower_bounds[i].unsigned_p != unsigned_p
3497             || maybe_lt (lower_bounds[i].min_value, min_value))
3498           {
3499             lower_bounds[i].unsigned_p = unsigned_p;
3500             lower_bounds[i].min_value = min_value;
3501             if (dump_enabled_p ())
3502               {
3503                 dump_printf_loc (MSG_NOTE, vect_location,
3504                                  "updating run-time check to ");
3505                 dump_lower_bound (MSG_NOTE, lower_bounds[i]);
3506                 dump_printf (MSG_NOTE, "\n");
3507               }
3508           }
3509         return;
3510       }
3511
3512   vec_lower_bound lower_bound (expr, unsigned_p, min_value);
3513   if (dump_enabled_p ())
3514     {
3515       dump_printf_loc (MSG_NOTE, vect_location, "need a run-time check that ");
3516       dump_lower_bound (MSG_NOTE, lower_bound);
3517       dump_printf (MSG_NOTE, "\n");
3518     }
3519   LOOP_VINFO_LOWER_BOUNDS (loop_vinfo).safe_push (lower_bound);
3520 }
3521
3522 /* Return true if it's unlikely that the step of the vectorized form of DR_INFO
3523    will span fewer than GAP bytes.  */
3524
3525 static bool
3526 vect_small_gap_p (loop_vec_info loop_vinfo, dr_vec_info *dr_info,
3527                   poly_int64 gap)
3528 {
3529   stmt_vec_info stmt_info = dr_info->stmt;
3530   HOST_WIDE_INT count
3531     = estimated_poly_value (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
3532   if (DR_GROUP_FIRST_ELEMENT (stmt_info))
3533     count *= DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info));
3534   return (estimated_poly_value (gap)
3535           <= count * vect_get_scalar_dr_size (dr_info));
3536 }
3537
3538 /* Return true if we know that there is no alias between DR_INFO_A and
3539    DR_INFO_B when abs (DR_STEP (DR_INFO_A->dr)) >= N for some N.
3540    When returning true, set *LOWER_BOUND_OUT to this N.  */
3541
3542 static bool
3543 vectorizable_with_step_bound_p (dr_vec_info *dr_info_a, dr_vec_info *dr_info_b,
3544                                 poly_uint64 *lower_bound_out)
3545 {
3546   /* Check that there is a constant gap of known sign between DR_A
3547      and DR_B.  */
3548   data_reference *dr_a = dr_info_a->dr;
3549   data_reference *dr_b = dr_info_b->dr;
3550   poly_int64 init_a, init_b;
3551   if (!operand_equal_p (DR_BASE_ADDRESS (dr_a), DR_BASE_ADDRESS (dr_b), 0)
3552       || !operand_equal_p (DR_OFFSET (dr_a), DR_OFFSET (dr_b), 0)
3553       || !operand_equal_p (DR_STEP (dr_a), DR_STEP (dr_b), 0)
3554       || !poly_int_tree_p (DR_INIT (dr_a), &init_a)
3555       || !poly_int_tree_p (DR_INIT (dr_b), &init_b)
3556       || !ordered_p (init_a, init_b))
3557     return false;
3558
3559   /* Sort DR_A and DR_B by the address they access.  */
3560   if (maybe_lt (init_b, init_a))
3561     {
3562       std::swap (init_a, init_b);
3563       std::swap (dr_info_a, dr_info_b);
3564       std::swap (dr_a, dr_b);
3565     }
3566
3567   /* If the two accesses could be dependent within a scalar iteration,
3568      make sure that we'd retain their order.  */
3569   if (maybe_gt (init_a + vect_get_scalar_dr_size (dr_info_a), init_b)
3570       && !vect_preserves_scalar_order_p (dr_info_a, dr_info_b))
3571     return false;
3572
3573   /* There is no alias if abs (DR_STEP) is greater than or equal to
3574      the bytes spanned by the combination of the two accesses.  */
3575   *lower_bound_out = init_b + vect_get_scalar_dr_size (dr_info_b) - init_a;
3576   return true;
3577 }
3578
3579 /* Function vect_prune_runtime_alias_test_list.
3580
3581    Prune a list of ddrs to be tested at run-time by versioning for alias.
3582    Merge several alias checks into one if possible.
3583    Return FALSE if resulting list of ddrs is longer than allowed by
3584    PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE.  */
3585
3586 opt_result
3587 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
3588 {
3589   typedef pair_hash <tree_operand_hash, tree_operand_hash> tree_pair_hash;
3590   hash_set <tree_pair_hash> compared_objects;
3591
3592   const vec<ddr_p> &may_alias_ddrs = LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
3593   vec<dr_with_seg_len_pair_t> &comp_alias_ddrs
3594     = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
3595   const vec<vec_object_pair> &check_unequal_addrs
3596     = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo);
3597   poly_uint64 vect_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3598   tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
3599
3600   ddr_p ddr;
3601   unsigned int i;
3602   tree length_factor;
3603
3604   DUMP_VECT_SCOPE ("vect_prune_runtime_alias_test_list");
3605
3606   /* Step values are irrelevant for aliasing if the number of vector
3607      iterations is equal to the number of scalar iterations (which can
3608      happen for fully-SLP loops).  */
3609   bool vf_one_p = known_eq (LOOP_VINFO_VECT_FACTOR (loop_vinfo), 1U);
3610
3611   if (!vf_one_p)
3612     {
3613       /* Convert the checks for nonzero steps into bound tests.  */
3614       tree value;
3615       FOR_EACH_VEC_ELT (LOOP_VINFO_CHECK_NONZERO (loop_vinfo), i, value)
3616         vect_check_lower_bound (loop_vinfo, value, true, 1);
3617     }
3618
3619   if (may_alias_ddrs.is_empty ())
3620     return opt_result::success ();
3621
3622   comp_alias_ddrs.create (may_alias_ddrs.length ());
3623
3624   unsigned int loop_depth
3625     = index_in_loop_nest (LOOP_VINFO_LOOP (loop_vinfo)->num,
3626                           LOOP_VINFO_LOOP_NEST (loop_vinfo));
3627
3628   /* First, we collect all data ref pairs for aliasing checks.  */
3629   FOR_EACH_VEC_ELT (may_alias_ddrs, i, ddr)
3630     {
3631       poly_uint64 lower_bound;
3632       tree segment_length_a, segment_length_b;
3633       unsigned HOST_WIDE_INT access_size_a, access_size_b;
3634       unsigned int align_a, align_b;
3635
3636       /* Ignore the alias if the VF we chose ended up being no greater
3637          than the dependence distance.  */
3638       if (dependence_distance_ge_vf (ddr, loop_depth, vect_factor))
3639         continue;
3640
3641       if (DDR_OBJECT_A (ddr))
3642         {
3643           vec_object_pair new_pair (DDR_OBJECT_A (ddr), DDR_OBJECT_B (ddr));
3644           if (!compared_objects.add (new_pair))
3645             {
3646               if (dump_enabled_p ())
3647                 dump_printf_loc (MSG_NOTE, vect_location,
3648                                  "checking that %T and %T"
3649                                  " have different addresses\n",
3650                                  new_pair.first, new_pair.second);
3651               LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).safe_push (new_pair);
3652             }
3653           continue;
3654         }
3655
3656       dr_vec_info *dr_info_a = loop_vinfo->lookup_dr (DDR_A (ddr));
3657       stmt_vec_info stmt_info_a = dr_info_a->stmt;
3658
3659       dr_vec_info *dr_info_b = loop_vinfo->lookup_dr (DDR_B (ddr));
3660       stmt_vec_info stmt_info_b = dr_info_b->stmt;
3661
3662       bool preserves_scalar_order_p
3663         = vect_preserves_scalar_order_p (dr_info_a, dr_info_b);
3664       bool ignore_step_p
3665           = (vf_one_p
3666              && (preserves_scalar_order_p
3667                  || operand_equal_p (DR_STEP (dr_info_a->dr),
3668                                      DR_STEP (dr_info_b->dr))));
3669
3670       /* Skip the pair if inter-iteration dependencies are irrelevant
3671          and intra-iteration dependencies are guaranteed to be honored.  */
3672       if (ignore_step_p
3673           && (preserves_scalar_order_p
3674               || vectorizable_with_step_bound_p (dr_info_a, dr_info_b,
3675                                                  &lower_bound)))
3676         {
3677           if (dump_enabled_p ())
3678             dump_printf_loc (MSG_NOTE, vect_location,
3679                              "no need for alias check between "
3680                              "%T and %T when VF is 1\n",
3681                              DR_REF (dr_info_a->dr), DR_REF (dr_info_b->dr));
3682           continue;
3683         }
3684
3685       /* See whether we can handle the alias using a bounds check on
3686          the step, and whether that's likely to be the best approach.
3687          (It might not be, for example, if the minimum step is much larger
3688          than the number of bytes handled by one vector iteration.)  */
3689       if (!ignore_step_p
3690           && TREE_CODE (DR_STEP (dr_info_a->dr)) != INTEGER_CST
3691           && vectorizable_with_step_bound_p (dr_info_a, dr_info_b,
3692                                              &lower_bound)
3693           && (vect_small_gap_p (loop_vinfo, dr_info_a, lower_bound)
3694               || vect_small_gap_p (loop_vinfo, dr_info_b, lower_bound)))
3695         {
3696           bool unsigned_p = dr_known_forward_stride_p (dr_info_a->dr);
3697           if (dump_enabled_p ())
3698             {
3699               dump_printf_loc (MSG_NOTE, vect_location, "no alias between "
3700                                "%T and %T when the step %T is outside ",
3701                                DR_REF (dr_info_a->dr),
3702                                DR_REF (dr_info_b->dr),
3703                                DR_STEP (dr_info_a->dr));
3704               if (unsigned_p)
3705                 dump_printf (MSG_NOTE, "[0");
3706               else
3707                 {
3708                   dump_printf (MSG_NOTE, "(");
3709                   dump_dec (MSG_NOTE, poly_int64 (-lower_bound));
3710                 }
3711               dump_printf (MSG_NOTE, ", ");
3712               dump_dec (MSG_NOTE, lower_bound);
3713               dump_printf (MSG_NOTE, ")\n");
3714             }
3715           vect_check_lower_bound (loop_vinfo, DR_STEP (dr_info_a->dr),
3716                                   unsigned_p, lower_bound);
3717           continue;
3718         }
3719
3720       stmt_vec_info dr_group_first_a = DR_GROUP_FIRST_ELEMENT (stmt_info_a);
3721       if (dr_group_first_a)
3722         {
3723           stmt_info_a = dr_group_first_a;
3724           dr_info_a = STMT_VINFO_DR_INFO (stmt_info_a);
3725         }
3726
3727       stmt_vec_info dr_group_first_b = DR_GROUP_FIRST_ELEMENT (stmt_info_b);
3728       if (dr_group_first_b)
3729         {
3730           stmt_info_b = dr_group_first_b;
3731           dr_info_b = STMT_VINFO_DR_INFO (stmt_info_b);
3732         }
3733
3734       if (ignore_step_p)
3735         {
3736           segment_length_a = size_zero_node;
3737           segment_length_b = size_zero_node;
3738         }
3739       else
3740         {
3741           if (!operand_equal_p (DR_STEP (dr_info_a->dr),
3742                                 DR_STEP (dr_info_b->dr), 0))
3743             length_factor = scalar_loop_iters;
3744           else
3745             length_factor = size_int (vect_factor);
3746           segment_length_a = vect_vfa_segment_size (dr_info_a, length_factor);
3747           segment_length_b = vect_vfa_segment_size (dr_info_b, length_factor);
3748         }
3749       access_size_a = vect_vfa_access_size (loop_vinfo, dr_info_a);
3750       access_size_b = vect_vfa_access_size (loop_vinfo, dr_info_b);
3751       align_a = vect_vfa_align (dr_info_a);
3752       align_b = vect_vfa_align (dr_info_b);
3753
3754       /* See whether the alias is known at compilation time.  */
3755       if (operand_equal_p (DR_BASE_ADDRESS (dr_info_a->dr),
3756                            DR_BASE_ADDRESS (dr_info_b->dr), 0)
3757           && operand_equal_p (DR_OFFSET (dr_info_a->dr),
3758                               DR_OFFSET (dr_info_b->dr), 0)
3759           && TREE_CODE (DR_STEP (dr_info_a->dr)) == INTEGER_CST
3760           && TREE_CODE (DR_STEP (dr_info_b->dr)) == INTEGER_CST
3761           && poly_int_tree_p (segment_length_a)
3762           && poly_int_tree_p (segment_length_b))
3763         {
3764           int res = vect_compile_time_alias (dr_info_a, dr_info_b,
3765                                              segment_length_a,
3766                                              segment_length_b,
3767                                              access_size_a,
3768                                              access_size_b);
3769           if (res >= 0 && dump_enabled_p ())
3770             {
3771               dump_printf_loc (MSG_NOTE, vect_location,
3772                                "can tell at compile time that %T and %T",
3773                                DR_REF (dr_info_a->dr), DR_REF (dr_info_b->dr));
3774               if (res == 0)
3775                 dump_printf (MSG_NOTE, " do not alias\n");
3776               else
3777                 dump_printf (MSG_NOTE, " alias\n");
3778             }
3779
3780           if (res == 0)
3781             continue;
3782
3783           if (res == 1)
3784             return opt_result::failure_at (stmt_info_b->stmt,
3785                                            "not vectorized:"
3786                                            " compilation time alias: %G%G",
3787                                            stmt_info_a->stmt,
3788                                            stmt_info_b->stmt);
3789         }
3790
3791       dr_with_seg_len dr_a (dr_info_a->dr, segment_length_a,
3792                             access_size_a, align_a);
3793       dr_with_seg_len dr_b (dr_info_b->dr, segment_length_b,
3794                             access_size_b, align_b);
3795       /* Canonicalize the order to be the one that's needed for accurate
3796          RAW, WAR and WAW flags, in cases where the data references are
3797          well-ordered.  The order doesn't really matter otherwise,
3798          but we might as well be consistent.  */
3799       if (get_later_stmt (stmt_info_a, stmt_info_b) == stmt_info_a)
3800         std::swap (dr_a, dr_b);
3801
3802       dr_with_seg_len_pair_t dr_with_seg_len_pair
3803         (dr_a, dr_b, (preserves_scalar_order_p
3804                       ? dr_with_seg_len_pair_t::WELL_ORDERED
3805                       : dr_with_seg_len_pair_t::REORDERED));
3806
3807       comp_alias_ddrs.safe_push (dr_with_seg_len_pair);
3808     }
3809
3810   prune_runtime_alias_test_list (&comp_alias_ddrs, vect_factor);
3811
3812   unsigned int count = (comp_alias_ddrs.length ()
3813                         + check_unequal_addrs.length ());
3814
3815   if (count
3816       && (loop_cost_model (LOOP_VINFO_LOOP (loop_vinfo))
3817           == VECT_COST_MODEL_VERY_CHEAP))
3818     return opt_result::failure_at
3819       (vect_location, "would need a runtime alias check\n");
3820
3821   if (dump_enabled_p ())
3822     dump_printf_loc (MSG_NOTE, vect_location,
3823                      "improved number of alias checks from %d to %d\n",
3824                      may_alias_ddrs.length (), count);
3825   unsigned limit = param_vect_max_version_for_alias_checks;
3826   if (loop_cost_model (LOOP_VINFO_LOOP (loop_vinfo)) == VECT_COST_MODEL_CHEAP)
3827     limit = param_vect_max_version_for_alias_checks * 6 / 10;
3828   if (count > limit)
3829     return opt_result::failure_at
3830       (vect_location,
3831        "number of versioning for alias run-time tests exceeds %d "
3832        "(--param vect-max-version-for-alias-checks)\n", limit);
3833
3834   return opt_result::success ();
3835 }
3836
3837 /* Check whether we can use an internal function for a gather load
3838    or scatter store.  READ_P is true for loads and false for stores.
3839    MASKED_P is true if the load or store is conditional.  MEMORY_TYPE is
3840    the type of the memory elements being loaded or stored.  OFFSET_TYPE
3841    is the type of the offset that is being applied to the invariant
3842    base address.  SCALE is the amount by which the offset should
3843    be multiplied *after* it has been converted to address width.
3844
3845    Return true if the function is supported, storing the function id in
3846    *IFN_OUT and the vector type for the offset in *OFFSET_VECTYPE_OUT.  */
3847
3848 bool
3849 vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
3850                           tree vectype, tree memory_type, tree offset_type,
3851                           int scale, internal_fn *ifn_out,
3852                           tree *offset_vectype_out)
3853 {
3854   unsigned int memory_bits = tree_to_uhwi (TYPE_SIZE (memory_type));
3855   unsigned int element_bits = vector_element_bits (vectype);
3856   if (element_bits != memory_bits)
3857     /* For now the vector elements must be the same width as the
3858        memory elements.  */
3859     return false;
3860
3861   /* Work out which function we need.  */
3862   internal_fn ifn, alt_ifn;
3863   if (read_p)
3864     {
3865       ifn = masked_p ? IFN_MASK_GATHER_LOAD : IFN_GATHER_LOAD;
3866       alt_ifn = IFN_MASK_GATHER_LOAD;
3867     }
3868   else
3869     {
3870       ifn = masked_p ? IFN_MASK_SCATTER_STORE : IFN_SCATTER_STORE;
3871       alt_ifn = IFN_MASK_SCATTER_STORE;
3872     }
3873
3874   for (;;)
3875     {
3876       tree offset_vectype = get_vectype_for_scalar_type (vinfo, offset_type);
3877       if (!offset_vectype)
3878         return false;
3879
3880       /* Test whether the target supports this combination.  */
3881       if (internal_gather_scatter_fn_supported_p (ifn, vectype, memory_type,
3882                                                   offset_vectype, scale))
3883         {
3884           *ifn_out = ifn;
3885           *offset_vectype_out = offset_vectype;
3886           return true;
3887         }
3888       else if (!masked_p
3889                && internal_gather_scatter_fn_supported_p (alt_ifn, vectype,
3890                                                           memory_type,
3891                                                           offset_vectype,
3892                                                           scale))
3893         {
3894           *ifn_out = alt_ifn;
3895           *offset_vectype_out = offset_vectype;
3896           return true;
3897         }
3898
3899       if (TYPE_PRECISION (offset_type) >= POINTER_SIZE
3900           && TYPE_PRECISION (offset_type) >= element_bits)
3901         return false;
3902
3903       offset_type = build_nonstandard_integer_type
3904         (TYPE_PRECISION (offset_type) * 2, TYPE_UNSIGNED (offset_type));
3905     }
3906 }
3907
3908 /* STMT_INFO is a call to an internal gather load or scatter store function.
3909    Describe the operation in INFO.  */
3910
3911 static void
3912 vect_describe_gather_scatter_call (stmt_vec_info stmt_info,
3913                                    gather_scatter_info *info)
3914 {
3915   gcall *call = as_a <gcall *> (stmt_info->stmt);
3916   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3917   data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3918
3919   info->ifn = gimple_call_internal_fn (call);
3920   info->decl = NULL_TREE;
3921   info->base = gimple_call_arg (call, 0);
3922   info->offset = gimple_call_arg (call, 1);
3923   info->offset_dt = vect_unknown_def_type;
3924   info->offset_vectype = NULL_TREE;
3925   info->scale = TREE_INT_CST_LOW (gimple_call_arg (call, 2));
3926   info->element_type = TREE_TYPE (vectype);
3927   info->memory_type = TREE_TYPE (DR_REF (dr));
3928 }
3929
3930 /* Return true if a non-affine read or write in STMT_INFO is suitable for a
3931    gather load or scatter store.  Describe the operation in *INFO if so.  */
3932
3933 bool
3934 vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
3935                            gather_scatter_info *info)
3936 {
3937   HOST_WIDE_INT scale = 1;
3938   poly_int64 pbitpos, pbitsize;
3939   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3940   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3941   tree offtype = NULL_TREE;
3942   tree decl = NULL_TREE, base, off;
3943   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3944   tree memory_type = TREE_TYPE (DR_REF (dr));
3945   machine_mode pmode;
3946   int punsignedp, reversep, pvolatilep = 0;
3947   internal_fn ifn;
3948   tree offset_vectype;
3949   bool masked_p = false;
3950
3951   /* See whether this is already a call to a gather/scatter internal function.
3952      If not, see whether it's a masked load or store.  */
3953   gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
3954   if (call && gimple_call_internal_p (call))
3955     {
3956       ifn = gimple_call_internal_fn (call);
3957       if (internal_gather_scatter_fn_p (ifn))
3958         {
3959           vect_describe_gather_scatter_call (stmt_info, info);
3960           return true;
3961         }
3962       masked_p = (ifn == IFN_MASK_LOAD || ifn == IFN_MASK_STORE);
3963     }
3964
3965   /* True if we should aim to use internal functions rather than
3966      built-in functions.  */
3967   bool use_ifn_p = (DR_IS_READ (dr)
3968                     ? supports_vec_gather_load_p (TYPE_MODE (vectype))
3969                     : supports_vec_scatter_store_p (TYPE_MODE (vectype)));
3970
3971   base = DR_REF (dr);
3972   /* For masked loads/stores, DR_REF (dr) is an artificial MEM_REF,
3973      see if we can use the def stmt of the address.  */
3974   if (masked_p
3975       && TREE_CODE (base) == MEM_REF
3976       && TREE_CODE (TREE_OPERAND (base, 0)) == SSA_NAME
3977       && integer_zerop (TREE_OPERAND (base, 1))
3978       && !expr_invariant_in_loop_p (loop, TREE_OPERAND (base, 0)))
3979     {
3980       gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base, 0));
3981       if (is_gimple_assign (def_stmt)
3982           && gimple_assign_rhs_code (def_stmt) == ADDR_EXPR)
3983         base = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
3984     }
3985
3986   /* The gather and scatter builtins need address of the form
3987      loop_invariant + vector * {1, 2, 4, 8}
3988      or
3989      loop_invariant + sign_extend (vector) * { 1, 2, 4, 8 }.
3990      Unfortunately DR_BASE_ADDRESS/DR_OFFSET can be a mixture
3991      of loop invariants/SSA_NAMEs defined in the loop, with casts,
3992      multiplications and additions in it.  To get a vector, we need
3993      a single SSA_NAME that will be defined in the loop and will
3994      contain everything that is not loop invariant and that can be
3995      vectorized.  The following code attempts to find such a preexistng
3996      SSA_NAME OFF and put the loop invariants into a tree BASE
3997      that can be gimplified before the loop.  */
3998   base = get_inner_reference (base, &pbitsize, &pbitpos, &off, &pmode,
3999                               &punsignedp, &reversep, &pvolatilep);
4000   if (reversep)
4001     return false;
4002
4003   poly_int64 pbytepos = exact_div (pbitpos, BITS_PER_UNIT);
4004
4005   if (TREE_CODE (base) == MEM_REF)
4006     {
4007       if (!integer_zerop (TREE_OPERAND (base, 1)))
4008         {
4009           if (off == NULL_TREE)
4010             off = wide_int_to_tree (sizetype, mem_ref_offset (base));
4011           else
4012             off = size_binop (PLUS_EXPR, off,
4013                               fold_convert (sizetype, TREE_OPERAND (base, 1)));
4014         }
4015       base = TREE_OPERAND (base, 0);
4016     }
4017   else
4018     base = build_fold_addr_expr (base);
4019
4020   if (off == NULL_TREE)
4021     off = size_zero_node;
4022
4023   /* If base is not loop invariant, either off is 0, then we start with just
4024      the constant offset in the loop invariant BASE and continue with base
4025      as OFF, otherwise give up.
4026      We could handle that case by gimplifying the addition of base + off
4027      into some SSA_NAME and use that as off, but for now punt.  */
4028   if (!expr_invariant_in_loop_p (loop, base))
4029     {
4030       if (!integer_zerop (off))
4031         return false;
4032       off = base;
4033       base = size_int (pbytepos);
4034     }
4035   /* Otherwise put base + constant offset into the loop invariant BASE
4036      and continue with OFF.  */
4037   else
4038     {
4039       base = fold_convert (sizetype, base);
4040       base = size_binop (PLUS_EXPR, base, size_int (pbytepos));
4041     }
4042
4043   /* OFF at this point may be either a SSA_NAME or some tree expression
4044      from get_inner_reference.  Try to peel off loop invariants from it
4045      into BASE as long as possible.  */
4046   STRIP_NOPS (off);
4047   while (offtype == NULL_TREE)
4048     {
4049       enum tree_code code;
4050       tree op0, op1, add = NULL_TREE;
4051
4052       if (TREE_CODE (off) == SSA_NAME)
4053         {
4054           gimple *def_stmt = SSA_NAME_DEF_STMT (off);
4055
4056           if (expr_invariant_in_loop_p (loop, off))
4057             return false;
4058
4059           if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
4060             break;
4061
4062           op0 = gimple_assign_rhs1 (def_stmt);
4063           code = gimple_assign_rhs_code (def_stmt);
4064           op1 = gimple_assign_rhs2 (def_stmt);
4065         }
4066       else
4067         {
4068           if (get_gimple_rhs_class (TREE_CODE (off)) == GIMPLE_TERNARY_RHS)
4069             return false;
4070           code = TREE_CODE (off);
4071           extract_ops_from_tree (off, &code, &op0, &op1);
4072         }
4073       switch (code)
4074         {
4075         case POINTER_PLUS_EXPR:
4076         case PLUS_EXPR:
4077           if (expr_invariant_in_loop_p (loop, op0))
4078             {
4079               add = op0;
4080               off = op1;
4081             do_add:
4082               add = fold_convert (sizetype, add);
4083               if (scale != 1)
4084                 add = size_binop (MULT_EXPR, add, size_int (scale));
4085               base = size_binop (PLUS_EXPR, base, add);
4086               continue;
4087             }
4088           if (expr_invariant_in_loop_p (loop, op1))
4089             {
4090               add = op1;
4091               off = op0;
4092               goto do_add;
4093             }
4094           break;
4095         case MINUS_EXPR:
4096           if (expr_invariant_in_loop_p (loop, op1))
4097             {
4098               add = fold_convert (sizetype, op1);
4099               add = size_binop (MINUS_EXPR, size_zero_node, add);
4100               off = op0;
4101               goto do_add;
4102             }
4103           break;
4104         case MULT_EXPR:
4105           if (scale == 1 && tree_fits_shwi_p (op1))
4106             {
4107               int new_scale = tree_to_shwi (op1);
4108               /* Only treat this as a scaling operation if the target
4109                  supports it for at least some offset type.  */
4110               if (use_ifn_p
4111                   && !vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
4112                                                 masked_p, vectype, memory_type,
4113                                                 signed_char_type_node,
4114                                                 new_scale, &ifn,
4115                                                 &offset_vectype)
4116                   && !vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
4117                                                 masked_p, vectype, memory_type,
4118                                                 unsigned_char_type_node,
4119                                                 new_scale, &ifn,
4120                                                 &offset_vectype))
4121                 break;
4122               scale = new_scale;
4123               off = op0;
4124               continue;
4125             }
4126           break;
4127         case SSA_NAME:
4128           off = op0;
4129           continue;
4130         CASE_CONVERT:
4131           if (!POINTER_TYPE_P (TREE_TYPE (op0))
4132               && !INTEGRAL_TYPE_P (TREE_TYPE (op0)))
4133             break;
4134
4135           /* Don't include the conversion if the target is happy with
4136              the current offset type.  */
4137           if (use_ifn_p
4138               && !POINTER_TYPE_P (TREE_TYPE (off))
4139               && vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
4140                                            masked_p, vectype, memory_type,
4141                                            TREE_TYPE (off), scale, &ifn,
4142                                            &offset_vectype))
4143             break;
4144
4145           if (TYPE_PRECISION (TREE_TYPE (op0))
4146               == TYPE_PRECISION (TREE_TYPE (off)))
4147             {
4148               off = op0;
4149               continue;
4150             }
4151
4152           /* Include the conversion if it is widening and we're using
4153              the IFN path or the target can handle the converted from
4154              offset or the current size is not already the same as the
4155              data vector element size.  */
4156           if ((TYPE_PRECISION (TREE_TYPE (op0))
4157                < TYPE_PRECISION (TREE_TYPE (off)))
4158               && (use_ifn_p
4159                   || (DR_IS_READ (dr)
4160                       ? (targetm.vectorize.builtin_gather
4161                          && targetm.vectorize.builtin_gather (vectype,
4162                                                               TREE_TYPE (op0),
4163                                                               scale))
4164                       : (targetm.vectorize.builtin_scatter
4165                          && targetm.vectorize.builtin_scatter (vectype,
4166                                                                TREE_TYPE (op0),
4167                                                                scale)))
4168                   || !operand_equal_p (TYPE_SIZE (TREE_TYPE (off)),
4169                                        TYPE_SIZE (TREE_TYPE (vectype)), 0)))
4170             {
4171               off = op0;
4172               offtype = TREE_TYPE (off);
4173               STRIP_NOPS (off);
4174               continue;
4175             }
4176           break;
4177         default:
4178           break;
4179         }
4180       break;
4181     }
4182
4183   /* If at the end OFF still isn't a SSA_NAME or isn't
4184      defined in the loop, punt.  */
4185   if (TREE_CODE (off) != SSA_NAME
4186       || expr_invariant_in_loop_p (loop, off))
4187     return false;
4188
4189   if (offtype == NULL_TREE)
4190     offtype = TREE_TYPE (off);
4191
4192   if (use_ifn_p)
4193     {
4194       if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
4195                                      vectype, memory_type, offtype, scale,
4196                                      &ifn, &offset_vectype))
4197         ifn = IFN_LAST;
4198       decl = NULL_TREE;
4199     }
4200   else
4201     {
4202       if (DR_IS_READ (dr))
4203         {
4204           if (targetm.vectorize.builtin_gather)
4205             decl = targetm.vectorize.builtin_gather (vectype, offtype, scale);
4206         }
4207       else
4208         {
4209           if (targetm.vectorize.builtin_scatter)
4210             decl = targetm.vectorize.builtin_scatter (vectype, offtype, scale);
4211         }
4212       ifn = IFN_LAST;
4213       /* The offset vector type will be read from DECL when needed.  */
4214       offset_vectype = NULL_TREE;
4215     }
4216
4217   info->ifn = ifn;
4218   info->decl = decl;
4219   info->base = base;
4220   info->offset = off;
4221   info->offset_dt = vect_unknown_def_type;
4222   info->offset_vectype = offset_vectype;
4223   info->scale = scale;
4224   info->element_type = TREE_TYPE (vectype);
4225   info->memory_type = memory_type;
4226   return true;
4227 }
4228
4229 /* Find the data references in STMT, analyze them with respect to LOOP and
4230    append them to DATAREFS.  Return false if datarefs in this stmt cannot
4231    be handled.  */
4232
4233 opt_result
4234 vect_find_stmt_data_reference (loop_p loop, gimple *stmt,
4235                                vec<data_reference_p> *datarefs,
4236                                vec<int> *dataref_groups, int group_id)
4237 {
4238   /* We can ignore clobbers for dataref analysis - they are removed during
4239      loop vectorization and BB vectorization checks dependences with a
4240      stmt walk.  */
4241   if (gimple_clobber_p (stmt))
4242     return opt_result::success ();
4243
4244   if (gimple_has_volatile_ops (stmt))
4245     return opt_result::failure_at (stmt, "not vectorized: volatile type: %G",
4246                                    stmt);
4247
4248   if (stmt_can_throw_internal (cfun, stmt))
4249     return opt_result::failure_at (stmt,
4250                                    "not vectorized:"
4251                                    " statement can throw an exception: %G",
4252                                    stmt);
4253
4254   auto_vec<data_reference_p, 2> refs;
4255   opt_result res = find_data_references_in_stmt (loop, stmt, &refs);
4256   if (!res)
4257     return res;
4258
4259   if (refs.is_empty ())
4260     return opt_result::success ();
4261
4262   if (refs.length () > 1)
4263     {
4264       while (!refs.is_empty ())
4265         free_data_ref (refs.pop ());
4266       return opt_result::failure_at (stmt,
4267                                      "not vectorized: more than one "
4268                                      "data ref in stmt: %G", stmt);
4269     }
4270
4271   data_reference_p dr = refs.pop ();
4272   if (gcall *call = dyn_cast <gcall *> (stmt))
4273     if (!gimple_call_internal_p (call)
4274         || (gimple_call_internal_fn (call) != IFN_MASK_LOAD
4275             && gimple_call_internal_fn (call) != IFN_MASK_STORE))
4276       {
4277         free_data_ref (dr);
4278         return opt_result::failure_at (stmt,
4279                                        "not vectorized: dr in a call %G", stmt);
4280       }
4281
4282   if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF
4283       && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1)))
4284     {
4285       free_data_ref (dr);
4286       return opt_result::failure_at (stmt,
4287                                      "not vectorized:"
4288                                      " statement is bitfield access %G", stmt);
4289     }
4290
4291   if (DR_BASE_ADDRESS (dr)
4292       && TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
4293     {
4294       free_data_ref (dr);
4295       return opt_result::failure_at (stmt,
4296                                      "not vectorized:"
4297                                      " base addr of dr is a constant\n");
4298     }
4299
4300   /* Check whether this may be a SIMD lane access and adjust the
4301      DR to make it easier for us to handle it.  */
4302   if (loop
4303       && loop->simduid
4304       && (!DR_BASE_ADDRESS (dr)
4305           || !DR_OFFSET (dr)
4306           || !DR_INIT (dr)
4307           || !DR_STEP (dr)))
4308     {
4309       struct data_reference *newdr
4310         = create_data_ref (NULL, loop_containing_stmt (stmt), DR_REF (dr), stmt,
4311                            DR_IS_READ (dr), DR_IS_CONDITIONAL_IN_STMT (dr));
4312       if (DR_BASE_ADDRESS (newdr)
4313           && DR_OFFSET (newdr)
4314           && DR_INIT (newdr)
4315           && DR_STEP (newdr)
4316           && TREE_CODE (DR_INIT (newdr)) == INTEGER_CST
4317           && integer_zerop (DR_STEP (newdr)))
4318         {
4319           tree base_address = DR_BASE_ADDRESS (newdr);
4320           tree off = DR_OFFSET (newdr);
4321           tree step = ssize_int (1);
4322           if (integer_zerop (off)
4323               && TREE_CODE (base_address) == POINTER_PLUS_EXPR)
4324             {
4325               off = TREE_OPERAND (base_address, 1);
4326               base_address = TREE_OPERAND (base_address, 0);
4327             }
4328           STRIP_NOPS (off);
4329           if (TREE_CODE (off) == MULT_EXPR
4330               && tree_fits_uhwi_p (TREE_OPERAND (off, 1)))
4331             {
4332               step = TREE_OPERAND (off, 1);
4333               off = TREE_OPERAND (off, 0);
4334               STRIP_NOPS (off);
4335             }
4336           if (CONVERT_EXPR_P (off)
4337               && (TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (off, 0)))
4338                   < TYPE_PRECISION (TREE_TYPE (off))))
4339             off = TREE_OPERAND (off, 0);
4340           if (TREE_CODE (off) == SSA_NAME)
4341             {
4342               gimple *def = SSA_NAME_DEF_STMT (off);
4343               /* Look through widening conversion.  */
4344               if (is_gimple_assign (def)
4345                   && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def)))
4346                 {
4347                   tree rhs1 = gimple_assign_rhs1 (def);
4348                   if (TREE_CODE (rhs1) == SSA_NAME
4349                       && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
4350                       && (TYPE_PRECISION (TREE_TYPE (off))
4351                           > TYPE_PRECISION (TREE_TYPE (rhs1))))
4352                     def = SSA_NAME_DEF_STMT (rhs1);
4353                 }
4354               if (is_gimple_call (def)
4355                   && gimple_call_internal_p (def)
4356                   && (gimple_call_internal_fn (def) == IFN_GOMP_SIMD_LANE))
4357                 {
4358                   tree arg = gimple_call_arg (def, 0);
4359                   tree reft = TREE_TYPE (DR_REF (newdr));
4360                   gcc_assert (TREE_CODE (arg) == SSA_NAME);
4361                   arg = SSA_NAME_VAR (arg);
4362                   if (arg == loop->simduid
4363                       /* For now.  */
4364                       && tree_int_cst_equal (TYPE_SIZE_UNIT (reft), step))
4365                     {
4366                       DR_BASE_ADDRESS (newdr) = base_address;
4367                       DR_OFFSET (newdr) = ssize_int (0);
4368                       DR_STEP (newdr) = step;
4369                       DR_OFFSET_ALIGNMENT (newdr) = BIGGEST_ALIGNMENT;
4370                       DR_STEP_ALIGNMENT (newdr) = highest_pow2_factor (step);
4371                       /* Mark as simd-lane access.  */
4372                       tree arg2 = gimple_call_arg (def, 1);
4373                       newdr->aux = (void *) (-1 - tree_to_uhwi (arg2));
4374                       free_data_ref (dr);
4375                       datarefs->safe_push (newdr);
4376                       if (dataref_groups)
4377                         dataref_groups->safe_push (group_id);
4378                       return opt_result::success ();
4379                     }
4380                 }
4381             }
4382         }
4383       free_data_ref (newdr);
4384     }
4385
4386   datarefs->safe_push (dr);
4387   if (dataref_groups)
4388     dataref_groups->safe_push (group_id);
4389   return opt_result::success ();
4390 }
4391
4392 /* Function vect_analyze_data_refs.
4393
4394   Find all the data references in the loop or basic block.
4395
4396    The general structure of the analysis of data refs in the vectorizer is as
4397    follows:
4398    1- vect_analyze_data_refs(loop/bb): call
4399       compute_data_dependences_for_loop/bb to find and analyze all data-refs
4400       in the loop/bb and their dependences.
4401    2- vect_analyze_dependences(): apply dependence testing using ddrs.
4402    3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
4403    4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
4404
4405 */
4406
4407 opt_result
4408 vect_analyze_data_refs (vec_info *vinfo, poly_uint64 *min_vf, bool *fatal)
4409 {
4410   class loop *loop = NULL;
4411   unsigned int i;
4412   struct data_reference *dr;
4413   tree scalar_type;
4414
4415   DUMP_VECT_SCOPE ("vect_analyze_data_refs");
4416
4417   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
4418     loop = LOOP_VINFO_LOOP (loop_vinfo);
4419
4420   /* Go through the data-refs, check that the analysis succeeded.  Update
4421      pointer from stmt_vec_info struct to DR and vectype.  */
4422
4423   vec<data_reference_p> datarefs = vinfo->shared->datarefs;
4424   FOR_EACH_VEC_ELT (datarefs, i, dr)
4425     {
4426       enum { SG_NONE, GATHER, SCATTER } gatherscatter = SG_NONE;
4427       poly_uint64 vf;
4428
4429       gcc_assert (DR_REF (dr));
4430       stmt_vec_info stmt_info = vinfo->lookup_stmt (DR_STMT (dr));
4431       gcc_assert (!stmt_info->dr_aux.dr);
4432       stmt_info->dr_aux.dr = dr;
4433       stmt_info->dr_aux.stmt = stmt_info;
4434
4435       /* Check that analysis of the data-ref succeeded.  */
4436       if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
4437           || !DR_STEP (dr))
4438         {
4439           bool maybe_gather
4440             = DR_IS_READ (dr)
4441               && !TREE_THIS_VOLATILE (DR_REF (dr));
4442           bool maybe_scatter
4443             = DR_IS_WRITE (dr)
4444               && !TREE_THIS_VOLATILE (DR_REF (dr))
4445               && (targetm.vectorize.builtin_scatter != NULL
4446                   || supports_vec_scatter_store_p ());
4447
4448           /* If target supports vector gather loads or scatter stores,
4449              see if they can't be used.  */
4450           if (is_a <loop_vec_info> (vinfo)
4451               && !nested_in_vect_loop_p (loop, stmt_info))
4452             {
4453               if (maybe_gather || maybe_scatter)
4454                 {
4455                   if (maybe_gather)
4456                     gatherscatter = GATHER;
4457                   else
4458                     gatherscatter = SCATTER;
4459                 }
4460             }
4461
4462           if (gatherscatter == SG_NONE)
4463             {
4464               if (dump_enabled_p ())
4465                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4466                                  "not vectorized: data ref analysis "
4467                                  "failed %G", stmt_info->stmt);
4468               if (is_a <bb_vec_info> (vinfo))
4469                 {
4470                   /* In BB vectorization the ref can still participate
4471                      in dependence analysis, we just can't vectorize it.  */
4472                   STMT_VINFO_VECTORIZABLE (stmt_info) = false;
4473                   continue;
4474                 }
4475               return opt_result::failure_at (stmt_info->stmt,
4476                                              "not vectorized:"
4477                                              " data ref analysis failed: %G",
4478                                              stmt_info->stmt);
4479             }
4480         }
4481
4482       /* See if this was detected as SIMD lane access.  */
4483       if (dr->aux == (void *)-1
4484           || dr->aux == (void *)-2
4485           || dr->aux == (void *)-3
4486           || dr->aux == (void *)-4)
4487         {
4488           if (nested_in_vect_loop_p (loop, stmt_info))
4489             return opt_result::failure_at (stmt_info->stmt,
4490                                            "not vectorized:"
4491                                            " data ref analysis failed: %G",
4492                                            stmt_info->stmt);
4493           STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)
4494             = -(uintptr_t) dr->aux;
4495         }
4496
4497       tree base = get_base_address (DR_REF (dr));
4498       if (base && VAR_P (base) && DECL_NONALIASED (base))
4499         {
4500           if (dump_enabled_p ())
4501             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4502                              "not vectorized: base object not addressable "
4503                              "for stmt: %G", stmt_info->stmt);
4504           if (is_a <bb_vec_info> (vinfo))
4505             {
4506               /* In BB vectorization the ref can still participate
4507                  in dependence analysis, we just can't vectorize it.  */
4508               STMT_VINFO_VECTORIZABLE (stmt_info) = false;
4509               continue;
4510             }
4511           return opt_result::failure_at (stmt_info->stmt,
4512                                          "not vectorized: base object not"
4513                                          " addressable for stmt: %G",
4514                                          stmt_info->stmt);
4515         }
4516
4517       if (is_a <loop_vec_info> (vinfo)
4518           && DR_STEP (dr)
4519           && TREE_CODE (DR_STEP (dr)) != INTEGER_CST)
4520         {
4521           if (nested_in_vect_loop_p (loop, stmt_info))
4522             return opt_result::failure_at (stmt_info->stmt,
4523                                            "not vectorized: "
4524                                            "not suitable for strided load %G",
4525                                            stmt_info->stmt);
4526           STMT_VINFO_STRIDED_P (stmt_info) = true;
4527         }
4528
4529       /* Update DR field in stmt_vec_info struct.  */
4530
4531       /* If the dataref is in an inner-loop of the loop that is considered for
4532          for vectorization, we also want to analyze the access relative to
4533          the outer-loop (DR contains information only relative to the
4534          inner-most enclosing loop).  We do that by building a reference to the
4535          first location accessed by the inner-loop, and analyze it relative to
4536          the outer-loop.  */
4537       if (loop && nested_in_vect_loop_p (loop, stmt_info))
4538         {
4539           /* Build a reference to the first location accessed by the
4540              inner loop: *(BASE + INIT + OFFSET).  By construction,
4541              this address must be invariant in the inner loop, so we
4542              can consider it as being used in the outer loop.  */
4543           tree base = unshare_expr (DR_BASE_ADDRESS (dr));
4544           tree offset = unshare_expr (DR_OFFSET (dr));
4545           tree init = unshare_expr (DR_INIT (dr));
4546           tree init_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset),
4547                                           init, offset);
4548           tree init_addr = fold_build_pointer_plus (base, init_offset);
4549           tree init_ref = build_fold_indirect_ref (init_addr);
4550
4551           if (dump_enabled_p ())
4552             dump_printf_loc (MSG_NOTE, vect_location,
4553                              "analyze in outer loop: %T\n", init_ref);
4554
4555           opt_result res
4556             = dr_analyze_innermost (&STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info),
4557                                     init_ref, loop, stmt_info->stmt);
4558           if (!res)
4559             /* dr_analyze_innermost already explained the failure.  */
4560             return res;
4561
4562           if (dump_enabled_p ())
4563             dump_printf_loc (MSG_NOTE, vect_location,
4564                              "\touter base_address: %T\n"
4565                              "\touter offset from base address: %T\n"
4566                              "\touter constant offset from base address: %T\n"
4567                              "\touter step: %T\n"
4568                              "\touter base alignment: %d\n\n"
4569                              "\touter base misalignment: %d\n"
4570                              "\touter offset alignment: %d\n"
4571                              "\touter step alignment: %d\n",
4572                              STMT_VINFO_DR_BASE_ADDRESS (stmt_info),
4573                              STMT_VINFO_DR_OFFSET (stmt_info),
4574                              STMT_VINFO_DR_INIT (stmt_info),
4575                              STMT_VINFO_DR_STEP (stmt_info),
4576                              STMT_VINFO_DR_BASE_ALIGNMENT (stmt_info),
4577                              STMT_VINFO_DR_BASE_MISALIGNMENT (stmt_info),
4578                              STMT_VINFO_DR_OFFSET_ALIGNMENT (stmt_info),
4579                              STMT_VINFO_DR_STEP_ALIGNMENT (stmt_info));
4580         }
4581
4582       /* Set vectype for STMT.  */
4583       scalar_type = TREE_TYPE (DR_REF (dr));
4584       tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
4585       if (!vectype)
4586         {
4587           if (dump_enabled_p ())
4588             {
4589               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4590                                "not vectorized: no vectype for stmt: %G",
4591                                stmt_info->stmt);
4592               dump_printf (MSG_MISSED_OPTIMIZATION, " scalar_type: ");
4593               dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_DETAILS,
4594                                  scalar_type);
4595               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4596             }
4597
4598           if (is_a <bb_vec_info> (vinfo))
4599             {
4600               /* No vector type is fine, the ref can still participate
4601                  in dependence analysis, we just can't vectorize it.  */
4602               STMT_VINFO_VECTORIZABLE (stmt_info) = false;
4603               continue;
4604             }
4605           if (fatal)
4606             *fatal = false;
4607           return opt_result::failure_at (stmt_info->stmt,
4608                                          "not vectorized:"
4609                                          " no vectype for stmt: %G"
4610                                          " scalar_type: %T\n",
4611                                          stmt_info->stmt, scalar_type);
4612         }
4613       else
4614         {
4615           if (dump_enabled_p ())
4616             dump_printf_loc (MSG_NOTE, vect_location,
4617                              "got vectype for stmt: %G%T\n",
4618                              stmt_info->stmt, vectype);
4619         }
4620
4621       /* Adjust the minimal vectorization factor according to the
4622          vector type.  */
4623       vf = TYPE_VECTOR_SUBPARTS (vectype);
4624       *min_vf = upper_bound (*min_vf, vf);
4625
4626       /* Leave the BB vectorizer to pick the vector type later, based on
4627          the final dataref group size and SLP node size.  */
4628       if (is_a <loop_vec_info> (vinfo))
4629         STMT_VINFO_VECTYPE (stmt_info) = vectype;
4630
4631       if (gatherscatter != SG_NONE)
4632         {
4633           gather_scatter_info gs_info;
4634           if (!vect_check_gather_scatter (stmt_info,
4635                                           as_a <loop_vec_info> (vinfo),
4636                                           &gs_info)
4637               || !get_vectype_for_scalar_type (vinfo,
4638                                                TREE_TYPE (gs_info.offset)))
4639             {
4640               if (fatal)
4641                 *fatal = false;
4642               return opt_result::failure_at
4643                         (stmt_info->stmt,
4644                          (gatherscatter == GATHER)
4645                          ? "not vectorized: not suitable for gather load %G"
4646                          : "not vectorized: not suitable for scatter store %G",
4647                          stmt_info->stmt);
4648             }
4649           STMT_VINFO_GATHER_SCATTER_P (stmt_info) = gatherscatter;
4650         }
4651     }
4652
4653   /* We used to stop processing and prune the list here.  Verify we no
4654      longer need to.  */
4655   gcc_assert (i == datarefs.length ());
4656
4657   return opt_result::success ();
4658 }
4659
4660
4661 /* Function vect_get_new_vect_var.
4662
4663    Returns a name for a new variable.  The current naming scheme appends the
4664    prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
4665    the name of vectorizer generated variables, and appends that to NAME if
4666    provided.  */
4667
4668 tree
4669 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
4670 {
4671   const char *prefix;
4672   tree new_vect_var;
4673
4674   switch (var_kind)
4675   {
4676   case vect_simple_var:
4677     prefix = "vect";
4678     break;
4679   case vect_scalar_var:
4680     prefix = "stmp";
4681     break;
4682   case vect_mask_var:
4683     prefix = "mask";
4684     break;
4685   case vect_pointer_var:
4686     prefix = "vectp";
4687     break;
4688   default:
4689     gcc_unreachable ();
4690   }
4691
4692   if (name)
4693     {
4694       char* tmp = concat (prefix, "_", name, NULL);
4695       new_vect_var = create_tmp_reg (type, tmp);
4696       free (tmp);
4697     }
4698   else
4699     new_vect_var = create_tmp_reg (type, prefix);
4700
4701   return new_vect_var;
4702 }
4703
4704 /* Like vect_get_new_vect_var but return an SSA name.  */
4705
4706 tree
4707 vect_get_new_ssa_name (tree type, enum vect_var_kind var_kind, const char *name)
4708 {
4709   const char *prefix;
4710   tree new_vect_var;
4711
4712   switch (var_kind)
4713   {
4714   case vect_simple_var:
4715     prefix = "vect";
4716     break;
4717   case vect_scalar_var:
4718     prefix = "stmp";
4719     break;
4720   case vect_pointer_var:
4721     prefix = "vectp";
4722     break;
4723   default:
4724     gcc_unreachable ();
4725   }
4726
4727   if (name)
4728     {
4729       char* tmp = concat (prefix, "_", name, NULL);
4730       new_vect_var = make_temp_ssa_name (type, NULL, tmp);
4731       free (tmp);
4732     }
4733   else
4734     new_vect_var = make_temp_ssa_name (type, NULL, prefix);
4735
4736   return new_vect_var;
4737 }
4738
4739 /* Duplicate points-to info on NAME from DR_INFO.  */
4740
4741 static void
4742 vect_duplicate_ssa_name_ptr_info (tree name, dr_vec_info *dr_info)
4743 {
4744   duplicate_ssa_name_ptr_info (name, DR_PTR_INFO (dr_info->dr));
4745   /* DR_PTR_INFO is for a base SSA name, not including constant or
4746      variable offsets in the ref so its alignment info does not apply.  */
4747   mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (name));
4748 }
4749
4750 /* Function vect_create_addr_base_for_vector_ref.
4751
4752    Create an expression that computes the address of the first memory location
4753    that will be accessed for a data reference.
4754
4755    Input:
4756    STMT_INFO: The statement containing the data reference.
4757    NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
4758    OFFSET: Optional. If supplied, it is be added to the initial address.
4759    LOOP:    Specify relative to which loop-nest should the address be computed.
4760             For example, when the dataref is in an inner-loop nested in an
4761             outer-loop that is now being vectorized, LOOP can be either the
4762             outer-loop, or the inner-loop.  The first memory location accessed
4763             by the following dataref ('in' points to short):
4764
4765                 for (i=0; i<N; i++)
4766                    for (j=0; j<M; j++)
4767                      s += in[i+j]
4768
4769             is as follows:
4770             if LOOP=i_loop:     &in             (relative to i_loop)
4771             if LOOP=j_loop:     &in+i*2B        (relative to j_loop)
4772
4773    Output:
4774    1. Return an SSA_NAME whose value is the address of the memory location of
4775       the first vector of the data reference.
4776    2. If new_stmt_list is not NULL_TREE after return then the caller must insert
4777       these statement(s) which define the returned SSA_NAME.
4778
4779    FORNOW: We are only handling array accesses with step 1.  */
4780
4781 tree
4782 vect_create_addr_base_for_vector_ref (vec_info *vinfo, stmt_vec_info stmt_info,
4783                                       gimple_seq *new_stmt_list,
4784                                       tree offset)
4785 {
4786   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
4787   struct data_reference *dr = dr_info->dr;
4788   const char *base_name;
4789   tree addr_base;
4790   tree dest;
4791   gimple_seq seq = NULL;
4792   tree vect_ptr_type;
4793   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4794   innermost_loop_behavior *drb = vect_dr_behavior (vinfo, dr_info);
4795
4796   tree data_ref_base = unshare_expr (drb->base_address);
4797   tree base_offset = unshare_expr (get_dr_vinfo_offset (vinfo, dr_info, true));
4798   tree init = unshare_expr (drb->init);
4799
4800   if (loop_vinfo)
4801     base_name = get_name (data_ref_base);
4802   else
4803     {
4804       base_offset = ssize_int (0);
4805       init = ssize_int (0);
4806       base_name = get_name (DR_REF (dr));
4807     }
4808
4809   /* Create base_offset */
4810   base_offset = size_binop (PLUS_EXPR,
4811                             fold_convert (sizetype, base_offset),
4812                             fold_convert (sizetype, init));
4813
4814   if (offset)
4815     {
4816       offset = fold_convert (sizetype, offset);
4817       base_offset = fold_build2 (PLUS_EXPR, sizetype,
4818                                  base_offset, offset);
4819     }
4820
4821   /* base + base_offset */
4822   if (loop_vinfo)
4823     addr_base = fold_build_pointer_plus (data_ref_base, base_offset);
4824   else
4825     {
4826       addr_base = build1 (ADDR_EXPR,
4827                           build_pointer_type (TREE_TYPE (DR_REF (dr))),
4828                           unshare_expr (DR_REF (dr)));
4829     }
4830
4831   vect_ptr_type = build_pointer_type (TREE_TYPE (DR_REF (dr)));
4832   dest = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var, base_name);
4833   addr_base = force_gimple_operand (addr_base, &seq, true, dest);
4834   gimple_seq_add_seq (new_stmt_list, seq);
4835
4836   if (DR_PTR_INFO (dr)
4837       && TREE_CODE (addr_base) == SSA_NAME
4838       /* We should only duplicate pointer info to newly created SSA names.  */
4839       && SSA_NAME_VAR (addr_base) == dest)
4840     {
4841       gcc_assert (!SSA_NAME_PTR_INFO (addr_base));
4842       vect_duplicate_ssa_name_ptr_info (addr_base, dr_info);
4843     }
4844
4845   if (dump_enabled_p ())
4846     dump_printf_loc (MSG_NOTE, vect_location, "created %T\n", addr_base);
4847
4848   return addr_base;
4849 }
4850
4851
4852 /* Function vect_create_data_ref_ptr.
4853
4854    Create a new pointer-to-AGGR_TYPE variable (ap), that points to the first
4855    location accessed in the loop by STMT_INFO, along with the def-use update
4856    chain to appropriately advance the pointer through the loop iterations.
4857    Also set aliasing information for the pointer.  This pointer is used by
4858    the callers to this function to create a memory reference expression for
4859    vector load/store access.
4860
4861    Input:
4862    1. STMT_INFO: a stmt that references memory. Expected to be of the form
4863          GIMPLE_ASSIGN <name, data-ref> or
4864          GIMPLE_ASSIGN <data-ref, name>.
4865    2. AGGR_TYPE: the type of the reference, which should be either a vector
4866         or an array.
4867    3. AT_LOOP: the loop where the vector memref is to be created.
4868    4. OFFSET (optional): a byte offset to be added to the initial address
4869         accessed by the data-ref in STMT_INFO.
4870    5. BSI: location where the new stmts are to be placed if there is no loop
4871    6. ONLY_INIT: indicate if ap is to be updated in the loop, or remain
4872         pointing to the initial address.
4873    8. IV_STEP (optional, defaults to NULL): the amount that should be added
4874         to the IV during each iteration of the loop.  NULL says to move
4875         by one copy of AGGR_TYPE up or down, depending on the step of the
4876         data reference.
4877
4878    Output:
4879    1. Declare a new ptr to vector_type, and have it point to the base of the
4880       data reference (initial addressed accessed by the data reference).
4881       For example, for vector of type V8HI, the following code is generated:
4882
4883       v8hi *ap;
4884       ap = (v8hi *)initial_address;
4885
4886       if OFFSET is not supplied:
4887          initial_address = &a[init];
4888       if OFFSET is supplied:
4889          initial_address = &a[init] + OFFSET;
4890       if BYTE_OFFSET is supplied:
4891          initial_address = &a[init] + BYTE_OFFSET;
4892
4893       Return the initial_address in INITIAL_ADDRESS.
4894
4895    2. If ONLY_INIT is true, just return the initial pointer.  Otherwise, also
4896       update the pointer in each iteration of the loop.
4897
4898       Return the increment stmt that updates the pointer in PTR_INCR.
4899
4900    3. Return the pointer.  */
4901
4902 tree
4903 vect_create_data_ref_ptr (vec_info *vinfo, stmt_vec_info stmt_info,
4904                           tree aggr_type, class loop *at_loop, tree offset,
4905                           tree *initial_address, gimple_stmt_iterator *gsi,
4906                           gimple **ptr_incr, bool only_init,
4907                           tree iv_step)
4908 {
4909   const char *base_name;
4910   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4911   class loop *loop = NULL;
4912   bool nested_in_vect_loop = false;
4913   class loop *containing_loop = NULL;
4914   tree aggr_ptr_type;
4915   tree aggr_ptr;
4916   tree new_temp;
4917   gimple_seq new_stmt_list = NULL;
4918   edge pe = NULL;
4919   basic_block new_bb;
4920   tree aggr_ptr_init;
4921   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
4922   struct data_reference *dr = dr_info->dr;
4923   tree aptr;
4924   gimple_stmt_iterator incr_gsi;
4925   bool insert_after;
4926   tree indx_before_incr, indx_after_incr;
4927   gimple *incr;
4928   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
4929
4930   gcc_assert (iv_step != NULL_TREE
4931               || TREE_CODE (aggr_type) == ARRAY_TYPE
4932               || TREE_CODE (aggr_type) == VECTOR_TYPE);
4933
4934   if (loop_vinfo)
4935     {
4936       loop = LOOP_VINFO_LOOP (loop_vinfo);
4937       nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
4938       containing_loop = (gimple_bb (stmt_info->stmt))->loop_father;
4939       pe = loop_preheader_edge (loop);
4940     }
4941   else
4942     {
4943       gcc_assert (bb_vinfo);
4944       only_init = true;
4945       *ptr_incr = NULL;
4946     }
4947
4948   /* Create an expression for the first address accessed by this load
4949      in LOOP.  */
4950   base_name = get_name (DR_BASE_ADDRESS (dr));
4951
4952   if (dump_enabled_p ())
4953     {
4954       tree dr_base_type = TREE_TYPE (DR_BASE_OBJECT (dr));
4955       dump_printf_loc (MSG_NOTE, vect_location,
4956                        "create %s-pointer variable to type: %T",
4957                        get_tree_code_name (TREE_CODE (aggr_type)),
4958                        aggr_type);
4959       if (TREE_CODE (dr_base_type) == ARRAY_TYPE)
4960         dump_printf (MSG_NOTE, "  vectorizing an array ref: ");
4961       else if (TREE_CODE (dr_base_type) == VECTOR_TYPE)
4962         dump_printf (MSG_NOTE, "  vectorizing a vector ref: ");
4963       else if (TREE_CODE (dr_base_type) == RECORD_TYPE)
4964         dump_printf (MSG_NOTE, "  vectorizing a record based array ref: ");
4965       else
4966         dump_printf (MSG_NOTE, "  vectorizing a pointer ref: ");
4967       dump_printf (MSG_NOTE, "%T\n", DR_BASE_OBJECT (dr));
4968     }
4969
4970   /* (1) Create the new aggregate-pointer variable.
4971      Vector and array types inherit the alias set of their component
4972      type by default so we need to use a ref-all pointer if the data
4973      reference does not conflict with the created aggregated data
4974      reference because it is not addressable.  */
4975   bool need_ref_all = false;
4976   if (!alias_sets_conflict_p (get_alias_set (aggr_type),
4977                               get_alias_set (DR_REF (dr))))
4978     need_ref_all = true;
4979   /* Likewise for any of the data references in the stmt group.  */
4980   else if (DR_GROUP_SIZE (stmt_info) > 1)
4981     {
4982       stmt_vec_info sinfo = DR_GROUP_FIRST_ELEMENT (stmt_info);
4983       do
4984         {
4985           struct data_reference *sdr = STMT_VINFO_DATA_REF (sinfo);
4986           if (!alias_sets_conflict_p (get_alias_set (aggr_type),
4987                                       get_alias_set (DR_REF (sdr))))
4988             {
4989               need_ref_all = true;
4990               break;
4991             }
4992           sinfo = DR_GROUP_NEXT_ELEMENT (sinfo);
4993         }
4994       while (sinfo);
4995     }
4996   aggr_ptr_type = build_pointer_type_for_mode (aggr_type, ptr_mode,
4997                                                need_ref_all);
4998   aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var, base_name);
4999
5000
5001   /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
5002      vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
5003      def-use update cycles for the pointer: one relative to the outer-loop
5004      (LOOP), which is what steps (3) and (4) below do.  The other is relative
5005      to the inner-loop (which is the inner-most loop containing the dataref),
5006      and this is done be step (5) below.
5007
5008      When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
5009      inner-most loop, and so steps (3),(4) work the same, and step (5) is
5010      redundant.  Steps (3),(4) create the following:
5011
5012         vp0 = &base_addr;
5013         LOOP:   vp1 = phi(vp0,vp2)
5014                 ...
5015                 ...
5016                 vp2 = vp1 + step
5017                 goto LOOP
5018
5019      If there is an inner-loop nested in loop, then step (5) will also be
5020      applied, and an additional update in the inner-loop will be created:
5021
5022         vp0 = &base_addr;
5023         LOOP:   vp1 = phi(vp0,vp2)
5024                 ...
5025         inner:     vp3 = phi(vp1,vp4)
5026                    vp4 = vp3 + inner_step
5027                    if () goto inner
5028                 ...
5029                 vp2 = vp1 + step
5030                 if () goto LOOP   */
5031
5032   /* (2) Calculate the initial address of the aggregate-pointer, and set
5033      the aggregate-pointer to point to it before the loop.  */
5034
5035   /* Create: (&(base[init_val]+offset) in the loop preheader.  */
5036
5037   new_temp = vect_create_addr_base_for_vector_ref (vinfo,
5038                                                    stmt_info, &new_stmt_list,
5039                                                    offset);
5040   if (new_stmt_list)
5041     {
5042       if (pe)
5043         {
5044           new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
5045           gcc_assert (!new_bb);
5046         }
5047       else
5048         gsi_insert_seq_before (gsi, new_stmt_list, GSI_SAME_STMT);
5049     }
5050
5051   *initial_address = new_temp;
5052   aggr_ptr_init = new_temp;
5053
5054   /* (3) Handle the updating of the aggregate-pointer inside the loop.
5055      This is needed when ONLY_INIT is false, and also when AT_LOOP is the
5056      inner-loop nested in LOOP (during outer-loop vectorization).  */
5057
5058   /* No update in loop is required.  */
5059   if (only_init && (!loop_vinfo || at_loop == loop))
5060     aptr = aggr_ptr_init;
5061   else
5062     {
5063       /* Accesses to invariant addresses should be handled specially
5064          by the caller.  */
5065       tree step = vect_dr_behavior (vinfo, dr_info)->step;
5066       gcc_assert (!integer_zerop (step));
5067
5068       if (iv_step == NULL_TREE)
5069         {
5070           /* The step of the aggregate pointer is the type size,
5071              negated for downward accesses.  */
5072           iv_step = TYPE_SIZE_UNIT (aggr_type);
5073           if (tree_int_cst_sgn (step) == -1)
5074             iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
5075         }
5076
5077       standard_iv_increment_position (loop, &incr_gsi, &insert_after);
5078
5079       create_iv (aggr_ptr_init,
5080                  fold_convert (aggr_ptr_type, iv_step),
5081                  aggr_ptr, loop, &incr_gsi, insert_after,
5082                  &indx_before_incr, &indx_after_incr);
5083       incr = gsi_stmt (incr_gsi);
5084
5085       /* Copy the points-to information if it exists. */
5086       if (DR_PTR_INFO (dr))
5087         {
5088           vect_duplicate_ssa_name_ptr_info (indx_before_incr, dr_info);
5089           vect_duplicate_ssa_name_ptr_info (indx_after_incr, dr_info);
5090         }
5091       if (ptr_incr)
5092         *ptr_incr = incr;
5093
5094       aptr = indx_before_incr;
5095     }
5096
5097   if (!nested_in_vect_loop || only_init)
5098     return aptr;
5099
5100
5101   /* (4) Handle the updating of the aggregate-pointer inside the inner-loop
5102      nested in LOOP, if exists.  */
5103
5104   gcc_assert (nested_in_vect_loop);
5105   if (!only_init)
5106     {
5107       standard_iv_increment_position (containing_loop, &incr_gsi,
5108                                       &insert_after);
5109       create_iv (aptr, fold_convert (aggr_ptr_type, DR_STEP (dr)), aggr_ptr,
5110                  containing_loop, &incr_gsi, insert_after, &indx_before_incr,
5111                  &indx_after_incr);
5112       incr = gsi_stmt (incr_gsi);
5113
5114       /* Copy the points-to information if it exists. */
5115       if (DR_PTR_INFO (dr))
5116         {
5117           vect_duplicate_ssa_name_ptr_info (indx_before_incr, dr_info);
5118           vect_duplicate_ssa_name_ptr_info (indx_after_incr, dr_info);
5119         }
5120       if (ptr_incr)
5121         *ptr_incr = incr;
5122
5123       return indx_before_incr;
5124     }
5125   else
5126     gcc_unreachable ();
5127 }
5128
5129
5130 /* Function bump_vector_ptr
5131
5132    Increment a pointer (to a vector type) by vector-size. If requested,
5133    i.e. if PTR-INCR is given, then also connect the new increment stmt
5134    to the existing def-use update-chain of the pointer, by modifying
5135    the PTR_INCR as illustrated below:
5136
5137    The pointer def-use update-chain before this function:
5138                         DATAREF_PTR = phi (p_0, p_2)
5139                         ....
5140         PTR_INCR:       p_2 = DATAREF_PTR + step
5141
5142    The pointer def-use update-chain after this function:
5143                         DATAREF_PTR = phi (p_0, p_2)
5144                         ....
5145                         NEW_DATAREF_PTR = DATAREF_PTR + BUMP
5146                         ....
5147         PTR_INCR:       p_2 = NEW_DATAREF_PTR + step
5148
5149    Input:
5150    DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
5151                  in the loop.
5152    PTR_INCR - optional. The stmt that updates the pointer in each iteration of
5153               the loop.  The increment amount across iterations is expected
5154               to be vector_size.
5155    BSI - location where the new update stmt is to be placed.
5156    STMT_INFO - the original scalar memory-access stmt that is being vectorized.
5157    BUMP - optional. The offset by which to bump the pointer. If not given,
5158           the offset is assumed to be vector_size.
5159
5160    Output: Return NEW_DATAREF_PTR as illustrated above.
5161
5162 */
5163
5164 tree
5165 bump_vector_ptr (vec_info *vinfo,
5166                  tree dataref_ptr, gimple *ptr_incr, gimple_stmt_iterator *gsi,
5167                  stmt_vec_info stmt_info, tree bump)
5168 {
5169   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
5170   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5171   tree update = TYPE_SIZE_UNIT (vectype);
5172   gimple *incr_stmt;
5173   ssa_op_iter iter;
5174   use_operand_p use_p;
5175   tree new_dataref_ptr;
5176
5177   if (bump)
5178     update = bump;
5179
5180   if (TREE_CODE (dataref_ptr) == SSA_NAME)
5181     new_dataref_ptr = copy_ssa_name (dataref_ptr);
5182   else
5183     new_dataref_ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
5184   incr_stmt = gimple_build_assign (new_dataref_ptr, POINTER_PLUS_EXPR,
5185                                    dataref_ptr, update);
5186   vect_finish_stmt_generation (vinfo, stmt_info, incr_stmt, gsi);
5187   /* Fold the increment, avoiding excessive chains use-def chains of
5188      those, leading to compile-time issues for passes until the next
5189      forwprop pass which would do this as well.  */
5190   gimple_stmt_iterator fold_gsi = gsi_for_stmt (incr_stmt);
5191   if (fold_stmt (&fold_gsi, follow_all_ssa_edges))
5192     {
5193       incr_stmt = gsi_stmt (fold_gsi);
5194       update_stmt (incr_stmt);
5195     }
5196
5197   /* Copy the points-to information if it exists. */
5198   if (DR_PTR_INFO (dr))
5199     {
5200       duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
5201       mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (new_dataref_ptr));
5202     }
5203
5204   if (!ptr_incr)
5205     return new_dataref_ptr;
5206
5207   /* Update the vector-pointer's cross-iteration increment.  */
5208   FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
5209     {
5210       tree use = USE_FROM_PTR (use_p);
5211
5212       if (use == dataref_ptr)
5213         SET_USE (use_p, new_dataref_ptr);
5214       else
5215         gcc_assert (operand_equal_p (use, update, 0));
5216     }
5217
5218   return new_dataref_ptr;
5219 }
5220
5221
5222 /* Copy memory reference info such as base/clique from the SRC reference
5223    to the DEST MEM_REF.  */
5224
5225 void
5226 vect_copy_ref_info (tree dest, tree src)
5227 {
5228   if (TREE_CODE (dest) != MEM_REF)
5229     return;
5230
5231   tree src_base = src;
5232   while (handled_component_p (src_base))
5233     src_base = TREE_OPERAND (src_base, 0);
5234   if (TREE_CODE (src_base) != MEM_REF
5235       && TREE_CODE (src_base) != TARGET_MEM_REF)
5236     return;
5237
5238   MR_DEPENDENCE_CLIQUE (dest) = MR_DEPENDENCE_CLIQUE (src_base);
5239   MR_DEPENDENCE_BASE (dest) = MR_DEPENDENCE_BASE (src_base);
5240 }
5241
5242
5243 /* Function vect_create_destination_var.
5244
5245    Create a new temporary of type VECTYPE.  */
5246
5247 tree
5248 vect_create_destination_var (tree scalar_dest, tree vectype)
5249 {
5250   tree vec_dest;
5251   const char *name;
5252   char *new_name;
5253   tree type;
5254   enum vect_var_kind kind;
5255
5256   kind = vectype
5257     ? VECTOR_BOOLEAN_TYPE_P (vectype)
5258     ? vect_mask_var
5259     : vect_simple_var
5260     : vect_scalar_var;
5261   type = vectype ? vectype : TREE_TYPE (scalar_dest);
5262
5263   gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
5264
5265   name = get_name (scalar_dest);
5266   if (name)
5267     new_name = xasprintf ("%s_%u", name, SSA_NAME_VERSION (scalar_dest));
5268   else
5269     new_name = xasprintf ("_%u", SSA_NAME_VERSION (scalar_dest));
5270   vec_dest = vect_get_new_vect_var (type, kind, new_name);
5271   free (new_name);
5272
5273   return vec_dest;
5274 }
5275
5276 /* Function vect_grouped_store_supported.
5277
5278    Returns TRUE if interleave high and interleave low permutations
5279    are supported, and FALSE otherwise.  */
5280
5281 bool
5282 vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
5283 {
5284   machine_mode mode = TYPE_MODE (vectype);
5285
5286   /* vect_permute_store_chain requires the group size to be equal to 3 or
5287      be a power of two.  */
5288   if (count != 3 && exact_log2 (count) == -1)
5289     {
5290       if (dump_enabled_p ())
5291         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5292                          "the size of the group of accesses"
5293                          " is not a power of 2 or not eqaul to 3\n");
5294       return false;
5295     }
5296
5297   /* Check that the permutation is supported.  */
5298   if (VECTOR_MODE_P (mode))
5299     {
5300       unsigned int i;
5301       if (count == 3)
5302         {
5303           unsigned int j0 = 0, j1 = 0, j2 = 0;
5304           unsigned int i, j;
5305
5306           unsigned int nelt;
5307           if (!GET_MODE_NUNITS (mode).is_constant (&nelt))
5308             {
5309               if (dump_enabled_p ())
5310                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5311                                  "cannot handle groups of 3 stores for"
5312                                  " variable-length vectors\n");
5313               return false;
5314             }
5315
5316           vec_perm_builder sel (nelt, nelt, 1);
5317           sel.quick_grow (nelt);
5318           vec_perm_indices indices;
5319           for (j = 0; j < 3; j++)
5320             {
5321               int nelt0 = ((3 - j) * nelt) % 3;
5322               int nelt1 = ((3 - j) * nelt + 1) % 3;
5323               int nelt2 = ((3 - j) * nelt + 2) % 3;
5324               for (i = 0; i < nelt; i++)
5325                 {
5326                   if (3 * i + nelt0 < nelt)
5327                     sel[3 * i + nelt0] = j0++;
5328                   if (3 * i + nelt1 < nelt)
5329                     sel[3 * i + nelt1] = nelt + j1++;
5330                   if (3 * i + nelt2 < nelt)
5331                     sel[3 * i + nelt2] = 0;
5332                 }
5333               indices.new_vector (sel, 2, nelt);
5334               if (!can_vec_perm_const_p (mode, indices))
5335                 {
5336                   if (dump_enabled_p ())
5337                     dump_printf (MSG_MISSED_OPTIMIZATION,
5338                                  "permutation op not supported by target.\n");
5339                   return false;
5340                 }
5341
5342               for (i = 0; i < nelt; i++)
5343                 {
5344                   if (3 * i + nelt0 < nelt)
5345                     sel[3 * i + nelt0] = 3 * i + nelt0;
5346                   if (3 * i + nelt1 < nelt)
5347                     sel[3 * i + nelt1] = 3 * i + nelt1;
5348                   if (3 * i + nelt2 < nelt)
5349                     sel[3 * i + nelt2] = nelt + j2++;
5350                 }
5351               indices.new_vector (sel, 2, nelt);
5352               if (!can_vec_perm_const_p (mode, indices))
5353                 {
5354                   if (dump_enabled_p ())
5355                     dump_printf (MSG_MISSED_OPTIMIZATION,
5356                                  "permutation op not supported by target.\n");
5357                   return false;
5358                 }
5359             }
5360           return true;
5361         }
5362       else
5363         {
5364           /* If length is not equal to 3 then only power of 2 is supported.  */
5365           gcc_assert (pow2p_hwi (count));
5366           poly_uint64 nelt = GET_MODE_NUNITS (mode);
5367
5368           /* The encoding has 2 interleaved stepped patterns.  */
5369           vec_perm_builder sel (nelt, 2, 3);
5370           sel.quick_grow (6);
5371           for (i = 0; i < 3; i++)
5372             {
5373               sel[i * 2] = i;
5374               sel[i * 2 + 1] = i + nelt;
5375             }
5376           vec_perm_indices indices (sel, 2, nelt);
5377           if (can_vec_perm_const_p (mode, indices))
5378             {
5379               for (i = 0; i < 6; i++)
5380                 sel[i] += exact_div (nelt, 2);
5381               indices.new_vector (sel, 2, nelt);
5382               if (can_vec_perm_const_p (mode, indices))
5383                 return true;
5384             }
5385         }
5386     }
5387
5388   if (dump_enabled_p ())
5389     dump_printf (MSG_MISSED_OPTIMIZATION,
5390                  "permutation op not supported by target.\n");
5391   return false;
5392 }
5393
5394
5395 /* Return TRUE if vec_{mask_}store_lanes is available for COUNT vectors of
5396    type VECTYPE.  MASKED_P says whether the masked form is needed.  */
5397
5398 bool
5399 vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count,
5400                             bool masked_p)
5401 {
5402   if (masked_p)
5403     return vect_lanes_optab_supported_p ("vec_mask_store_lanes",
5404                                          vec_mask_store_lanes_optab,
5405                                          vectype, count);
5406   else
5407     return vect_lanes_optab_supported_p ("vec_store_lanes",
5408                                          vec_store_lanes_optab,
5409                                          vectype, count);
5410 }
5411
5412
/* Function vect_permute_store_chain.

   Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
   a power of 2 or equal to 3, generate interleave_high/low stmts to reorder
   the data correctly for the stores.  Return the final references for stores
   in RESULT_CHAIN.

   The new statements are emitted through vect_finish_stmt_generation using
   VINFO, STMT_INFO and GSI; the vector type is taken from STMT_INFO.
   RESULT_CHAIN is grown to LENGTH elements with quick_grow (presumably the
   caller has reserved that much space - confirm against callers).  For a
   power-of-two LENGTH greater than 1, DR_CHAIN itself is overwritten with
   the results of each stage.

   E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
   The input is 4 vectors each containing 8 elements.  We assign a number to
   each element, the input sequence is:

   1st vec:   0  1  2  3  4  5  6  7
   2nd vec:   8  9 10 11 12 13 14 15
   3rd vec:  16 17 18 19 20 21 22 23
   4th vec:  24 25 26 27 28 29 30 31

   The output sequence should be:

   1st vec:  0  8 16 24  1  9 17 25
   2nd vec:  2 10 18 26  3 11 19 27
   3rd vec:  4 12 20 28  5 13 21 29
   4th vec:  6 14 22 30  7 15 23 31

   i.e., we interleave the contents of the four vectors in their order.

   We use interleave_high/low instructions to create such output.  The input of
   each interleave_high/low operation is two vectors:
   1st vec    2nd vec
   0 1 2 3    4 5 6 7
   the even elements of the result vector are obtained left-to-right from the
   high/low elements of the first vector.  The odd elements of the result are
   obtained left-to-right from the high/low elements of the second vector.
   The output of interleave_high will be:   0 4 1 5
   and of interleave_low:                   2 6 3 7


   The permutation is done in log LENGTH stages.  In each stage interleave_high
   and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
   where the first argument is taken from the first half of DR_CHAIN and the
   second argument from its second half.
   In our example,

   I1: interleave_high (1st vec, 3rd vec)
   I2: interleave_low (1st vec, 3rd vec)
   I3: interleave_high (2nd vec, 4th vec)
   I4: interleave_low (2nd vec, 4th vec)

   The output for the first stage is:

   I1:  0 16  1 17  2 18  3 19
   I2:  4 20  5 21  6 22  7 23
   I3:  8 24  9 25 10 26 11 27
   I4: 12 28 13 29 14 30 15 31

   The output of the second stage, i.e. the final result is:

   I1:  0  8 16 24  1  9 17 25
   I2:  2 10 18 26  3 11 19 27
   I3:  4 12 20 28  5 13 21 29
   I4:  6 14 22 30  7 15 23 31.  */

void
vect_permute_store_chain (vec_info *vinfo, vec<tree> &dr_chain,
                          unsigned int length,
                          stmt_vec_info stmt_info,
                          gimple_stmt_iterator *gsi,
                          vec<tree> *result_chain)
{
  tree vect1, vect2, high, low;
  gimple *perm_stmt;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree perm_mask_low, perm_mask_high;
  tree data_ref;
  tree perm3_mask_low, perm3_mask_high;
  /* LOG_LENGTH is only used on the power-of-two path below.  */
  unsigned int i, j, n, log_length = exact_log2 (length);

  /* Start with RESULT_CHAIN being a copy of the first LENGTH entries of
     DR_CHAIN.  */
  result_chain->quick_grow (length);
  memcpy (result_chain->address (), dr_chain.address (),
          length * sizeof (tree));

  if (length == 3)
    {
      /* vect_grouped_store_supported ensures that this is constant.  */
      unsigned int nelt = TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
      /* Running source indices; they are not reset between the three
         iterations of the J loop.  */
      unsigned int j0 = 0, j1 = 0, j2 = 0;

      vec_perm_builder sel (nelt, nelt, 1);
      sel.quick_grow (nelt);
      vec_perm_indices indices;
      for (j = 0; j < 3; j++)
        {
          int nelt0 = ((3 - j) * nelt) % 3;
          int nelt1 = ((3 - j) * nelt + 1) % 3;
          int nelt2 = ((3 - j) * nelt + 2) % 3;

          /* Selector for the first shuffle: elements come from the first
             two vectors; slots later filled from the third vector are set
             to 0 here.  */
          for (i = 0; i < nelt; i++)
            {
              if (3 * i + nelt0 < nelt)
                sel[3 * i + nelt0] = j0++;
              if (3 * i + nelt1 < nelt)
                sel[3 * i + nelt1] = nelt + j1++;
              if (3 * i + nelt2 < nelt)
                sel[3 * i + nelt2] = 0;
            }
          indices.new_vector (sel, 2, nelt);
          perm3_mask_low = vect_gen_perm_mask_checked (vectype, indices);

          /* Selector for the second shuffle: keep the slots produced by the
             first shuffle and take the remaining ones from the second
             operand (the third vector).  */
          for (i = 0; i < nelt; i++)
            {
              if (3 * i + nelt0 < nelt)
                sel[3 * i + nelt0] = 3 * i + nelt0;
              if (3 * i + nelt1 < nelt)
                sel[3 * i + nelt1] = 3 * i + nelt1;
              if (3 * i + nelt2 < nelt)
                sel[3 * i + nelt2] = nelt + j2++;
            }
          indices.new_vector (sel, 2, nelt);
          perm3_mask_high = vect_gen_perm_mask_checked (vectype, indices);

          vect1 = dr_chain[0];
          vect2 = dr_chain[1];

          /* Create interleaving stmt:
             low = VEC_PERM_EXPR <vect1, vect2,
                                  {j, nelt, *, j + 1, nelt + j + 1, *,
                                   j + 2, nelt + j + 2, *, ...}>  */
          data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_low");
          perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect1,
                                           vect2, perm3_mask_low);
          vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);

          /* The second shuffle combines the result of the first one with
             the third vector of the chain.  */
          vect1 = data_ref;
          vect2 = dr_chain[2];
          /* Create interleaving stmt:
             high = VEC_PERM_EXPR <vect1, vect2,
                                   {0, 1, nelt + j, 3, 4, nelt + j + 1,
                                    6, 7, nelt + j + 2, ...}>  */
          data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_high");
          perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect1,
                                           vect2, perm3_mask_high);
          vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
          (*result_chain)[j] = data_ref;
        }
    }
  else
    {
      /* If length is not equal to 3 then only power of 2 is supported.  */
      gcc_assert (pow2p_hwi (length));

      /* The encoding has 2 interleaved stepped patterns.  */
      poly_uint64 nelt = TYPE_VECTOR_SUBPARTS (vectype);
      vec_perm_builder sel (nelt, 2, 3);
      sel.quick_grow (6);
      for (i = 0; i < 3; i++)
        {
          sel[i * 2] = i;
          sel[i * 2 + 1] = i + nelt;
        }
        /* Mask selecting { 0, nelt, 1, nelt + 1, ... }.  */
        vec_perm_indices indices (sel, 2, nelt);
        perm_mask_high = vect_gen_perm_mask_checked (vectype, indices);

        /* The other mask is the same pattern offset by nelt / 2.  */
        for (i = 0; i < 6; i++)
          sel[i] += exact_div (nelt, 2);
        indices.new_vector (sel, 2, nelt);
        perm_mask_low = vect_gen_perm_mask_checked (vectype, indices);

        /* One stage per power of two in LENGTH.  */
        for (i = 0, n = log_length; i < n; i++)
          {
            for (j = 0; j < length/2; j++)
              {
                vect1 = dr_chain[j];
                vect2 = dr_chain[j+length/2];

                /* Create interleaving stmt:
                   high = VEC_PERM_EXPR <vect1, vect2, {0, nelt, 1, nelt+1,
                                                        ...}>  */
                high = make_temp_ssa_name (vectype, NULL, "vect_inter_high");
                perm_stmt = gimple_build_assign (high, VEC_PERM_EXPR, vect1,
                                                 vect2, perm_mask_high);
                vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
                (*result_chain)[2*j] = high;

                /* Create interleaving stmt:
                   low = VEC_PERM_EXPR <vect1, vect2,
                                        {nelt/2, nelt*3/2, nelt/2+1, nelt*3/2+1,
                                         ...}>  */
                low = make_temp_ssa_name (vectype, NULL, "vect_inter_low");
                perm_stmt = gimple_build_assign (low, VEC_PERM_EXPR, vect1,
                                                 vect2, perm_mask_low);
                vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
                (*result_chain)[2*j+1] = low;
              }
            /* The output of this stage is the input of the next one.  */
            memcpy (dr_chain.address (), result_chain->address (),
                    length * sizeof (tree));
          }
    }
}
5610
/* Function vect_setup_realignment

   This function is called when vectorizing an unaligned load using
   the dr_explicit_realign[_optimized] scheme.
   This function generates the following code at the loop prolog:

      p = initial_addr;
   x  msq_init = *(floor(p));   # prolog load
      realignment_token = call target_builtin;
    loop:
   x  msq = phi (msq_init, ---)

   The stmts marked with x are generated only for the case of
   dr_explicit_realign_optimized.

   The code above sets up a new (vector) pointer, pointing to the first
   location accessed by STMT_INFO, and a "floor-aligned" load using that
   pointer.  It also generates code to compute the "realignment-token"
   (if the relevant target hook was defined), and creates a phi-node at the
   loop-header bb whose arguments are the result of the prolog-load (created
   by this function) and the result of a load that takes place in the loop
   (to be created by the caller to this function).

   For the case of dr_explicit_realign_optimized:
   The caller to this function uses the phi-result (msq) to create the
   realignment code inside the loop, and sets up the missing phi argument,
   as follows:
    loop:
      msq = phi (msq_init, lsq)
      lsq = *(floor(p'));        # load in loop
      result = realign_load (msq, lsq, realignment_token);

   For the case of dr_explicit_realign:
    loop:
      msq = *(floor(p));        # load in loop
      p' = p + (VS-1);
      lsq = *(floor(p'));       # load in loop
      result = realign_load (msq, lsq, realignment_token);

   Input:
   VINFO - the vectorization info.  If it is not a loop_vec_info, the
           function works in COMPUTE_IN_LOOP mode (see below) and new code
           is inserted at GSI.
   STMT_INFO - (scalar) load stmt to be vectorized. This load accesses
               a memory location that may be unaligned.
   GSI - place where new code is to be inserted.
   ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
                              is used.
   INIT_ADDR - if not NULL_TREE, the address of the access as already
               computed by the caller; this selects COMPUTE_IN_LOOP mode and
               requires ALIGNMENT_SUPPORT_SCHEME to be dr_explicit_realign.
               If NULL_TREE, the address is computed by this function when
               it is needed.

   Output:
   REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
                       target hook, if defined.  It is left untouched when
                       the hook is not defined.
   AT_LOOP - if not NULL, set to the loop chosen for the initial vector
             load (NULL when VINFO is not a loop_vec_info).
   Return value - the result of the loop-header phi node for
                  dr_explicit_realign_optimized; NULL_TREE for
                  dr_explicit_realign.  */

tree
vect_setup_realignment (vec_info *vinfo, stmt_vec_info stmt_info,
                        gimple_stmt_iterator *gsi, tree *realignment_token,
                        enum dr_alignment_support alignment_support_scheme,
                        tree init_addr,
                        class loop **at_loop)
{
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  struct data_reference *dr = dr_info->dr;
  class loop *loop = NULL;
  edge pe = NULL;
  tree scalar_dest = gimple_assign_lhs (stmt_info->stmt);
  tree vec_dest;
  gimple *inc;
  tree ptr;
  tree data_ref;
  basic_block new_bb;
  tree msq_init = NULL_TREE;
  tree new_temp;
  gphi *phi_stmt;
  tree msq = NULL_TREE;
  gimple_seq stmts = NULL;
  bool compute_in_loop = false;
  bool nested_in_vect_loop = false;
  class loop *containing_loop = (gimple_bb (stmt_info->stmt))->loop_father;
  class loop *loop_for_initial_load = NULL;

  /* LOOP stays NULL when VINFO is not a loop_vec_info.  */
  if (loop_vinfo)
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
    }

  gcc_assert (alignment_support_scheme == dr_explicit_realign
              || alignment_support_scheme == dr_explicit_realign_optimized);

  /* We need to generate three things:
     1. the misalignment computation
     2. the extra vector load (for the optimized realignment scheme).
     3. the phi node for the two vectors from which the realignment is
      done (for the optimized realignment scheme).  */

  /* 1. Determine where to generate the misalignment computation.

     If INIT_ADDR is NULL_TREE, this indicates that the misalignment
     calculation will be generated by this function, outside the loop (in the
     preheader).  Otherwise, INIT_ADDR had already been computed for us by the
     caller, inside the loop.

     Background: If the misalignment remains fixed throughout the iterations of
     the loop, then both realignment schemes are applicable, and also the
     misalignment computation can be done outside LOOP.  This is because we are
     vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
     are a multiple of VS (the Vector Size), and therefore the misalignment in
     different vectorized LOOP iterations is always the same.
     The problem arises only if the memory access is in an inner-loop nested
     inside LOOP, which is now being vectorized using outer-loop vectorization.
     This is the only case when the misalignment of the memory access may not
     remain fixed throughout the iterations of the inner-loop (as explained in
     detail in vect_supportable_dr_alignment).  In this case, not only is the
     optimized realignment scheme not applicable, but also the misalignment
     computation (and generation of the realignment token that is passed to
     REALIGN_LOAD) have to be done inside the loop.

     In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
     or not, which in turn determines if the misalignment is computed inside
     the inner-loop, or outside LOOP.  */

  /* Without a loop_vec_info there is no loop preheader to use, so that
     case is treated as COMPUTE_IN_LOOP as well.  */
  if (init_addr != NULL_TREE || !loop_vinfo)
    {
      compute_in_loop = true;
      gcc_assert (alignment_support_scheme == dr_explicit_realign);
    }


  /* 2. Determine where to generate the extra vector load.

     For the optimized realignment scheme, instead of generating two vector
     loads in each iteration, we generate a single extra vector load in the
     preheader of the loop, and in each iteration reuse the result of the
     vector load from the previous iteration.  In case the memory access is in
     an inner-loop nested inside LOOP, which is now being vectorized using
     outer-loop vectorization, we need to determine whether this initial vector
     load should be generated at the preheader of the inner-loop, or can be
     generated at the preheader of LOOP.  If the memory access has no evolution
     in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
     to be generated inside LOOP (in the preheader of the inner-loop).  */

  if (nested_in_vect_loop)
    {
      /* A zero step in the outer loop means the access is invariant
         there.  */
      tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
      bool invariant_in_outerloop =
            (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
      loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
    }
  else
    loop_for_initial_load = loop;
  if (at_loop)
    *at_loop = loop_for_initial_load;

  /* PE remains NULL when there is no loop to place the initial load in.  */
  if (loop_for_initial_load)
    pe = loop_preheader_edge (loop_for_initial_load);

  /* 3. For the case of the optimized realignment, create the first vector
      load at the loop preheader.  */

  if (alignment_support_scheme == dr_explicit_realign_optimized)
    {
      /* Create msq_init = *(floor(p1)) in the loop preheader  */
      gassign *new_stmt;

      gcc_assert (!compute_in_loop);
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      /* INIT_ADDR is passed by address here; presumably the callee can
         fill it in for the token computation below - confirm.  */
      ptr = vect_create_data_ref_ptr (vinfo, stmt_info, vectype,
                                      loop_for_initial_load, NULL_TREE,
                                      &init_addr, NULL, &inc, true);
      if (TREE_CODE (ptr) == SSA_NAME)
        new_temp = copy_ssa_name (ptr);
      else
        new_temp = make_ssa_name (TREE_TYPE (ptr));
      /* Compute the floor-aligned address as PTR & (0 - ALIGN).  */
      poly_uint64 align = DR_TARGET_ALIGNMENT (dr_info);
      tree type = TREE_TYPE (ptr);
      new_stmt = gimple_build_assign
                   (new_temp, BIT_AND_EXPR, ptr,
                    fold_build2 (MINUS_EXPR, type,
                                 build_int_cst (type, 0),
                                 build_int_cst (type, align)));
      new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
      gcc_assert (!new_bb);
      /* Load the vector from the aligned address, reusing the alias
         pointer type and dependence info of the original reference.  */
      data_ref
        = build2 (MEM_REF, TREE_TYPE (vec_dest), new_temp,
                  build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0));
      vect_copy_ref_info (data_ref, DR_REF (dr));
      new_stmt = gimple_build_assign (vec_dest, data_ref);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
      /* NOTE(review): the address computation above was already inserted
         on PE unconditionally, so the else arm below looks unreachable -
         confirm.  */
      if (pe)
        {
          new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
          gcc_assert (!new_bb);
        }
      else
         gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);

      msq_init = gimple_assign_lhs (new_stmt);
    }

  /* 4. Create realignment token using a target builtin, if available.
      It is done either inside the containing loop, or before LOOP (as
      determined above).  */

  if (targetm.vectorize.builtin_mask_for_load)
    {
      gcall *new_stmt;
      tree builtin_decl;

      /* Compute INIT_ADDR - the initial address accessed by this memref.  */
      if (!init_addr)
        {
          /* Generate the INIT_ADDR computation outside LOOP.  */
          init_addr = vect_create_addr_base_for_vector_ref (vinfo,
                                                            stmt_info, &stmts,
                                                            NULL_TREE);
          if (loop)
            {
              pe = loop_preheader_edge (loop);
              new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
              gcc_assert (!new_bb);
            }
          else
             gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
        }

      /* Build the call TOKEN = builtin_mask_for_load (INIT_ADDR).  */
      builtin_decl = targetm.vectorize.builtin_mask_for_load ();
      new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
      vec_dest =
        vect_create_destination_var (scalar_dest,
                                     gimple_call_return_type (new_stmt));
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_call_set_lhs (new_stmt, new_temp);

      if (compute_in_loop)
        gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
      else
        {
          /* Generate the misalignment computation outside LOOP.
             COMPUTE_IN_LOOP being false implies that LOOP_VINFO, and
             therefore LOOP, is non-null here.  */
          pe = loop_preheader_edge (loop);
          new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
          gcc_assert (!new_bb);
        }

      *realignment_token = gimple_call_lhs (new_stmt);

      /* The result of the CALL_EXPR to this builtin is determined from
         the value of the parameter and no global variables are touched
         which makes the builtin a "const" function.  Requiring the
         builtin to have the "const" attribute makes it unnecessary
         to call mark_call_clobbered.  */
      gcc_assert (TREE_READONLY (builtin_decl));
    }

  /* For the non-optimized scheme nothing else is generated here; MSQ is
     still NULL_TREE at this point.  */
  if (alignment_support_scheme == dr_explicit_realign)
    return msq;

  gcc_assert (!compute_in_loop);
  gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);


  /* 5. Create msq = phi <msq_init, lsq> in loop  */

  /* Only the preheader argument is added here; the header comment of this
     function says the caller sets up the remaining one.  */
  pe = loop_preheader_edge (containing_loop);
  vec_dest = vect_create_destination_var (scalar_dest, vectype);
  msq = make_ssa_name (vec_dest);
  phi_stmt = create_phi_node (msq, containing_loop->header);
  add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);

  return msq;
}
5882
5883
5884 /* Function vect_grouped_load_supported.
5885
5886    COUNT is the size of the load group (the number of statements plus the
5887    number of gaps).  SINGLE_ELEMENT_P is true if there is actually
5888    only one statement, with a gap of COUNT - 1.
5889
5890    Returns true if a suitable permute exists.  */
5891
5892 bool
5893 vect_grouped_load_supported (tree vectype, bool single_element_p,
5894                              unsigned HOST_WIDE_INT count)
5895 {
5896   machine_mode mode = TYPE_MODE (vectype);
5897
5898   /* If this is single-element interleaving with an element distance
5899      that leaves unused vector loads around punt - we at least create
5900      very sub-optimal code in that case (and blow up memory,
5901      see PR65518).  */
5902   if (single_element_p && maybe_gt (count, TYPE_VECTOR_SUBPARTS (vectype)))
5903     {
5904       if (dump_enabled_p ())
5905         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5906                          "single-element interleaving not supported "
5907                          "for not adjacent vector loads\n");
5908       return false;
5909     }
5910
5911   /* vect_permute_load_chain requires the group size to be equal to 3 or
5912      be a power of two.  */
5913   if (count != 3 && exact_log2 (count) == -1)
5914     {
5915       if (dump_enabled_p ())
5916         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5917                          "the size of the group of accesses"
5918                          " is not a power of 2 or not equal to 3\n");
5919       return false;
5920     }
5921
5922   /* Check that the permutation is supported.  */
5923   if (VECTOR_MODE_P (mode))
5924     {
5925       unsigned int i, j;
5926       if (count == 3)
5927         {
5928           unsigned int nelt;
5929           if (!GET_MODE_NUNITS (mode).is_constant (&nelt))
5930             {
5931               if (dump_enabled_p ())
5932                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5933                                  "cannot handle groups of 3 loads for"
5934                                  " variable-length vectors\n");
5935               return false;
5936             }
5937
5938           vec_perm_builder sel (nelt, nelt, 1);
5939           sel.quick_grow (nelt);
5940           vec_perm_indices indices;
5941           unsigned int k;
5942           for (k = 0; k < 3; k++)
5943             {
5944               for (i = 0; i < nelt; i++)
5945                 if (3 * i + k < 2 * nelt)
5946                   sel[i] = 3 * i + k;
5947                 else
5948                   sel[i] = 0;
5949               indices.new_vector (sel, 2, nelt);
5950               if (!can_vec_perm_const_p (mode, indices))
5951                 {
5952                   if (dump_enabled_p ())
5953                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5954                                      "shuffle of 3 loads is not supported by"
5955                                      " target\n");
5956                   return false;
5957                 }
5958               for (i = 0, j = 0; i < nelt; i++)
5959                 if (3 * i + k < 2 * nelt)
5960                   sel[i] = i;
5961                 else
5962                   sel[i] = nelt + ((nelt + k) % 3) + 3 * (j++);
5963               indices.new_vector (sel, 2, nelt);
5964               if (!can_vec_perm_const_p (mode, indices))
5965                 {
5966                   if (dump_enabled_p ())
5967                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5968                                      "shuffle of 3 loads is not supported by"
5969                                      " target\n");
5970                   return false;
5971                 }
5972             }
5973           return true;
5974         }
5975       else
5976         {
5977           /* If length is not equal to 3 then only power of 2 is supported.  */
5978           gcc_assert (pow2p_hwi (count));
5979           poly_uint64 nelt = GET_MODE_NUNITS (mode);
5980
5981           /* The encoding has a single stepped pattern.  */
5982           vec_perm_builder sel (nelt, 1, 3);
5983           sel.quick_grow (3);
5984           for (i = 0; i < 3; i++)
5985             sel[i] = i * 2;
5986           vec_perm_indices indices (sel, 2, nelt);
5987           if (can_vec_perm_const_p (mode, indices))
5988             {
5989               for (i = 0; i < 3; i++)
5990                 sel[i] = i * 2 + 1;
5991               indices.new_vector (sel, 2, nelt);
5992               if (can_vec_perm_const_p (mode, indices))
5993                 return true;
5994             }
5995         }
5996     }
5997
5998   if (dump_enabled_p ())
5999     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6000                      "extract even/odd not supported by target\n");
6001   return false;
6002 }
6003
6004 /* Return TRUE if vec_{masked_}load_lanes is available for COUNT vectors of
6005    type VECTYPE.  MASKED_P says whether the masked form is needed.  */
6006
6007 bool
6008 vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count,
6009                            bool masked_p)
6010 {
6011   if (masked_p)
6012     return vect_lanes_optab_supported_p ("vec_mask_load_lanes",
6013                                          vec_mask_load_lanes_optab,
6014                                          vectype, count);
6015   else
6016     return vect_lanes_optab_supported_p ("vec_load_lanes",
6017                                          vec_load_lanes_optab,
6018                                          vectype, count);
6019 }
6020
6021 /* Function vect_permute_load_chain.
6022
6023    Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
6024    a power of 2 or equal to 3, generate extract_even/odd stmts to reorder
6025    the input data correctly.  Return the final references for loads in
6026    RESULT_CHAIN.
6027
6028    E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
6029    The input is 4 vectors each containing 8 elements. We assign a number to each
6030    element, the input sequence is:
6031
6032    1st vec:   0  1  2  3  4  5  6  7
6033    2nd vec:   8  9 10 11 12 13 14 15
6034    3rd vec:  16 17 18 19 20 21 22 23
6035    4th vec:  24 25 26 27 28 29 30 31
6036
6037    The output sequence should be:
6038
6039    1st vec:  0 4  8 12 16 20 24 28
6040    2nd vec:  1 5  9 13 17 21 25 29
6041    3rd vec:  2 6 10 14 18 22 26 30
6042    4th vec:  3 7 11 15 19 23 27 31
6043
6044    i.e., the first output vector should contain the first elements of each
6045    interleaving group, etc.
6046
6047    We use extract_even/odd instructions to create such output.  The input of
6048    each extract_even/odd operation is two vectors
6049    1st vec    2nd vec
6050    0 1 2 3    4 5 6 7
6051
6052    and the output is the vector of extracted even/odd elements.  The output of
6053    extract_even will be:   0 2 4 6
6054    and of extract_odd:     1 3 5 7
6055
6056
6057    The permutation is done in log LENGTH stages.  In each stage extract_even
6058    and extract_odd stmts are created for each pair of vectors in DR_CHAIN in
6059    their order.  In our example,
6060
6061    E1: extract_even (1st vec, 2nd vec)
6062    E2: extract_odd (1st vec, 2nd vec)
6063    E3: extract_even (3rd vec, 4th vec)
6064    E4: extract_odd (3rd vec, 4th vec)
6065
6066    The output for the first stage will be:
6067
6068    E1:  0  2  4  6  8 10 12 14
6069    E2:  1  3  5  7  9 11 13 15
6070    E3: 16 18 20 22 24 26 28 30
6071    E4: 17 19 21 23 25 27 29 31
6072
6073    In order to proceed and create the correct sequence for the next stage (or
6074    for the correct output, if the second stage is the last one, as in our
6075    example), we first put the output of extract_even operation and then the
6076    output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
6077    The input for the second stage is:
6078
6079    1st vec (E1):  0  2  4  6  8 10 12 14
6080    2nd vec (E3): 16 18 20 22 24 26 28 30
6081    3rd vec (E2):  1  3  5  7  9 11 13 15
6082    4th vec (E4): 17 19 21 23 25 27 29 31
6083
6084    The output of the second stage:
6085
6086    E1: 0 4  8 12 16 20 24 28
6087    E2: 2 6 10 14 18 22 26 30
6088    E3: 1 5  9 13 17 21 25 29
6089    E4: 3 7 11 15 19 23 27 31
6090
6091    And RESULT_CHAIN after reordering:
6092
6093    1st vec (E1):  0 4  8 12 16 20 24 28
6094    2nd vec (E3):  1 5  9 13 17 21 25 29
6095    3rd vec (E2):  2 6 10 14 18 22 26 30
6096    4th vec (E4):  3 7 11 15 19 23 27 31.  */
6097
6098 static void
6099 vect_permute_load_chain (vec_info *vinfo, vec<tree> dr_chain,
6100                          unsigned int length,
6101                          stmt_vec_info stmt_info,
6102                          gimple_stmt_iterator *gsi,
6103                          vec<tree> *result_chain)
6104 {
6105   tree data_ref, first_vect, second_vect;
6106   tree perm_mask_even, perm_mask_odd;
6107   tree perm3_mask_low, perm3_mask_high;
6108   gimple *perm_stmt;
6109   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6110   unsigned int i, j, log_length = exact_log2 (length);
6111   /* Start with RESULT_CHAIN holding a copy of the LENGTH input vectors.  */
6112   result_chain->quick_grow (length);
6113   memcpy (result_chain->address (), dr_chain.address (),
6114           length * sizeof (tree));
6115
6116   if (length == 3)
6117     {
6118       /* vect_grouped_load_supported ensures that this is constant.  */
6119       unsigned nelt = TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
6120       unsigned int k;
6121       /* Output K collects elements K, K + 3, K + 6, ... of the 3 inputs.  */
6122       vec_perm_builder sel (nelt, nelt, 1);
6123       sel.quick_grow (nelt);
6124       vec_perm_indices indices;
6125       for (k = 0; k < 3; k++)
6126         {
6127           for (i = 0; i < nelt; i++)
6128             if (3 * i + k < 2 * nelt)
6129               sel[i] = 3 * i + k;
6130             else
6131               sel[i] = 0;  /* Don't care; set by the high permute.  */
6132           indices.new_vector (sel, 2, nelt);
6133           perm3_mask_low = vect_gen_perm_mask_checked (vectype, indices);
6134           /* Keep those lanes and take the rest from the third vector.  */
6135           for (i = 0, j = 0; i < nelt; i++)
6136             if (3 * i + k < 2 * nelt)
6137               sel[i] = i;
6138             else
6139               sel[i] = nelt + ((nelt + k) % 3) + 3 * (j++);
6140           indices.new_vector (sel, 2, nelt);
6141           perm3_mask_high = vect_gen_perm_mask_checked (vectype, indices);
6142
6143           first_vect = dr_chain[0];
6144           second_vect = dr_chain[1];
6145
6146           /* Create interleaving stmt (low part of):
6147              low = VEC_PERM_EXPR <first_vect, second_vect, {k, 3 + k, 6 + k,
6148                                                             ...}>  */
6149           data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_low");
6150           perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, first_vect,
6151                                            second_vect, perm3_mask_low);
6152           vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
6153
6154           /* Create interleaving stmt (high part of):
6155              high = VEC_PERM_EXPR <low, third_vect, {0, 1, ...,
6156                                    nelt + (nelt + k) % 3, ...}>  */
6157           first_vect = data_ref;
6158           second_vect = dr_chain[2];
6159           data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_high");
6160           perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, first_vect,
6161                                            second_vect, perm3_mask_high);
6162           vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
6163           (*result_chain)[k] = data_ref;
6164         }
6165     }
6166   else
6167     {
6168       /* If length is not equal to 3 then only power of 2 is supported.  */
6169       gcc_assert (pow2p_hwi (length));
6170
6171       /* The encoding has a single stepped pattern.  */
6172       poly_uint64 nelt = TYPE_VECTOR_SUBPARTS (vectype);
6173       vec_perm_builder sel (nelt, 1, 3);
6174       sel.quick_grow (3);
6175       for (i = 0; i < 3; ++i)
6176         sel[i] = i * 2;
6177       vec_perm_indices indices (sel, 2, nelt);
6178       perm_mask_even = vect_gen_perm_mask_checked (vectype, indices);
6179
6180       for (i = 0; i < 3; ++i)
6181         sel[i] = i * 2 + 1;
6182       indices.new_vector (sel, 2, nelt);
6183       perm_mask_odd = vect_gen_perm_mask_checked (vectype, indices);
6184       /* Each of the LOG_LENGTH stages de-interleaves every pair of vectors.  */
6185       for (i = 0; i < log_length; i++)
6186         {
6187           for (j = 0; j < length; j += 2)
6188             {
6189               first_vect = dr_chain[j];
6190               second_vect = dr_chain[j+1];
6191
6192               /* data_ref = permute_even (first_data_ref, second_data_ref);  */
6193               data_ref = make_temp_ssa_name (vectype, NULL, "vect_perm_even");
6194               perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
6195                                                first_vect, second_vect,
6196                                                perm_mask_even);
6197               vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
6198               (*result_chain)[j/2] = data_ref;
6199
6200               /* data_ref = permute_odd (first_data_ref, second_data_ref);  */
6201               data_ref = make_temp_ssa_name (vectype, NULL, "vect_perm_odd");
6202               perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
6203                                                first_vect, second_vect,
6204                                                perm_mask_odd);
6205               vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
6206               (*result_chain)[j/2+length/2] = data_ref;
6207             }
6208           memcpy (dr_chain.address (), result_chain->address (),
6209                   length * sizeof (tree));  /* Input of the next stage.  */
6210         }
6211     }
6212 }
6213
6214 /* Function vect_shift_permute_load_chain.
6215
6216    Given a chain of loads in DR_CHAIN whose LENGTH is a power of 2 or
6217    equal to 3, generate a sequence of stmts to reorder the input data.
6218    Return the final references for loads in RESULT_CHAIN.
6219    Return true if successful, false otherwise.
6220
6221    E.g., LENGTH is 3 and the scalar type is short, i.e., VF is 8.
6222    The input is 3 vectors each containing 8 elements.  We assign a
6223    number to each element, the input sequence is:
6224
6225    1st vec:   0  1  2  3  4  5  6  7
6226    2nd vec:   8  9 10 11 12 13 14 15
6227    3rd vec:  16 17 18 19 20 21 22 23
6228
6229    The output sequence should be:
6230
6231    1st vec:  0 3 6  9 12 15 18 21
6232    2nd vec:  1 4 7 10 13 16 19 22
6233    3rd vec:  2 5 8 11 14 17 20 23
6234
6235    We use 3 shuffle instructions and 3 * 3 - 1 shifts to create such output.
6236
6237    First we shuffle all 3 vectors to get correct elements order:
6238
6239    1st vec:  ( 0  3  6) ( 1  4  7) ( 2  5)
6240    2nd vec:  ( 8 11 14) ( 9 12 15) (10 13)
6241    3rd vec:  (16 19 22) (17 20 23) (18 21)
6242
6243    Next we unite and shift vector 3 times:
6244
6245    1st step:
6246      shift right by 6 the concatenation of:
6247      "1st vec" and  "2nd vec"
6248        ( 0  3  6) ( 1  4  7) |( 2  5) _ ( 8 11 14) ( 9 12 15)| (10 13)
6249      "2nd vec" and  "3rd vec"
6250        ( 8 11 14) ( 9 12 15) |(10 13) _ (16 19 22) (17 20 23)| (18 21)
6251      "3rd vec" and  "1st vec"
6252        (16 19 22) (17 20 23) |(18 21) _ ( 0  3  6) ( 1  4  7)| ( 2  5)
6253                              | New vectors                   |
6254
6255      So that now new vectors are:
6256
6257      1st vec:  ( 2  5) ( 8 11 14) ( 9 12 15)
6258      2nd vec:  (10 13) (16 19 22) (17 20 23)
6259      3rd vec:  (18 21) ( 0  3  6) ( 1  4  7)
6260
6261    2nd step:
6262      shift right by 5 the concatenation of:
6263      "1st vec" and  "3rd vec"
6264        ( 2  5) ( 8 11 14) |( 9 12 15) _ (18 21) ( 0  3  6)| ( 1  4  7)
6265      "2nd vec" and  "1st vec"
6266        (10 13) (16 19 22) |(17 20 23) _ ( 2  5) ( 8 11 14)| ( 9 12 15)
6267      "3rd vec" and  "2nd vec"
6268        (18 21) ( 0  3  6) |( 1  4  7) _ (10 13) (16 19 22)| (17 20 23)
6269                           | New vectors                   |
6270
6271      So that now new vectors are:
6272
6273      1st vec:  ( 9 12 15) (18 21) ( 0  3  6)
6274      2nd vec:  (17 20 23) ( 2  5) ( 8 11 14)
6275      3rd vec:  ( 1  4  7) (10 13) (16 19 22) READY
6276
6277    3rd step:
6278      shift right by 5 the concatenation of:
6279      "1st vec" and  "1st vec"
6280        ( 9 12 15) (18 21) |( 0  3  6) _ ( 9 12 15) (18 21)| ( 0  3  6)
6281      shift right by 3 the concatenation of:
6282      "2nd vec" and  "2nd vec"
6283                (17 20 23) |( 2  5) ( 8 11 14) _ (17 20 23)| ( 2  5) ( 8 11 14)
6284                           | New vectors                   |
6285
6286      So that now all vectors are READY:
6287      1st vec:  ( 0  3  6) ( 9 12 15) (18 21)
6288      2nd vec:  ( 2  5) ( 8 11 14) (17 20 23)
6289      3rd vec:  ( 1  4  7) (10 13) (16 19 22)
6290
6291    This algorithm is faster than one in vect_permute_load_chain if:
6292      1.  "shift of a concatenation" is faster than general permutation.
6293          This is usually so.
6294      2.  The TARGET machine can't execute vector instructions in parallel.
6295          This is because each step of the algorithm depends on previous.
6296          The algorithm in vect_permute_load_chain is much more parallel.
6297
6298    The algorithm is applicable only for LOAD CHAIN LENGTH less than VF.
6299 */
6300
6301 static bool
6302 vect_shift_permute_load_chain (vec_info *vinfo, vec<tree> dr_chain,
6303                                unsigned int length,
6304                                stmt_vec_info stmt_info,
6305                                gimple_stmt_iterator *gsi,
6306                                vec<tree> *result_chain)
6307 {
6308   tree vect[3], vect_shift[3], data_ref, first_vect, second_vect;
6309   tree perm2_mask1, perm2_mask2, perm3_mask;
6310   tree select_mask, shift1_mask, shift2_mask, shift3_mask, shift4_mask;
6311   gimple *perm_stmt;
6312
6313   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6314   unsigned int i;
6315   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6316   unsigned HOST_WIDE_INT nelt, vf;
6317   if (!loop_vinfo
6318       || !TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nelt)
6319       || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
6320     /* Not supported outside of loops or for variable-length vectors.  */
6321     return false;
6322
6323   vec_perm_builder sel (nelt, nelt, 1);
6324   sel.quick_grow (nelt);
6325   /* Start with RESULT_CHAIN holding a copy of the LENGTH input vectors.  */
6326   result_chain->quick_grow (length);
6327   memcpy (result_chain->address (), dr_chain.address (),
6328           length * sizeof (tree));
6329
6330   if (pow2p_hwi (length) && vf > 4)
6331     {
6332       unsigned int j, log_length = exact_log2 (length);
6333       for (i = 0; i < nelt / 2; ++i)
6334         sel[i] = i * 2;  /* Low half: even elements.  */
6335       for (i = 0; i < nelt / 2; ++i)
6336         sel[nelt / 2 + i] = i * 2 + 1;  /* High half: odd elements.  */
6337       vec_perm_indices indices (sel, 2, nelt);
6338       if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
6339         {
6340           if (dump_enabled_p ())
6341             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6342                              "shuffle of 2 fields structure is not "
6343                              "supported by target\n");
6344           return false;
6345         }
6346       perm2_mask1 = vect_gen_perm_mask_checked (vectype, indices);
6347
6348       for (i = 0; i < nelt / 2; ++i)
6349         sel[i] = i * 2 + 1;  /* Low half: odd elements.  */
6350       for (i = 0; i < nelt / 2; ++i)
6351         sel[nelt / 2 + i] = i * 2;  /* High half: even elements.  */
6352       indices.new_vector (sel, 2, nelt);
6353       if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
6354         {
6355           if (dump_enabled_p ())
6356             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6357                              "shuffle of 2 fields structure is not "
6358                              "supported by target\n");
6359           return false;
6360         }
6361       perm2_mask2 = vect_gen_perm_mask_checked (vectype, indices);
6362
6363       /* Generating permutation constant to shift all elements.
6364          For vector length 8 it is {4 5 6 7 8 9 10 11}.  */
6365       for (i = 0; i < nelt; i++)
6366         sel[i] = nelt / 2 + i;
6367       indices.new_vector (sel, 2, nelt);
6368       if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
6369         {
6370           if (dump_enabled_p ())
6371             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6372                              "shift permutation is not supported by target\n");
6373           return false;
6374         }
6375       shift1_mask = vect_gen_perm_mask_checked (vectype, indices);
6376
6377       /* Generating permutation constant to select vector from 2.
6378          For vector length 8 it is {0 1 2 3 12 13 14 15}.  */
6379       for (i = 0; i < nelt / 2; i++)
6380         sel[i] = i;
6381       for (i = nelt / 2; i < nelt; i++)
6382         sel[i] = nelt + i;
6383       indices.new_vector (sel, 2, nelt);
6384       if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
6385         {
6386           if (dump_enabled_p ())
6387             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6388                              "select is not supported by target\n");
6389           return false;
6390         }
6391       select_mask = vect_gen_perm_mask_checked (vectype, indices);
6392
6393       for (i = 0; i < log_length; i++)
6394         {
6395           for (j = 0; j < length; j += 2)
6396             {
6397               first_vect = dr_chain[j];
6398               second_vect = dr_chain[j + 1];
6399               /* Sort FIRST_VECT into even elements, then odd ones.  */
6400               data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle2");
6401               perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
6402                                                first_vect, first_vect,
6403                                                perm2_mask1);
6404               vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
6405               vect[0] = data_ref;
6406               /* Sort SECOND_VECT into odd elements, then even ones.  */
6407               data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle2");
6408               perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
6409                                                second_vect, second_vect,
6410                                                perm2_mask2);
6411               vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
6412               vect[1] = data_ref;
6413               /* The odd halves are adjacent in the concatenation.  */
6414               data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift");
6415               perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
6416                                                vect[0], vect[1], shift1_mask);
6417               vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
6418               (*result_chain)[j/2 + length/2] = data_ref;
6419               /* The even halves are its first and its last half.  */
6420               data_ref = make_temp_ssa_name (vectype, NULL, "vect_select");
6421               perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
6422                                                vect[0], vect[1], select_mask);
6423               vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
6424               (*result_chain)[j/2] = data_ref;
6425             }
6426           memcpy (dr_chain.address (), result_chain->address (),
6427                   length * sizeof (tree));  /* Input of the next stage.  */
6428         }
6429       return true;
6430     }
6431   if (length == 3 && vf > 2)
6432     {
6433       unsigned int k = 0, l = 0;
6434
6435       /* Generating permutation constant to get all elements in right order.
6436          For vector length 8 it is {0 3 6 1 4 7 2 5}.  */
6437       for (i = 0; i < nelt; i++)
6438         {
6439           if (3 * k + (l % 3) >= nelt)
6440             {
6441               k = 0;
6442               l += (3 - (nelt % 3));
6443             }
6444           sel[i] = 3 * k + (l % 3);
6445           k++;
6446         }
6447       vec_perm_indices indices (sel, 2, nelt);
6448       if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
6449         {
6450           if (dump_enabled_p ())
6451             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6452                              "shuffle of 3 fields structure is not "
6453                              "supported by target\n");
6454           return false;
6455         }
6456       perm3_mask = vect_gen_perm_mask_checked (vectype, indices);
6457
6458       /* Generating permutation constant to shift all elements.
6459          For vector length 8 it is {6 7 8 9 10 11 12 13}.  */
6460       for (i = 0; i < nelt; i++)
6461         sel[i] = 2 * (nelt / 3) + (nelt % 3) + i;
6462       indices.new_vector (sel, 2, nelt);
6463       if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
6464         {
6465           if (dump_enabled_p ())
6466             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6467                              "shift permutation is not supported by target\n");
6468           return false;
6469         }
6470       shift1_mask = vect_gen_perm_mask_checked (vectype, indices);
6471
6472       /* Generating permutation constant to shift all elements.
6473          For vector length 8 it is {5 6 7 8 9 10 11 12}.  */
6474       for (i = 0; i < nelt; i++)
6475         sel[i] = 2 * (nelt / 3) + 1 + i;
6476       indices.new_vector (sel, 2, nelt);
6477       if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
6478         {
6479           if (dump_enabled_p ())
6480             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6481                              "shift permutation is not supported by target\n");
6482           return false;
6483         }
6484       shift2_mask = vect_gen_perm_mask_checked (vectype, indices);
6485
6486       /* Generating permutation constant to shift all elements.
6487          For vector length 8 it is {3 4 5 6 7 8 9 10}.  */
6488       for (i = 0; i < nelt; i++)
6489         sel[i] = (nelt / 3) + (nelt % 3) / 2 + i;
6490       indices.new_vector (sel, 2, nelt);
6491       if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
6492         {
6493           if (dump_enabled_p ())
6494             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6495                              "shift permutation is not supported by target\n");
6496           return false;
6497         }
6498       shift3_mask = vect_gen_perm_mask_checked (vectype, indices);
6499
6500       /* Generating permutation constant to shift all elements.
6501          For vector length 8 it is {5 6 7 8 9 10 11 12}.  */
6502       for (i = 0; i < nelt; i++)
6503         sel[i] = 2 * (nelt / 3) + (nelt % 3) / 2 + i;
6504       indices.new_vector (sel, 2, nelt);
6505       if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
6506         {
6507           if (dump_enabled_p ())
6508             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6509                              "shift permutation is not supported by target\n");
6510           return false;
6511         }
6512       shift4_mask = vect_gen_perm_mask_checked (vectype, indices);
6513       /* First shuffle each of the 3 input vectors with PERM3_MASK.  */
6514       for (k = 0; k < 3; k++)
6515         {
6516           data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3");
6517           perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
6518                                            dr_chain[k], dr_chain[k],
6519                                            perm3_mask);
6520           vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
6521           vect[k] = data_ref;
6522         }
6523       /* 1st step: shift the concatenation of each vector and its successor.  */
6524       for (k = 0; k < 3; k++)
6525         {
6526           data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift1");
6527           perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
6528                                            vect[k % 3], vect[(k + 1) % 3],
6529                                            shift1_mask);
6530           vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
6531           vect_shift[k] = data_ref;
6532         }
6533       /* 2nd step: shift again, pairing the vectors in a different order.  */
6534       for (k = 0; k < 3; k++)
6535         {
6536           data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift2");
6537           perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
6538                                            vect_shift[(4 - k) % 3],
6539                                            vect_shift[(3 - k) % 3],
6540                                            shift2_mask);
6541           vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
6542           vect[k] = data_ref;
6543         }
6544       /* VECT[2] is final.  NOTE(review): index assumes NELT % 3 != 0.  */
6545       (*result_chain)[3 - (nelt % 3)] = vect[2];
6546       /* 3rd step: rotate the other two vectors into their final form.  */
6547       data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift3");
6548       perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect[0],
6549                                        vect[0], shift3_mask);
6550       vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
6551       (*result_chain)[nelt % 3] = data_ref;
6552
6553       data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift4");
6554       perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect[1],
6555                                        vect[1], shift4_mask);
6556       vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
6557       (*result_chain)[0] = data_ref;
6558       return true;
6559     }
6560   return false;
6561 }
6562
6563 /* Function vect_transform_grouped_load.
6564
6565    Build, at GSI, the statements that permute the SIZE interleaved input
6566    data-refs in DR_CHAIN, and ascribe the resulting vectorized statements
6567    to the scalar statements of the group that STMT_INFO belongs to.
6568 */
6569
6570 void
6571 vect_transform_grouped_load (vec_info *vinfo, stmt_vec_info stmt_info,
6572                              vec<tree> dr_chain,
6573                              int size, gimple_stmt_iterator *gsi)
6574 {
6575   /* RESULT_CHAIN receives the permuted vectors, one per input vector in
6576      DR_CHAIN, ready for vector computation.  */
6577   vec<tree> result_chain = vNULL;
6578   result_chain.create (size);
6579
6580   /* The shift-based scheme is only tried when SIZE is not a power of 2 and
6581      the target reports a reassociation width of 1 or less for VEC_PERM_EXPR,
6582      i.e. cannot execute 2 or more vector instructions in parallel.  */
6583   machine_mode mode = TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info));
6584   bool shift_permuted_p = false;
6585   if (targetm.sched.reassociation_width (VEC_PERM_EXPR, mode) <= 1
6586       && !pow2p_hwi (size))
6587     shift_permuted_p = vect_shift_permute_load_chain (vinfo, dr_chain, size,
6588                                                       stmt_info, gsi,
6589                                                       &result_chain);
6590   if (!shift_permuted_p)
6591     vect_permute_load_chain (vinfo, dr_chain, size, stmt_info, gsi,
6592                              &result_chain);
6593   vect_record_grouped_load_vectors (vinfo, stmt_info, result_chain);
6594   result_chain.release ();
6595 }
6596
6597 /* RESULT_CHAIN holds the vectors produced for a group of grouped loads
6598    during the vectorization of STMT_INFO.  Record the defining statement of
6599    each vector with the scalar statement of the group that it implements.  */
6600
6601 void
6602 vect_record_grouped_load_vectors (vec_info *, stmt_vec_info stmt_info,
6603                                   vec<tree> result_chain)
6604 {
6605   stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
6606
6607   /* The scalar statement that the next recorded vector belongs to.  The
6608      walk starts at the first statement of the group, so the order of the
6609      statements corresponds to the order of the vectors in RESULT_CHAIN.  */
6610   stmt_vec_info next_stmt_info = first_stmt_info;
6611
6612   /* Number of vectors examined for NEXT_STMT_INFO so far, counting the
6613      current one.  */
6614   unsigned int gap_count = 1;
6615
6616   /* Visit the vectors in order and stop as soon as either the group or
6617      RESULT_CHAIN is exhausted.  */
6618   for (unsigned int i = 0;
6619        next_stmt_info && i < result_chain.length ();
6620        i++)
6621     {
6622       /* Skip the gaps.  Loads created for the gaps will be removed by dead
6623          code elimination pass later.  No need to check for the first stmt in
6624          the group, since it always exists.
6625          DR_GROUP_GAP is the number of steps in elements from the previous
6626          access (if there is no gap DR_GROUP_GAP is 1).  We skip loads that
6627          correspond to the gaps.  */
6628       if (next_stmt_info != first_stmt_info
6629           && gap_count < DR_GROUP_GAP (next_stmt_info))
6630         {
6631           gap_count++;
6632           continue;
6633         }
6634
6635       /* Append the statement defining this vector to the vectorized
6636          statements of the scalar load; with multiple copies the newest
6637          statement therefore comes last.  */
6638       gimple *def_stmt = SSA_NAME_DEF_STMT (result_chain[i]);
6639       STMT_VINFO_VEC_STMTS (next_stmt_info).safe_push (def_stmt);
6640
6641       /* Move on to the next load of the group and restart gap counting.  */
6642       next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6643       gap_count = 1;
6644     }
6645 }
6646
6647 /* Function vect_can_force_dr_alignment_p.
6648
6649    Return whether DECL can be forced to be aligned on an ALIGNMENT bit
6650    boundary.  */
6651
6652 bool
6653 vect_can_force_dr_alignment_p (const_tree decl, poly_uint64 alignment)
6654 {
6655   /* DECL must be a variable, and if it is in the symbol table its node
6656      must allow the alignment to be increased.  */
6657   if (!VAR_P (decl)
6658       || (decl_in_symtab_p (decl)
6659           && !symtab_node::get (decl)->can_increase_alignment_p ()))
6660     return false;
6661   /* Static variables are bounded by MAX_OFILE_ALIGNMENT, all others by
6662      MAX_STACK_ALIGNMENT.  */
6663   unsigned HOST_WIDE_INT max_alignment
6664     = (TREE_STATIC (decl)
6665        ? (unsigned HOST_WIDE_INT) MAX_OFILE_ALIGNMENT
6666        : (unsigned HOST_WIDE_INT) MAX_STACK_ALIGNMENT);
6667   return known_le (alignment, max_alignment);
6668 }
6668
6669 /* Return whether the data reference DR_INFO is supported with respect to its
6670    alignment.
6671    If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even
6672    it is aligned, i.e., check if it is possible to vectorize it with different
6673    alignment.  */
6674
6675 enum dr_alignment_support
6676 vect_supportable_dr_alignment (vec_info *vinfo, dr_vec_info *dr_info,
6677                                tree vectype, int misalignment)
6678 {
6679   data_reference *dr = dr_info->dr;
6680   stmt_vec_info stmt_info = dr_info->stmt;
6681   machine_mode mode = TYPE_MODE (vectype);
6682   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6683   class loop *vect_loop = NULL;
6684   bool nested_in_vect_loop = false;
6685
6686   if (misalignment == 0)
6687     return dr_aligned;
6688
6689   /* For now assume all conditional loads/stores support unaligned
6690      access without any special code.  */
6691   if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
6692     if (gimple_call_internal_p (stmt)
6693         && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
6694             || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
6695       return dr_unaligned_supported;
6696
6697   if (loop_vinfo)
6698     {
6699       vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
6700       nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt_info);
6701     }
6702
6703   /* Possibly unaligned access.  */
6704
6705   /* We can choose between using the implicit realignment scheme (generating
6706      a misaligned_move stmt) and the explicit realignment scheme (generating
6707      aligned loads with a REALIGN_LOAD).  There are two variants to the
6708      explicit realignment scheme: optimized, and unoptimized.
6709      We can optimize the realignment only if the step between consecutive
6710      vector loads is equal to the vector size.  Since the vector memory
6711      accesses advance in steps of VS (Vector Size) in the vectorized loop, it
6712      is guaranteed that the misalignment amount remains the same throughout the
6713      execution of the vectorized loop.  Therefore, we can create the
6714      "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
6715      at the loop preheader.
6716
6717      However, in the case of outer-loop vectorization, when vectorizing a
6718      memory access in the inner-loop nested within the LOOP that is now being
6719      vectorized, while it is guaranteed that the misalignment of the
6720      vectorized memory access will remain the same in different outer-loop
6721      iterations, it is *not* guaranteed that is will remain the same throughout
6722      the execution of the inner-loop.  This is because the inner-loop advances
6723      with the original scalar step (and not in steps of VS).  If the inner-loop
6724      step happens to be a multiple of VS, then the misalignment remains fixed
6725      and we can use the optimized realignment scheme.  For example:
6726
6727       for (i=0; i<N; i++)
6728         for (j=0; j<M; j++)
6729           s += a[i+j];
6730
6731      When vectorizing the i-loop in the above example, the step between
6732      consecutive vector loads is 1, and so the misalignment does not remain
6733      fixed across the execution of the inner-loop, and the realignment cannot
6734      be optimized (as illustrated in the following pseudo vectorized loop):
6735
6736       for (i=0; i<N; i+=4)
6737         for (j=0; j<M; j++){
6738           vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
6739                          // when j is {0,1,2,3,4,5,6,7,...} respectively.
6740                          // (assuming that we start from an aligned address).
6741           }
6742
6743      We therefore have to use the unoptimized realignment scheme:
6744
6745       for (i=0; i<N; i+=4)
6746           for (j=k; j<M; j+=4)
6747           vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
6748                            // that the misalignment of the initial address is
6749                            // 0).
6750
6751      The loop can then be vectorized as follows:
6752
6753       for (k=0; k<4; k++){
6754         rt = get_realignment_token (&vp[k]);
6755         for (i=0; i<N; i+=4){
6756           v1 = vp[i+k];
6757           for (j=k; j<M; j+=4){
6758             v2 = vp[i+j+VS-1];
6759             va = REALIGN_LOAD <v1,v2,rt>;
6760             vs += va;
6761             v1 = v2;
6762           }
6763         }
6764     } */
6765
6766   if (DR_IS_READ (dr))
6767     {
6768       if (optab_handler (vec_realign_load_optab, mode) != CODE_FOR_nothing
6769           && (!targetm.vectorize.builtin_mask_for_load
6770               || targetm.vectorize.builtin_mask_for_load ()))
6771         {
6772           /* If we are doing SLP then the accesses need not have the
6773              same alignment, instead it depends on the SLP group size.  */
6774           if (loop_vinfo
6775               && STMT_SLP_TYPE (stmt_info)
6776               && !multiple_p (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6777                               * (DR_GROUP_SIZE
6778                                  (DR_GROUP_FIRST_ELEMENT (stmt_info))),
6779                               TYPE_VECTOR_SUBPARTS (vectype)))
6780             ;
6781           else if (!loop_vinfo
6782                    || (nested_in_vect_loop
6783                        && maybe_ne (TREE_INT_CST_LOW (DR_STEP (dr)),
6784                                     GET_MODE_SIZE (TYPE_MODE (vectype)))))
6785             return dr_explicit_realign;
6786           else
6787             return dr_explicit_realign_optimized;
6788         }
6789     }
6790
6791   bool is_packed = false;
6792   tree type = TREE_TYPE (DR_REF (dr));
6793   if (misalignment == DR_MISALIGNMENT_UNKNOWN)
6794     is_packed = not_size_aligned (DR_REF (dr));
6795   if (targetm.vectorize.support_vector_misalignment (mode, type, misalignment,
6796                                                      is_packed))
6797     return dr_unaligned_supported;
6798
6799   /* Unsupported.  */
6800   return dr_unaligned_unsupported;
6801 }