gcc/tree-vect-patterns.cc

   1 /* Analysis Utilities for Loop Vectorization.
   2    Copyright (C) 2006-2022 Free Software Foundation, Inc.
   3    Contributed by Dorit Nuzman <dorit@il.ibm.com>
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "backend.h"
  25 #include "rtl.h"
  26 #include "tree.h"
  27 #include "gimple.h"
  28 #include "ssa.h"
  29 #include "expmed.h"
  30 #include "optabs-tree.h"
  31 #include "insn-config.h"
  32 #include "recog.h"              /* FIXME: for insn_data */
  33 #include "fold-const.h"
  34 #include "stor-layout.h"
  35 #include "tree-eh.h"
  36 #include "gimplify.h"
  37 #include "gimple-iterator.h"
  38 #include "cfgloop.h"
  39 #include "tree-vectorizer.h"
  40 #include "dumpfile.h"
  41 #include "builtins.h"
  42 #include "internal-fn.h"
  43 #include "case-cfn-macros.h"
  44 #include "fold-const-call.h"
  45 #include "attribs.h"
  46 #include "cgraph.h"
  47 #include "omp-simd-clone.h"
  48 #include "predict.h"
  49 #include "tree-vector-builder.h"
  50 #include "vec-perm-indices.h"
  51 #include "gimple-range.h"
  52
  53
  54 /* TODO:  Note the vectorizer still builds COND_EXPRs with GENERIC compares
  55    in the first operand.  Disentangling this is future work, the
  56    IL is properly transfered to VEC_COND_EXPRs with separate compares.  */
  57
  58
  59 /* Return true if we have a useful VR_RANGE range for VAR, storing it
  60    in *MIN_VALUE and *MAX_VALUE if so.  Note the range in the dump files.  */
  61
  62 static bool
  63 vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
  64 {
  65   value_range vr;
  66   get_range_query (cfun)->range_of_expr (vr, var);
  67   if (vr.undefined_p ())
  68     vr.set_varying (TREE_TYPE (var));
  69   *min_value = wi::to_wide (vr.min ());
  70   *max_value = wi::to_wide (vr.max ());
  71   value_range_kind vr_type = vr.kind ();
  72   wide_int nonzero = get_nonzero_bits (var);
  73   signop sgn = TYPE_SIGN (TREE_TYPE (var));
  74   if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
  75                                          nonzero, sgn) == VR_RANGE)
  76     {
  77       if (dump_enabled_p ())
  78         {
  79           dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
  80           dump_printf (MSG_NOTE, " has range [");
  81           dump_hex (MSG_NOTE, *min_value);
  82           dump_printf (MSG_NOTE, ", ");
  83           dump_hex (MSG_NOTE, *max_value);
  84           dump_printf (MSG_NOTE, "]\n");
  85         }
  86       return true;
  87     }
  88   else
  89     {
  90       if (dump_enabled_p ())
  91         {
  92           dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
  93           dump_printf (MSG_NOTE, " has no range info\n");
  94         }
  95       return false;
  96     }
  97 }
  98
  99 /* Report that we've found an instance of pattern PATTERN in
 100    statement STMT.  */
 101
 102 static void
 103 vect_pattern_detected (const char *name, gimple *stmt)
 104 {
 105   if (dump_enabled_p ())
 106     dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt);
 107 }
 108
 109 /* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
 110    return the pattern statement's stmt_vec_info.  Set its vector type to
 111    VECTYPE if it doesn't have one already.  */
 112
 113 static stmt_vec_info
 114 vect_init_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
 115                         stmt_vec_info orig_stmt_info, tree vectype)
 116 {
 117   stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
 118   if (pattern_stmt_info == NULL)
 119     pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
 120   gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));
 121
 122   pattern_stmt_info->pattern_stmt_p = true;
 123   STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
 124   STMT_VINFO_DEF_TYPE (pattern_stmt_info)
 125     = STMT_VINFO_DEF_TYPE (orig_stmt_info);
 126   if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
 127     {
 128       gcc_assert (!vectype
 129                   || (VECTOR_BOOLEAN_TYPE_P (vectype)
 130                       == vect_use_mask_type_p (orig_stmt_info)));
 131       STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
 132       pattern_stmt_info->mask_precision = orig_stmt_info->mask_precision;
 133     }
 134   return pattern_stmt_info;
 135 }
 136
 137 /* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
 138    Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
 139    have one already.  */
 140
 141 static void
 142 vect_set_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
 143                        stmt_vec_info orig_stmt_info, tree vectype)
 144 {
 145   STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
 146   STMT_VINFO_RELATED_STMT (orig_stmt_info)
 147     = vect_init_pattern_stmt (vinfo, pattern_stmt, orig_stmt_info, vectype);
 148 }
 149
 150 /* Add NEW_STMT to STMT_INFO's pattern definition statements.  If VECTYPE
 151    is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
 152    be different from the vector type of the final pattern statement.
 153    If VECTYPE is a mask type, SCALAR_TYPE_FOR_MASK is the scalar type
 154    from which it was derived.  */
 155
 156 static inline void
 157 append_pattern_def_seq (vec_info *vinfo,
 158                         stmt_vec_info stmt_info, gimple *new_stmt,
 159                         tree vectype = NULL_TREE,
 160                         tree scalar_type_for_mask = NULL_TREE)
 161 {
 162   gcc_assert (!scalar_type_for_mask
 163               == (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype)));
 164   if (vectype)
 165     {
 166       stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
 167       STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
 168       if (scalar_type_for_mask)
 169         new_stmt_info->mask_precision
 170           = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (scalar_type_for_mask));
 171     }
 172   gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
 173                                       new_stmt);
 174 }
 175
 176 /* The caller wants to perform new operations on vect_external variable
 177    VAR, so that the result of the operations would also be vect_external.
 178    Return the edge on which the operations can be performed, if one exists.
 179    Return null if the operations should instead be treated as part of
 180    the pattern that needs them.  */
 181
 182 static edge
 183 vect_get_external_def_edge (vec_info *vinfo, tree var)
 184 {
 185   edge e = NULL;
 186   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
 187     {
 188       e = loop_preheader_edge (loop_vinfo->loop);
 189       if (!SSA_NAME_IS_DEFAULT_DEF (var))
 190         {
 191           basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
 192           if (bb == NULL
 193               || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
 194             e = NULL;
 195         }
 196     }
 197   return e;
 198 }
 199
 200 /* Return true if the target supports a vector version of CODE,
 201    where CODE is known to map to a direct optab with the given SUBTYPE.
 202    ITYPE specifies the type of (some of) the scalar inputs and OTYPE
 203    specifies the type of the scalar result.
 204
 205    If CODE allows the inputs and outputs to have different type
 206    (such as for WIDEN_SUM_EXPR), it is the input mode rather
 207    than the output mode that determines the appropriate target pattern.
 208    Operand 0 of the target pattern then specifies the mode that the output
 209    must have.
 210
 211    When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
 212    Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
 213    is nonnull.  */
 214
 215 static bool
 216 vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code,
 217                                  tree itype, tree *vecotype_out,
 218                                  tree *vecitype_out = NULL,
 219                                  enum optab_subtype subtype = optab_default)
 220 {
 221   tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
 222   if (!vecitype)
 223     return false;
 224
 225   tree vecotype = get_vectype_for_scalar_type (vinfo, otype);
 226   if (!vecotype)
 227     return false;
 228
 229   optab optab = optab_for_tree_code (code, vecitype, subtype);
 230   if (!optab)
 231     return false;
 232
 233   insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
 234   if (icode == CODE_FOR_nothing
 235       || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
 236     return false;
 237
 238   *vecotype_out = vecotype;
 239   if (vecitype_out)
 240     *vecitype_out = vecitype;
 241   return true;
 242 }
 243
 244 /* Round bit precision PRECISION up to a full element.  */
 245
 246 static unsigned int
 247 vect_element_precision (unsigned int precision)
 248 {
 249   precision = 1 << ceil_log2 (precision);
 250   return MAX (precision, BITS_PER_UNIT);
 251 }
 252
 253 /* If OP is defined by a statement that's being considered for vectorization,
 254    return information about that statement, otherwise return NULL.  */
 255
 256 static stmt_vec_info
 257 vect_get_internal_def (vec_info *vinfo, tree op)
 258 {
 259   stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
 260   if (def_stmt_info
 261       && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
 262     return def_stmt_info;
 263   return NULL;
 264 }
 265
 266 /* Check whether NAME, an ssa-name used in STMT_VINFO,
 267    is a result of a type promotion, such that:
 268      DEF_STMT: NAME = NOP (name0)
 269    If CHECK_SIGN is TRUE, check that either both types are signed or both are
 270    unsigned.  */
 271
 272 static bool
 273 type_conversion_p (vec_info *vinfo, tree name, bool check_sign,
 274                    tree *orig_type, gimple **def_stmt, bool *promotion)
 275 {
 276   tree type = TREE_TYPE (name);
 277   tree oprnd0;
 278   enum vect_def_type dt;
 279
 280   stmt_vec_info def_stmt_info;
 281   if (!vect_is_simple_use (name, vinfo, &dt, &def_stmt_info, def_stmt))
 282     return false;
 283
 284   if (dt != vect_internal_def
 285       && dt != vect_external_def && dt != vect_constant_def)
 286     return false;
 287
 288   if (!*def_stmt)
 289     return false;
 290
 291   if (!is_gimple_assign (*def_stmt))
 292     return false;
 293
 294   if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
 295     return false;
 296
 297   oprnd0 = gimple_assign_rhs1 (*def_stmt);
 298
 299   *orig_type = TREE_TYPE (oprnd0);
 300   if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
 301       || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
 302     return false;
 303
 304   if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
 305     *promotion = true;
 306   else
 307     *promotion = false;
 308
 309   if (!vect_is_simple_use (oprnd0, vinfo, &dt))
 310     return false;
 311
 312   return true;
 313 }
 314
 315 /* Holds information about an input operand after some sign changes
 316    and type promotions have been peeled away.  */
 317 class vect_unpromoted_value {
 318 public:
 319   vect_unpromoted_value ();
 320
 321   void set_op (tree, vect_def_type, stmt_vec_info = NULL);
 322
 323   /* The value obtained after peeling away zero or more casts.  */
 324   tree op;
 325
 326   /* The type of OP.  */
 327   tree type;
 328
 329   /* The definition type of OP.  */
 330   vect_def_type dt;
 331
 332   /* If OP is the result of peeling at least one cast, and if the cast
 333      of OP itself is a vectorizable statement, CASTER identifies that
 334      statement, otherwise it is null.  */
 335   stmt_vec_info caster;
 336 };
 337
 338 inline vect_unpromoted_value::vect_unpromoted_value ()
 339   : op (NULL_TREE),
 340     type (NULL_TREE),
 341     dt (vect_uninitialized_def),
 342     caster (NULL)
 343 {
 344 }
 345
 346 /* Set the operand to OP_IN, its definition type to DT_IN, and the
 347    statement that casts it to CASTER_IN.  */
 348
 349 inline void
 350 vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
 351                                stmt_vec_info caster_in)
 352 {
 353   op = op_in;
 354   type = TREE_TYPE (op);
 355   dt = dt_in;
 356   caster = caster_in;
 357 }
 358
 359 /* If OP is a vectorizable SSA name, strip a sequence of integer conversions
 360    to reach some vectorizable inner operand OP', continuing as long as it
 361    is possible to convert OP' back to OP using a possible sign change
 362    followed by a possible promotion P.  Return this OP', or null if OP is
 363    not a vectorizable SSA name.  If there is a promotion P, describe its
 364    input in UNPROM, otherwise describe OP' in UNPROM.  If SINGLE_USE_P
 365    is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
 366    have more than one user.
 367
 368    A successful return means that it is possible to go from OP' to OP
 369    via UNPROM.  The cast from OP' to UNPROM is at most a sign change,
 370    whereas the cast from UNPROM to OP might be a promotion, a sign
 371    change, or a nop.
 372
 373    E.g. say we have:
 374
 375        signed short *ptr = ...;
 376        signed short C = *ptr;
 377        unsigned short B = (unsigned short) C;    // sign change
 378        signed int A = (signed int) B;            // unsigned promotion
 379        ...possible other uses of A...
 380        unsigned int OP = (unsigned int) A;       // sign change
 381
 382    In this case it's possible to go directly from C to OP using:
 383
 384        OP = (unsigned int) (unsigned short) C;
 385             +------------+ +--------------+
 386                promotion      sign change
 387
 388    so OP' would be C.  The input to the promotion is B, so UNPROM
 389    would describe B.  */
 390
 391 static tree
 392 vect_look_through_possible_promotion (vec_info *vinfo, tree op,
 393                                       vect_unpromoted_value *unprom,
 394                                       bool *single_use_p = NULL)
 395 {
 396   tree res = NULL_TREE;
 397   tree op_type = TREE_TYPE (op);
 398   unsigned int orig_precision = TYPE_PRECISION (op_type);
 399   unsigned int min_precision = orig_precision;
 400   stmt_vec_info caster = NULL;
 401   while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
 402     {
 403       /* See whether OP is simple enough to vectorize.  */
 404       stmt_vec_info def_stmt_info;
 405       gimple *def_stmt;
 406       vect_def_type dt;
 407       if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
 408         break;
 409
 410       /* If OP is the input of a demotion, skip over it to see whether
 411          OP is itself the result of a promotion.  If so, the combined
 412          effect of the promotion and the demotion might fit the required
 413          pattern, otherwise neither operation fits.
 414
 415          This copes with cases such as the result of an arithmetic
 416          operation being truncated before being stored, and where that
 417          arithmetic operation has been recognized as an over-widened one.  */
 418       if (TYPE_PRECISION (op_type) <= min_precision)
 419         {
 420           /* Use OP as the UNPROM described above if we haven't yet
 421              found a promotion, or if using the new input preserves the
 422              sign of the previous promotion.  */
 423           if (!res
 424               || TYPE_PRECISION (unprom->type) == orig_precision
 425               || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type))
 426             {
 427               unprom->set_op (op, dt, caster);
 428               min_precision = TYPE_PRECISION (op_type);
 429             }
 430           /* Stop if we've already seen a promotion and if this
 431              conversion does more than change the sign.  */
 432           else if (TYPE_PRECISION (op_type)
 433                    != TYPE_PRECISION (unprom->type))
 434             break;
 435
 436           /* The sequence now extends to OP.  */
 437           res = op;
 438         }
 439
 440       /* See whether OP is defined by a cast.  Record it as CASTER if
 441          the cast is potentially vectorizable.  */
 442       if (!def_stmt)
 443         break;
 444       caster = def_stmt_info;
 445
 446       /* Ignore pattern statements, since we don't link uses for them.  */
 447       if (caster
 448           && single_use_p
 449           && !STMT_VINFO_RELATED_STMT (caster)
 450           && !has_single_use (res))
 451         *single_use_p = false;
 452
 453       gassign *assign = dyn_cast <gassign *> (def_stmt);
 454       if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
 455         break;
 456
 457       /* Continue with the input to the cast.  */
 458       op = gimple_assign_rhs1 (def_stmt);
 459       op_type = TREE_TYPE (op);
 460     }
 461   return res;
 462 }
 463
 464 /* OP is an integer operand to an operation that returns TYPE, and we
 465    want to treat the operation as a widening one.  So far we can treat
 466    it as widening from *COMMON_TYPE.
 467
 468    Return true if OP is suitable for such a widening operation,
 469    either widening from *COMMON_TYPE or from some supertype of it.
 470    Update *COMMON_TYPE to the supertype in the latter case.
 471
 472    SHIFT_P is true if OP is a shift amount.  */
 473
 474 static bool
 475 vect_joust_widened_integer (tree type, bool shift_p, tree op,
 476                             tree *common_type)
 477 {
 478   /* Calculate the minimum precision required by OP, without changing
 479      the sign of either operand.  */
 480   unsigned int precision;
 481   if (shift_p)
 482     {
 483       if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
 484         return false;
 485       precision = TREE_INT_CST_LOW (op);
 486     }
 487   else
 488     {
 489       precision = wi::min_precision (wi::to_widest (op),
 490                                      TYPE_SIGN (*common_type));
 491       if (precision * 2 > TYPE_PRECISION (type))
 492         return false;
 493     }
 494
 495   /* If OP requires a wider type, switch to that type.  The checks
 496      above ensure that this is still narrower than the result.  */
 497   precision = vect_element_precision (precision);
 498   if (TYPE_PRECISION (*common_type) < precision)
 499     *common_type = build_nonstandard_integer_type
 500       (precision, TYPE_UNSIGNED (*common_type));
 501   return true;
 502 }
 503
 504 /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
 505    is narrower than type, storing the supertype in *COMMON_TYPE if so.  */
 506
 507 static bool
 508 vect_joust_widened_type (tree type, tree new_type, tree *common_type)
 509 {
 510   if (types_compatible_p (*common_type, new_type))
 511     return true;
 512
 513   /* See if *COMMON_TYPE can hold all values of NEW_TYPE.  */
 514   if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
 515       && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
 516     return true;
 517
 518   /* See if NEW_TYPE can hold all values of *COMMON_TYPE.  */
 519   if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
 520       && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
 521     {
 522       *common_type = new_type;
 523       return true;
 524     }
 525
 526   /* We have mismatched signs, with the signed type being
 527      no wider than the unsigned type.  In this case we need
 528      a wider signed type.  */
 529   unsigned int precision = MAX (TYPE_PRECISION (*common_type),
 530                                 TYPE_PRECISION (new_type));
 531   precision *= 2;
 532
 533   if (precision * 2 > TYPE_PRECISION (type))
 534     return false;
 535
 536   *common_type = build_nonstandard_integer_type (precision, false);
 537   return true;
 538 }
 539
 540 /* Check whether STMT_INFO can be viewed as a tree of integer operations
 541    in which each node either performs CODE or WIDENED_CODE, and where
 542    each leaf operand is narrower than the result of STMT_INFO.  MAX_NOPS
 543    specifies the maximum number of leaf operands.  SHIFT_P says whether
 544    CODE and WIDENED_CODE are some sort of shift.
 545
 546    If STMT_INFO is such a tree, return the number of leaf operands
 547    and describe them in UNPROM[0] onwards.  Also set *COMMON_TYPE
 548    to a type that (a) is narrower than the result of STMT_INFO and
 549    (b) can hold all leaf operand values.
 550
 551    If SUBTYPE then allow that the signs of the operands
 552    may differ in signs but not in precision.  SUBTYPE is updated to reflect
 553    this.
 554
 555    Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
 556    exists.  */
 557
 558 static unsigned int
 559 vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code,
 560                       tree_code widened_code, bool shift_p,
 561                       unsigned int max_nops,
 562                       vect_unpromoted_value *unprom, tree *common_type,
 563                       enum optab_subtype *subtype = NULL)
 564 {
 565   /* Check for an integer operation with the right code.  */
 566   gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
 567   if (!assign)
 568     return 0;
 569
 570   tree_code rhs_code = gimple_assign_rhs_code (assign);
 571   if (rhs_code != code && rhs_code != widened_code)
 572     return 0;
 573
 574   tree type = TREE_TYPE (gimple_assign_lhs (assign));
 575   if (!INTEGRAL_TYPE_P (type))
 576     return 0;
 577
 578   /* Assume that both operands will be leaf operands.  */
 579   max_nops -= 2;
 580
 581   /* Check the operands.  */
 582   unsigned int next_op = 0;
 583   for (unsigned int i = 0; i < 2; ++i)
 584     {
 585       vect_unpromoted_value *this_unprom = &unprom[next_op];
 586       unsigned int nops = 1;
 587       tree op = gimple_op (assign, i + 1);
 588       if (i == 1 && TREE_CODE (op) == INTEGER_CST)
 589         {
 590           /* We already have a common type from earlier operands.
 591              Update it to account for OP.  */
 592           this_unprom->set_op (op, vect_constant_def);
 593           if (!vect_joust_widened_integer (type, shift_p, op, common_type))
 594             return 0;
 595         }
 596       else
 597         {
 598           /* Only allow shifts by constants.  */
 599           if (shift_p && i == 1)
 600             return 0;
 601
 602           if (!vect_look_through_possible_promotion (vinfo, op, this_unprom))
 603             return 0;
 604
 605           if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
 606             {
 607               /* The operand isn't widened.  If STMT_INFO has the code
 608                  for an unwidened operation, recursively check whether
 609                  this operand is a node of the tree.  */
 610               if (rhs_code != code
 611                   || max_nops == 0
 612                   || this_unprom->dt != vect_internal_def)
 613                 return 0;
 614
 615               /* Give back the leaf slot allocated above now that we're
 616                  not treating this as a leaf operand.  */
 617               max_nops += 1;
 618
 619               /* Recursively process the definition of the operand.  */
 620               stmt_vec_info def_stmt_info
 621                 = vinfo->lookup_def (this_unprom->op);
 622               nops = vect_widened_op_tree (vinfo, def_stmt_info, code,
 623                                            widened_code, shift_p, max_nops,
 624                                            this_unprom, common_type,
 625                                            subtype);
 626               if (nops == 0)
 627                 return 0;
 628
 629               max_nops -= nops;
 630             }
 631           else
 632             {
 633               /* Make sure that the operand is narrower than the result.  */
 634               if (TYPE_PRECISION (this_unprom->type) * 2
 635                   > TYPE_PRECISION (type))
 636                 return 0;
 637
 638               /* Update COMMON_TYPE for the new operand.  */
 639               if (i == 0)
 640                 *common_type = this_unprom->type;
 641               else if (!vect_joust_widened_type (type, this_unprom->type,
 642                                                  common_type))
 643                 {
 644                   if (subtype)
 645                     {
 646                       /* See if we can sign extend the smaller type.  */
 647                       if (TYPE_PRECISION (this_unprom->type)
 648                           > TYPE_PRECISION (*common_type))
 649                         *common_type = this_unprom->type;
 650                       *subtype = optab_vector_mixed_sign;
 651                     }
 652                   else
 653                     return 0;
 654                 }
 655             }
 656         }
 657       next_op += nops;
 658     }
 659   return next_op;
 660 }
 661
 662 /* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
 663    is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
 664
 665 static tree
 666 vect_recog_temp_ssa_var (tree type, gimple *stmt)
 667 {
 668   return make_temp_ssa_name (type, stmt, "patt");
 669 }
 670
 671 /* STMT2_INFO describes a type conversion that could be split into STMT1
 672    followed by a version of STMT2_INFO that takes NEW_RHS as its first
 673    input.  Try to do this using pattern statements, returning true on
 674    success.  */
 675
 676 static bool
 677 vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
 678                       gimple *stmt1, tree vectype)
 679 {
 680   if (is_pattern_stmt_p (stmt2_info))
 681     {
 682       /* STMT2_INFO is part of a pattern.  Get the statement to which
 683          the pattern is attached.  */
 684       stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
 685       vect_init_pattern_stmt (vinfo, stmt1, orig_stmt2_info, vectype);
 686
 687       if (dump_enabled_p ())
 688         dump_printf_loc (MSG_NOTE, vect_location,
 689                          "Splitting pattern statement: %G", stmt2_info->stmt);
 690
 691       /* Since STMT2_INFO is a pattern statement, we can change it
 692          in-situ without worrying about changing the code for the
 693          containing block.  */
 694       gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);
 695
 696       if (dump_enabled_p ())
 697         {
 698           dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
 699           dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
 700                            stmt2_info->stmt);
 701         }
 702
 703       gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
 704       if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
 705         /* STMT2_INFO is the actual pattern statement.  Add STMT1
 706            to the end of the definition sequence.  */
 707         gimple_seq_add_stmt_without_update (def_seq, stmt1);
 708       else
 709         {
 710           /* STMT2_INFO belongs to the definition sequence.  Insert STMT1
 711              before it.  */
 712           gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
 713           gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
 714         }
 715       return true;
 716     }
 717   else
 718     {
 719       /* STMT2_INFO doesn't yet have a pattern.  Try to create a
 720          two-statement pattern now.  */
 721       gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
 722       tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
 723       tree lhs_vectype = get_vectype_for_scalar_type (vinfo, lhs_type);
 724       if (!lhs_vectype)
 725         return false;
 726
 727       if (dump_enabled_p ())
 728         dump_printf_loc (MSG_NOTE, vect_location,
 729                          "Splitting statement: %G", stmt2_info->stmt);
 730
 731       /* Add STMT1 as a singleton pattern definition sequence.  */
 732       gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
 733       vect_init_pattern_stmt (vinfo, stmt1, stmt2_info, vectype);
 734       gimple_seq_add_stmt_without_update (def_seq, stmt1);
 735
 736       /* Build the second of the two pattern statements.  */
 737       tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
 738       gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
 739       vect_set_pattern_stmt (vinfo, new_stmt2, stmt2_info, lhs_vectype);
 740
 741       if (dump_enabled_p ())
 742         {
 743           dump_printf_loc (MSG_NOTE, vect_location,
 744                            "into pattern statements: %G", stmt1);
 745           dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
 746                            (gimple *) new_stmt2);
 747         }
 748
 749       return true;
 750     }
 751 }
 752
 753 /* Convert UNPROM to TYPE and return the result, adding new statements
 754    to STMT_INFO's pattern definition statements if no better way is
 755    available.  VECTYPE is the vector form of TYPE.
 756
 757    If SUBTYPE then convert the type based on the subtype.  */
 758
 759 static tree
 760 vect_convert_input (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
 761                     vect_unpromoted_value *unprom, tree vectype,
 762                     enum optab_subtype subtype = optab_default)
 763 {
 764   /* Update the type if the signs differ.  */
 765   if (subtype == optab_vector_mixed_sign)
 766     {
 767       gcc_assert (!TYPE_UNSIGNED (type));
 768       if (TYPE_UNSIGNED (TREE_TYPE (unprom->op)))
 769         {
 770           type = unsigned_type_for (type);
 771           vectype = unsigned_type_for (vectype);
 772         }
 773     }
 774
 775   /* Check for a no-op conversion.  */
 776   if (types_compatible_p (type, TREE_TYPE (unprom->op)))
 777     return unprom->op;
 778
 779   /* Allow the caller to create constant vect_unpromoted_values.  */
 780   if (TREE_CODE (unprom->op) == INTEGER_CST)
 781     return wide_int_to_tree (type, wi::to_widest (unprom->op));
 782
 783   tree input = unprom->op;
 784   if (unprom->caster)
 785     {
 786       tree lhs = gimple_get_lhs (unprom->caster->stmt);
 787       tree lhs_type = TREE_TYPE (lhs);
 788
 789       /* If the result of the existing cast is the right width, use it
 790          instead of the source of the cast.  */
 791       if (TYPE_PRECISION (lhs_type) == TYPE_PRECISION (type))
 792         input = lhs;
 793       /* If the precision we want is between the source and result
 794          precisions of the existing cast, try splitting the cast into
 795          two and tapping into a mid-way point.  */
 796       else if (TYPE_PRECISION (lhs_type) > TYPE_PRECISION (type)
 797                && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type))
 798         {
 799           /* In order to preserve the semantics of the original cast,
 800              give the mid-way point the same signedness as the input value.
 801
 802              It would be possible to use a signed type here instead if
 803              TYPE is signed and UNPROM->TYPE is unsigned, but that would
 804              make the sign of the midtype sensitive to the order in
 805              which we process the statements, since the signedness of
 806              TYPE is the signedness required by just one of possibly
 807              many users.  Also, unsigned promotions are usually as cheap
 808              as or cheaper than signed ones, so it's better to keep an
 809              unsigned promotion.  */
 810           tree midtype = build_nonstandard_integer_type
 811             (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type));
 812           tree vec_midtype = get_vectype_for_scalar_type (vinfo, midtype);
 813           if (vec_midtype)
 814             {
 815               input = vect_recog_temp_ssa_var (midtype, NULL);
 816               gassign *new_stmt = gimple_build_assign (input, NOP_EXPR,
 817                                                        unprom->op);
 818               if (!vect_split_statement (vinfo, unprom->caster, input, new_stmt,
 819                                          vec_midtype))
 820                 append_pattern_def_seq (vinfo, stmt_info,
 821                                         new_stmt, vec_midtype);
 822             }
 823         }
 824
 825       /* See if we can reuse an existing result.  */
 826       if (types_compatible_p (type, TREE_TYPE (input)))
 827         return input;
 828     }
 829
 830   /* We need a new conversion statement.  */
 831   tree new_op = vect_recog_temp_ssa_var (type, NULL);
 832   gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, input);
 833
 834   /* If OP is an external value, see if we can insert the new statement
 835      on an incoming edge.  */
 836   if (input == unprom->op && unprom->dt == vect_external_def)
 837     if (edge e = vect_get_external_def_edge (vinfo, input))
 838       {
 839         basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
 840         gcc_assert (!new_bb);
 841         return new_op;
 842       }
 843
 844   /* As a (common) last resort, add the statement to the pattern itself.  */
 845   append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype);
 846   return new_op;
 847 }
 848
 849 /* Invoke vect_convert_input for N elements of UNPROM and store the
 850    result in the corresponding elements of RESULT.
 851
 852    If SUBTYPE then convert the type based on the subtype.  */
 853
 854 static void
 855 vect_convert_inputs (vec_info *vinfo, stmt_vec_info stmt_info, unsigned int n,
 856                      tree *result, tree type, vect_unpromoted_value *unprom,
 857                      tree vectype, enum optab_subtype subtype = optab_default)
 858 {
 859   for (unsigned int i = 0; i < n; ++i)
 860     {
 861       unsigned int j;
 862       for (j = 0; j < i; ++j)
 863         if (unprom[j].op == unprom[i].op)
 864           break;
 865
 866       if (j < i)
 867         result[i] = result[j];
 868       else
 869         result[i] = vect_convert_input (vinfo, stmt_info,
 870                                         type, &unprom[i], vectype, subtype);
 871     }
 872 }
 873
 874 /* The caller has created a (possibly empty) sequence of pattern definition
 875    statements followed by a single statement PATTERN_STMT.  Cast the result
 876    of this final statement to TYPE.  If a new statement is needed, add
 877    PATTERN_STMT to the end of STMT_INFO's pattern definition statements
 878    and return the new statement, otherwise return PATTERN_STMT as-is.
 879    VECITYPE is the vector form of PATTERN_STMT's result type.  */
 880
 881 static gimple *
 882 vect_convert_output (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
 883                      gimple *pattern_stmt, tree vecitype)
 884 {
 885   tree lhs = gimple_get_lhs (pattern_stmt);
 886   if (!types_compatible_p (type, TREE_TYPE (lhs)))
 887     {
 888       append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vecitype);
 889       tree cast_var = vect_recog_temp_ssa_var (type, NULL);
 890       pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
 891     }
 892   return pattern_stmt;
 893 }
 894
 895 /* Return true if STMT_VINFO describes a reduction for which reassociation
 896    is allowed.  If STMT_INFO is part of a group, assume that it's part of
 897    a reduction chain and optimistically assume that all statements
 898    except the last allow reassociation.
 899    Also require it to have code CODE and to be a reduction
 900    in the outermost loop.  When returning true, store the operands in
 901    *OP0_OUT and *OP1_OUT.  */
 902
 903 static bool
 904 vect_reassociating_reduction_p (vec_info *vinfo,
 905                                 stmt_vec_info stmt_info, tree_code code,
 906                                 tree *op0_out, tree *op1_out)
 907 {
 908   loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
 909   if (!loop_info)
 910     return false;
 911
 912   gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
 913   if (!assign || gimple_assign_rhs_code (assign) != code)
 914     return false;
 915
 916   /* We don't allow changing the order of the computation in the inner-loop
 917      when doing outer-loop vectorization.  */
 918   class loop *loop = LOOP_VINFO_LOOP (loop_info);
 919   if (loop && nested_in_vect_loop_p (loop, stmt_info))
 920     return false;
 921
 922   if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
 923     {
 924       if (needs_fold_left_reduction_p (TREE_TYPE (gimple_assign_lhs (assign)),
 925                                        code))
 926         return false;
 927     }
 928   else if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) == NULL)
 929     return false;
 930
 931   *op0_out = gimple_assign_rhs1 (assign);
 932   *op1_out = gimple_assign_rhs2 (assign);
 933   if (commutative_tree_code (code) && STMT_VINFO_REDUC_IDX (stmt_info) == 0)
 934     std::swap (*op0_out, *op1_out);
 935   return true;
 936 }
 937
 938 /* match.pd function to match
 939    (cond (cmp@3 a b) (convert@1 c) (convert@2 d))
 940    with conditions:
 941    1) @1, @2, c, d, a, b are all integral type.
 942    2) There's single_use for both @1 and @2.
 943    3) a, c have same precision.
 944    4) c and @1 have different precision.
 945    5) c, d are the same type or they can differ in sign when convert is
 946    truncation.
 947
 948    record a and c and d and @3.  */
 949
 950 extern bool gimple_cond_expr_convert_p (tree, tree*, tree (*)(tree));
 951
 952 /* Function vect_recog_cond_expr_convert
 953
 954    Try to find the following pattern:
 955
 956    TYPE_AB A,B;
 957    TYPE_CD C,D;
 958    TYPE_E E;
 959    TYPE_E op_true = (TYPE_E) A;
 960    TYPE_E op_false = (TYPE_E) B;
 961
 962    E = C cmp D ? op_true : op_false;
 963
 964    where
 965    TYPE_PRECISION (TYPE_E) != TYPE_PRECISION (TYPE_CD);
 966    TYPE_PRECISION (TYPE_AB) == TYPE_PRECISION (TYPE_CD);
 967    single_use of op_true and op_false.
 968    TYPE_AB could differ in sign when (TYPE_E) A is a truncation.
 969
 970    Input:
 971
 972    * STMT_VINFO: The stmt from which the pattern search begins.
 973    here it starts with E = c cmp D ? op_true : op_false;
 974
 975    Output:
 976
 977    TYPE1 E' = C cmp D ? A : B;
 978    TYPE3 E = (TYPE3) E';
 979
 980    There may extra nop_convert for A or B to handle different signness.
 981
 982    * TYPE_OUT: The vector type of the output of this pattern.
 983
 984    * Return value: A new stmt that will be used to replace the sequence of
 985    stmts that constitute the pattern. In this case it will be:
 986    E = (TYPE3)E';
 987    E' = C cmp D ? A : B; is recorded in pattern definition statements;  */
 988
 989 static gimple *
 990 vect_recog_cond_expr_convert_pattern (vec_info *vinfo,
 991                                       stmt_vec_info stmt_vinfo, tree *type_out)
 992 {
 993   gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
 994   tree lhs, match[4], temp, type, new_lhs, op2;
 995   gimple *cond_stmt;
 996   gimple *pattern_stmt;
 997
 998   if (!last_stmt)
 999     return NULL;
1000
1001   lhs = gimple_assign_lhs (last_stmt);
1002
1003   /* Find E = C cmp D ? (TYPE3) A ? (TYPE3) B;
1004      TYPE_PRECISION (A) == TYPE_PRECISION (C).  */
1005   if (!gimple_cond_expr_convert_p (lhs, &match[0], NULL))
1006     return NULL;
1007
1008   vect_pattern_detected ("vect_recog_cond_expr_convert_pattern", last_stmt);
1009
1010   op2 = match[2];
1011   type = TREE_TYPE (match[1]);
1012   if (TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (match[2])))
1013     {
1014       op2 = vect_recog_temp_ssa_var (type, NULL);
1015       gimple* nop_stmt = gimple_build_assign (op2, NOP_EXPR, match[2]);
1016       append_pattern_def_seq (vinfo, stmt_vinfo, nop_stmt,
1017                               get_vectype_for_scalar_type (vinfo, type));
1018     }
1019
1020   temp = vect_recog_temp_ssa_var (type, NULL);
1021   cond_stmt = gimple_build_assign (temp, build3 (COND_EXPR, type, match[3],
1022                                                  match[1], op2));
1023   append_pattern_def_seq (vinfo, stmt_vinfo, cond_stmt,
1024                           get_vectype_for_scalar_type (vinfo, type));
1025   new_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
1026   pattern_stmt = gimple_build_assign (new_lhs, NOP_EXPR, temp);
1027   *type_out = STMT_VINFO_VECTYPE (stmt_vinfo);
1028
1029   if (dump_enabled_p ())
1030     dump_printf_loc (MSG_NOTE, vect_location,
1031                      "created pattern stmt: %G", pattern_stmt);
1032   return pattern_stmt;
1033 }
1034
1035 /* Function vect_recog_dot_prod_pattern
1036
1037    Try to find the following pattern:
1038
1039      type1a x_t
1040      type1b y_t;
1041      TYPE1 prod;
1042      TYPE2 sum = init;
1043    loop:
1044      sum_0 = phi <init, sum_1>
1045      S1  x_t = ...
1046      S2  y_t = ...
1047      S3  x_T = (TYPE1) x_t;
1048      S4  y_T = (TYPE1) y_t;
1049      S5  prod = x_T * y_T;
1050      [S6  prod = (TYPE2) prod;  #optional]
1051      S7  sum_1 = prod + sum_0;
1052
1053    where 'TYPE1' is exactly double the size of type 'type1a' and 'type1b',
1054    the sign of 'TYPE1' must be one of 'type1a' or 'type1b' but the sign of
1055    'type1a' and 'type1b' can differ.
1056
1057    Input:
1058
1059    * STMT_VINFO: The stmt from which the pattern search begins.  In the
1060    example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
1061    will be detected.
1062
1063    Output:
1064
1065    * TYPE_OUT: The type of the output  of this pattern.
1066
1067    * Return value: A new stmt that will be used to replace the sequence of
1068    stmts that constitute the pattern. In this case it will be:
1069         WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
1070
1071    Note: The dot-prod idiom is a widening reduction pattern that is
1072          vectorized without preserving all the intermediate results. It
1073          produces only N/2 (widened) results (by summing up pairs of
1074          intermediate results) rather than all N results.  Therefore, we
1075          cannot allow this pattern when we want to get all the results and in
1076          the correct order (as is the case when this computation is in an
1077          inner-loop nested in an outer-loop that us being vectorized).  */
1078
1079 static gimple *
1080 vect_recog_dot_prod_pattern (vec_info *vinfo,
1081                              stmt_vec_info stmt_vinfo, tree *type_out)
1082 {
1083   tree oprnd0, oprnd1;
1084   gimple *last_stmt = stmt_vinfo->stmt;
1085   tree type, half_type;
1086   gimple *pattern_stmt;
1087   tree var;
1088
1089   /* Look for the following pattern
1090           DX = (TYPE1) X;
1091           DY = (TYPE1) Y;
1092           DPROD = DX * DY;
1093           DDPROD = (TYPE2) DPROD;
1094           sum_1 = DDPROD + sum_0;
1095      In which
1096      - DX is double the size of X
1097      - DY is double the size of Y
1098      - DX, DY, DPROD all have the same type but the sign
1099        between X, Y and DPROD can differ.
1100      - sum is the same size of DPROD or bigger
1101      - sum has been recognized as a reduction variable.
1102
1103      This is equivalent to:
1104        DPROD = X w* Y;          #widen mult
1105        sum_1 = DPROD w+ sum_0;  #widen summation
1106      or
1107        DPROD = X w* Y;          #widen mult
1108        sum_1 = DPROD + sum_0;   #summation
1109    */
1110
1111   /* Starting from LAST_STMT, follow the defs of its uses in search
1112      of the above pattern.  */
1113
1114   if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1115                                        &oprnd0, &oprnd1))
1116     return NULL;
1117
1118   type = TREE_TYPE (gimple_get_lhs (last_stmt));
1119
1120   vect_unpromoted_value unprom_mult;
1121   oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);
1122
1123   /* So far so good.  Since last_stmt was detected as a (summation) reduction,
1124      we know that oprnd1 is the reduction variable (defined by a loop-header
1125      phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1126      Left to check that oprnd0 is defined by a (widen_)mult_expr  */
1127   if (!oprnd0)
1128     return NULL;
1129
1130   stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
1131   if (!mult_vinfo)
1132     return NULL;
1133
1134   /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
1135      inside the loop (in case we are analyzing an outer-loop).  */
1136   vect_unpromoted_value unprom0[2];
1137   enum optab_subtype subtype = optab_vector;
1138   if (!vect_widened_op_tree (vinfo, mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
1139                              false, 2, unprom0, &half_type, &subtype))
1140     return NULL;
1141
1142   /* If there are two widening operations, make sure they agree on the sign
1143      of the extension.  The result of an optab_vector_mixed_sign operation
1144      is signed; otherwise, the result has the same sign as the operands.  */
1145   if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
1146       && (subtype == optab_vector_mixed_sign
1147           ? TYPE_UNSIGNED (unprom_mult.type)
1148           : TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type)))
1149     return NULL;
1150
1151   vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);
1152
1153   /* If the inputs have mixed signs, canonicalize on using the signed
1154      input type for analysis.  This also helps when emulating mixed-sign
1155      operations using signed operations.  */
1156   if (subtype == optab_vector_mixed_sign)
1157     half_type = signed_type_for (half_type);
1158
1159   tree half_vectype;
1160   if (!vect_supportable_direct_optab_p (vinfo, type, DOT_PROD_EXPR, half_type,
1161                                         type_out, &half_vectype, subtype))
1162     {
1163       /* We can emulate a mixed-sign dot-product using a sequence of
1164          signed dot-products; see vect_emulate_mixed_dot_prod for details.  */
1165       if (subtype != optab_vector_mixed_sign
1166           || !vect_supportable_direct_optab_p (vinfo, signed_type_for (type),
1167                                                DOT_PROD_EXPR, half_type,
1168                                                type_out, &half_vectype,
1169                                                optab_vector))
1170         return NULL;
1171
1172       *type_out = signed_or_unsigned_type_for (TYPE_UNSIGNED (type),
1173                                                *type_out);
1174     }
1175
1176   /* Get the inputs in the appropriate types.  */
1177   tree mult_oprnd[2];
1178   vect_convert_inputs (vinfo, stmt_vinfo, 2, mult_oprnd, half_type,
1179                        unprom0, half_vectype, subtype);
1180
1181   var = vect_recog_temp_ssa_var (type, NULL);
1182   pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
1183                                       mult_oprnd[0], mult_oprnd[1], oprnd1);
1184
1185   return pattern_stmt;
1186 }
1187
1188
1189 /* Function vect_recog_sad_pattern
1190
1191    Try to find the following Sum of Absolute Difference (SAD) pattern:
1192
1193      type x_t, y_t;
1194      signed TYPE1 diff, abs_diff;
1195      TYPE2 sum = init;
1196    loop:
1197      sum_0 = phi <init, sum_1>
1198      S1  x_t = ...
1199      S2  y_t = ...
1200      S3  x_T = (TYPE1) x_t;
1201      S4  y_T = (TYPE1) y_t;
1202      S5  diff = x_T - y_T;
1203      S6  abs_diff = ABS_EXPR <diff>;
1204      [S7  abs_diff = (TYPE2) abs_diff;  #optional]
1205      S8  sum_1 = abs_diff + sum_0;
1206
1207    where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
1208    same size of 'TYPE1' or bigger. This is a special case of a reduction
1209    computation.
1210
1211    Input:
1212
1213    * STMT_VINFO: The stmt from which the pattern search begins.  In the
1214    example, when this function is called with S8, the pattern
1215    {S3,S4,S5,S6,S7,S8} will be detected.
1216
1217    Output:
1218
1219    * TYPE_OUT: The type of the output of this pattern.
1220
1221    * Return value: A new stmt that will be used to replace the sequence of
1222    stmts that constitute the pattern. In this case it will be:
1223         SAD_EXPR <x_t, y_t, sum_0>
1224   */
1225
1226 static gimple *
1227 vect_recog_sad_pattern (vec_info *vinfo,
1228                         stmt_vec_info stmt_vinfo, tree *type_out)
1229 {
1230   gimple *last_stmt = stmt_vinfo->stmt;
1231   tree half_type;
1232
1233   /* Look for the following pattern
1234           DX = (TYPE1) X;
1235           DY = (TYPE1) Y;
1236           DDIFF = DX - DY;
1237           DAD = ABS_EXPR <DDIFF>;
1238           DDPROD = (TYPE2) DPROD;
1239           sum_1 = DAD + sum_0;
1240      In which
1241      - DX is at least double the size of X
1242      - DY is at least double the size of Y
1243      - DX, DY, DDIFF, DAD all have the same type
1244      - sum is the same size of DAD or bigger
1245      - sum has been recognized as a reduction variable.
1246
1247      This is equivalent to:
1248        DDIFF = X w- Y;          #widen sub
1249        DAD = ABS_EXPR <DDIFF>;
1250        sum_1 = DAD w+ sum_0;    #widen summation
1251      or
1252        DDIFF = X w- Y;          #widen sub
1253        DAD = ABS_EXPR <DDIFF>;
1254        sum_1 = DAD + sum_0;     #summation
1255    */
1256
1257   /* Starting from LAST_STMT, follow the defs of its uses in search
1258      of the above pattern.  */
1259
1260   tree plus_oprnd0, plus_oprnd1;
1261   if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1262                                        &plus_oprnd0, &plus_oprnd1))
1263     return NULL;
1264
1265   tree sum_type = TREE_TYPE (gimple_get_lhs (last_stmt));
1266
1267   /* Any non-truncating sequence of conversions is OK here, since
1268      with a successful match, the result of the ABS(U) is known to fit
1269      within the nonnegative range of the result type.  (It cannot be the
1270      negative of the minimum signed value due to the range of the widening
1271      MINUS_EXPR.)  */
1272   vect_unpromoted_value unprom_abs;
1273   plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
1274                                                       &unprom_abs);
1275
1276   /* So far so good.  Since last_stmt was detected as a (summation) reduction,
1277      we know that plus_oprnd1 is the reduction variable (defined by a loop-header
1278      phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
1279      Then check that plus_oprnd0 is defined by an abs_expr.  */
1280
1281   if (!plus_oprnd0)
1282     return NULL;
1283
1284   stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
1285   if (!abs_stmt_vinfo)
1286     return NULL;
1287
1288   /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
1289      inside the loop (in case we are analyzing an outer-loop).  */
1290   gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
1291   if (!abs_stmt
1292       || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
1293           && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
1294     return NULL;
1295
1296   tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
1297   tree abs_type = TREE_TYPE (abs_oprnd);
1298   if (TYPE_UNSIGNED (abs_type))
1299     return NULL;
1300
1301   /* Peel off conversions from the ABS input.  This can involve sign
1302      changes (e.g. from an unsigned subtraction to a signed ABS input)
1303      or signed promotion, but it can't include unsigned promotion.
1304      (Note that ABS of an unsigned promotion should have been folded
1305      away before now anyway.)  */
1306   vect_unpromoted_value unprom_diff;
1307   abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
1308                                                     &unprom_diff);
1309   if (!abs_oprnd)
1310     return NULL;
1311   if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
1312       && TYPE_UNSIGNED (unprom_diff.type))
1313     return NULL;
1314
1315   /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
1316   stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
1317   if (!diff_stmt_vinfo)
1318     return NULL;
1319
1320   /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
1321      inside the loop (in case we are analyzing an outer-loop).  */
1322   vect_unpromoted_value unprom[2];
1323   if (!vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR, WIDEN_MINUS_EXPR,
1324                              false, 2, unprom, &half_type))
1325     return NULL;
1326
1327   vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
1328
1329   tree half_vectype;
1330   if (!vect_supportable_direct_optab_p (vinfo, sum_type, SAD_EXPR, half_type,
1331                                         type_out, &half_vectype))
1332     return NULL;
1333
1334   /* Get the inputs to the SAD_EXPR in the appropriate types.  */
1335   tree sad_oprnd[2];
1336   vect_convert_inputs (vinfo, stmt_vinfo, 2, sad_oprnd, half_type,
1337                        unprom, half_vectype);
1338
1339   tree var = vect_recog_temp_ssa_var (sum_type, NULL);
1340   gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
1341                                               sad_oprnd[1], plus_oprnd1);
1342
1343   return pattern_stmt;
1344 }
1345
1346 /* Recognize an operation that performs ORIG_CODE on widened inputs,
1347    so that it can be treated as though it had the form:
1348
1349       A_TYPE a;
1350       B_TYPE b;
1351       HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
1352       HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
1353     | RES_TYPE a_extend = (RES_TYPE) a_cast;  // promotion from HALF_TYPE
1354     | RES_TYPE b_extend = (RES_TYPE) b_cast;  // promotion from HALF_TYPE
1355     | RES_TYPE res = a_extend ORIG_CODE b_extend;
1356
1357    Try to replace the pattern with:
1358
1359       A_TYPE a;
1360       B_TYPE b;
1361       HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
1362       HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
1363     | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
1364     | RES_TYPE res = (EXT_TYPE) ext;  // possible no-op
1365
1366    where EXT_TYPE is wider than HALF_TYPE but has the same signedness.
1367
1368    SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts.  NAME is the
1369    name of the pattern being matched, for dump purposes.  */
1370
1371 static gimple *
1372 vect_recog_widen_op_pattern (vec_info *vinfo,
1373                              stmt_vec_info last_stmt_info, tree *type_out,
1374                              tree_code orig_code, tree_code wide_code,
1375                              bool shift_p, const char *name)
1376 {
1377   gimple *last_stmt = last_stmt_info->stmt;
1378
1379   vect_unpromoted_value unprom[2];
1380   tree half_type;
1381   if (!vect_widened_op_tree (vinfo, last_stmt_info, orig_code, orig_code,
1382                              shift_p, 2, unprom, &half_type))
1383     return NULL;
1384
1385   /* Pattern detected.  */
1386   vect_pattern_detected (name, last_stmt);
1387
1388   tree type = TREE_TYPE (gimple_get_lhs (last_stmt));
1389   tree itype = type;
1390   if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
1391       || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
1392     itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
1393                                             TYPE_UNSIGNED (half_type));
1394
1395   /* Check target support  */
1396   tree vectype = get_vectype_for_scalar_type (vinfo, half_type);
1397   tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
1398   tree ctype = itype;
1399   tree vecctype = vecitype;
1400   if (orig_code == MINUS_EXPR
1401       && TYPE_UNSIGNED (itype)
1402       && TYPE_PRECISION (type) > TYPE_PRECISION (itype))
1403     {
1404       /* Subtraction is special, even if half_type is unsigned and no matter
1405          whether type is signed or unsigned, if type is wider than itype,
1406          we need to sign-extend from the widening operation result to the
1407          result type.
1408          Consider half_type unsigned char, operand 1 0xfe, operand 2 0xff,
1409          itype unsigned short and type either int or unsigned int.
1410          Widened (unsigned short) 0xfe - (unsigned short) 0xff is
1411          (unsigned short) 0xffff, but for type int we want the result -1
1412          and for type unsigned int 0xffffffff rather than 0xffff.  */
1413       ctype = build_nonstandard_integer_type (TYPE_PRECISION (itype), 0);
1414       vecctype = get_vectype_for_scalar_type (vinfo, ctype);
1415     }
1416
1417   enum tree_code dummy_code;
1418   int dummy_int;
1419   auto_vec<tree> dummy_vec;
1420   if (!vectype
1421       || !vecitype
1422       || !vecctype
1423       || !supportable_widening_operation (vinfo, wide_code, last_stmt_info,
1424                                           vecitype, vectype,
1425                                           &dummy_code, &dummy_code,
1426                                           &dummy_int, &dummy_vec))
1427     return NULL;
1428
1429   *type_out = get_vectype_for_scalar_type (vinfo, type);
1430   if (!*type_out)
1431     return NULL;
1432
1433   tree oprnd[2];
1434   vect_convert_inputs (vinfo, last_stmt_info,
1435                        2, oprnd, half_type, unprom, vectype);
1436
1437   tree var = vect_recog_temp_ssa_var (itype, NULL);
1438   gimple *pattern_stmt = gimple_build_assign (var, wide_code,
1439                                               oprnd[0], oprnd[1]);
1440
1441   if (vecctype != vecitype)
1442     pattern_stmt = vect_convert_output (vinfo, last_stmt_info, ctype,
1443                                         pattern_stmt, vecitype);
1444
1445   return vect_convert_output (vinfo, last_stmt_info,
1446                               type, pattern_stmt, vecctype);
1447 }
1448
1449 /* Try to detect multiplication on widened inputs, converting MULT_EXPR
1450    to WIDEN_MULT_EXPR.  See vect_recog_widen_op_pattern for details.  */
1451
1452 static gimple *
1453 vect_recog_widen_mult_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1454                                tree *type_out)
1455 {
1456   return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1457                                       MULT_EXPR, WIDEN_MULT_EXPR, false,
1458                                       "vect_recog_widen_mult_pattern");
1459 }
1460
1461 /* Try to detect addition on widened inputs, converting PLUS_EXPR
1462    to WIDEN_PLUS_EXPR.  See vect_recog_widen_op_pattern for details.  */
1463
1464 static gimple *
1465 vect_recog_widen_plus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1466                                tree *type_out)
1467 {
1468   return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1469                                       PLUS_EXPR, WIDEN_PLUS_EXPR, false,
1470                                       "vect_recog_widen_plus_pattern");
1471 }
1472
1473 /* Try to detect subtraction on widened inputs, converting MINUS_EXPR
1474    to WIDEN_MINUS_EXPR.  See vect_recog_widen_op_pattern for details.  */
1475 static gimple *
1476 vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1477                                tree *type_out)
1478 {
1479   return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1480                                       MINUS_EXPR, WIDEN_MINUS_EXPR, false,
1481                                       "vect_recog_widen_minus_pattern");
1482 }
1483
1484 /* Function vect_recog_popcount_pattern
1485
1486    Try to find the following pattern:
1487
1488    UTYPE1 A;
1489    TYPE1 B;
1490    UTYPE2 temp_in;
1491    TYPE3 temp_out;
1492    temp_in = (UTYPE2)A;
1493
1494    temp_out = __builtin_popcount{,l,ll} (temp_in);
1495    B = (TYPE1) temp_out;
1496
1497    TYPE2 may or may not be equal to TYPE3.
1498    i.e. TYPE2 is equal to TYPE3 for __builtin_popcount
1499    i.e. TYPE2 is not equal to TYPE3 for __builtin_popcountll
1500
1501    Input:
1502
1503    * STMT_VINFO: The stmt from which the pattern search begins.
1504    here it starts with B = (TYPE1) temp_out;
1505
1506    Output:
1507
1508    * TYPE_OUT: The vector type of the output of this pattern.
1509
1510    * Return value: A new stmt that will be used to replace the sequence of
1511    stmts that constitute the pattern. In this case it will be:
1512    B = .POPCOUNT (A);
1513 */
1514
1515 static gimple *
1516 vect_recog_popcount_pattern (vec_info *vinfo,
1517                              stmt_vec_info stmt_vinfo, tree *type_out)
1518 {
1519   gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
1520   gimple *popcount_stmt, *pattern_stmt;
1521   tree rhs_oprnd, rhs_origin, lhs_oprnd, lhs_type, vec_type, new_var;
1522   auto_vec<tree> vargs;
1523
1524   /* Find B = (TYPE1) temp_out. */
1525   if (!last_stmt)
1526     return NULL;
1527   tree_code code = gimple_assign_rhs_code (last_stmt);
1528   if (!CONVERT_EXPR_CODE_P (code))
1529     return NULL;
1530
1531   lhs_oprnd = gimple_assign_lhs (last_stmt);
1532   lhs_type = TREE_TYPE (lhs_oprnd);
1533   if (!INTEGRAL_TYPE_P (lhs_type))
1534     return NULL;
1535
1536   rhs_oprnd = gimple_assign_rhs1 (last_stmt);
1537   if (TREE_CODE (rhs_oprnd) != SSA_NAME
1538       || !has_single_use (rhs_oprnd))
1539     return NULL;
1540   popcount_stmt = SSA_NAME_DEF_STMT (rhs_oprnd);
1541
1542   /* Find temp_out = __builtin_popcount{,l,ll} (temp_in);  */
1543   if (!is_gimple_call (popcount_stmt))
1544     return NULL;
1545   switch (gimple_call_combined_fn (popcount_stmt))
1546     {
1547     CASE_CFN_POPCOUNT:
1548       break;
1549     default:
1550       return NULL;
1551     }
1552
1553   if (gimple_call_num_args (popcount_stmt) != 1)
1554     return NULL;
1555
1556   rhs_oprnd = gimple_call_arg (popcount_stmt, 0);
1557   vect_unpromoted_value unprom_diff;
1558   rhs_origin = vect_look_through_possible_promotion (vinfo, rhs_oprnd,
1559                                                     &unprom_diff);
1560
1561   if (!rhs_origin)
1562     return NULL;
1563
1564   /* Input and output of .POPCOUNT should be same-precision integer.
1565      Also A should be unsigned or same precision as temp_in,
1566      otherwise there would be sign_extend from A to temp_in.  */
1567   if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (lhs_type)
1568       || (!TYPE_UNSIGNED (unprom_diff.type)
1569           && (TYPE_PRECISION (unprom_diff.type)
1570               != TYPE_PRECISION (TREE_TYPE (rhs_oprnd)))))
1571     return NULL;
1572   vargs.safe_push (unprom_diff.op);
1573
1574   vect_pattern_detected ("vec_regcog_popcount_pattern", popcount_stmt);
1575   vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);
1576   /* Do it only if the backend has popcount<vector_mode>2 pattern.  */
1577   if (!vec_type
1578       || !direct_internal_fn_supported_p (IFN_POPCOUNT, vec_type,
1579                                           OPTIMIZE_FOR_SPEED))
1580     return NULL;
1581
1582   /* Create B = .POPCOUNT (A).  */
1583   new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
1584   pattern_stmt = gimple_build_call_internal_vec (IFN_POPCOUNT, vargs);
1585   gimple_call_set_lhs (pattern_stmt, new_var);
1586   gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1587   *type_out = vec_type;
1588
1589   if (dump_enabled_p ())
1590     dump_printf_loc (MSG_NOTE, vect_location,
1591                      "created pattern stmt: %G", pattern_stmt);
1592   return pattern_stmt;
1593 }
1594
1595 /* Function vect_recog_pow_pattern
1596
1597    Try to find the following pattern:
1598
1599      x = POW (y, N);
1600
1601    with POW being one of pow, powf, powi, powif and N being
1602    either 2 or 0.5.
1603
1604    Input:
1605
1606    * STMT_VINFO: The stmt from which the pattern search begins.
1607
1608    Output:
1609
1610    * TYPE_OUT: The type of the output of this pattern.
1611
1612    * Return value: A new stmt that will be used to replace the sequence of
1613    stmts that constitute the pattern. In this case it will be:
1614         x = x * x
1615    or
1616         x = sqrt (x)
1617 */
1618
1619 static gimple *
1620 vect_recog_pow_pattern (vec_info *vinfo,
1621                         stmt_vec_info stmt_vinfo, tree *type_out)
1622 {
1623   gimple *last_stmt = stmt_vinfo->stmt;
1624   tree base, exp;
1625   gimple *stmt;
1626   tree var;
1627
1628   if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
1629     return NULL;
1630
1631   switch (gimple_call_combined_fn (last_stmt))
1632     {
1633     CASE_CFN_POW:
1634     CASE_CFN_POWI:
1635       break;
1636
1637     default:
1638       return NULL;
1639     }
1640
1641   base = gimple_call_arg (last_stmt, 0);
1642   exp = gimple_call_arg (last_stmt, 1);
1643   if (TREE_CODE (exp) != REAL_CST
1644       && TREE_CODE (exp) != INTEGER_CST)
1645     {
1646       if (flag_unsafe_math_optimizations
1647           && TREE_CODE (base) == REAL_CST
1648           && gimple_call_builtin_p (last_stmt, BUILT_IN_NORMAL))
1649         {
1650           combined_fn log_cfn;
1651           built_in_function exp_bfn;
1652           switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
1653             {
1654             case BUILT_IN_POW:
1655               log_cfn = CFN_BUILT_IN_LOG;
1656               exp_bfn = BUILT_IN_EXP;
1657               break;
1658             case BUILT_IN_POWF:
1659               log_cfn = CFN_BUILT_IN_LOGF;
1660               exp_bfn = BUILT_IN_EXPF;
1661               break;
1662             case BUILT_IN_POWL:
1663               log_cfn = CFN_BUILT_IN_LOGL;
1664               exp_bfn = BUILT_IN_EXPL;
1665               break;
1666             default:
1667               return NULL;
1668             }
1669           tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
1670           tree exp_decl = builtin_decl_implicit (exp_bfn);
1671           /* Optimize pow (C, x) as exp (log (C) * x).  Normally match.pd
1672              does that, but if C is a power of 2, we want to use
1673              exp2 (log2 (C) * x) in the non-vectorized version, but for
1674              vectorization we don't have vectorized exp2.  */
1675           if (logc
1676               && TREE_CODE (logc) == REAL_CST
1677               && exp_decl
1678               && lookup_attribute ("omp declare simd",
1679                                    DECL_ATTRIBUTES (exp_decl)))
1680             {
1681               cgraph_node *node = cgraph_node::get_create (exp_decl);
1682               if (node->simd_clones == NULL)
1683                 {
1684                   if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
1685                       || node->definition)
1686                     return NULL;
1687                   expand_simd_clones (node);
1688                   if (node->simd_clones == NULL)
1689                     return NULL;
1690                 }
1691               *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
1692               if (!*type_out)
1693                 return NULL;
1694               tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1695               gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
1696               append_pattern_def_seq (vinfo, stmt_vinfo, g);
1697               tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1698               g = gimple_build_call (exp_decl, 1, def);
1699               gimple_call_set_lhs (g, res);
1700               return g;
1701             }
1702         }
1703
1704       return NULL;
1705     }
1706
1707   /* We now have a pow or powi builtin function call with a constant
1708      exponent.  */
1709
1710   /* Catch squaring.  */
1711   if ((tree_fits_shwi_p (exp)
1712        && tree_to_shwi (exp) == 2)
1713       || (TREE_CODE (exp) == REAL_CST
1714           && real_equal (&TREE_REAL_CST (exp), &dconst2)))
1715     {
1716       if (!vect_supportable_direct_optab_p (vinfo, TREE_TYPE (base), MULT_EXPR,
1717                                             TREE_TYPE (base), type_out))
1718         return NULL;
1719
1720       var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1721       stmt = gimple_build_assign (var, MULT_EXPR, base, base);
1722       return stmt;
1723     }
1724
1725   /* Catch square root.  */
1726   if (TREE_CODE (exp) == REAL_CST
1727       && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
1728     {
1729       *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
1730       if (*type_out
1731           && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
1732                                              OPTIMIZE_FOR_SPEED))
1733         {
1734           gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
1735           var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
1736           gimple_call_set_lhs (stmt, var);
1737           gimple_call_set_nothrow (stmt, true);
1738           return stmt;
1739         }
1740     }
1741
1742   return NULL;
1743 }
1744
1745
1746 /* Function vect_recog_widen_sum_pattern
1747
1748    Try to find the following pattern:
1749
1750      type x_t;
1751      TYPE x_T, sum = init;
1752    loop:
1753      sum_0 = phi <init, sum_1>
1754      S1  x_t = *p;
1755      S2  x_T = (TYPE) x_t;
1756      S3  sum_1 = x_T + sum_0;
1757
1758    where type 'TYPE' is at least double the size of type 'type', i.e - we're
1759    summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1760    a special case of a reduction computation.
1761
1762    Input:
1763
1764    * STMT_VINFO: The stmt from which the pattern search begins. In the example,
1765    when this function is called with S3, the pattern {S2,S3} will be detected.
1766
1767    Output:
1768
1769    * TYPE_OUT: The type of the output of this pattern.
1770
1771    * Return value: A new stmt that will be used to replace the sequence of
1772    stmts that constitute the pattern. In this case it will be:
1773         WIDEN_SUM <x_t, sum_0>
1774
1775    Note: The widening-sum idiom is a widening reduction pattern that is
1776          vectorized without preserving all the intermediate results. It
1777          produces only N/2 (widened) results (by summing up pairs of
1778          intermediate results) rather than all N results.  Therefore, we
1779          cannot allow this pattern when we want to get all the results and in
1780          the correct order (as is the case when this computation is in an
1781          inner-loop nested in an outer-loop that us being vectorized).  */
1782
1783 static gimple *
1784 vect_recog_widen_sum_pattern (vec_info *vinfo,
1785                               stmt_vec_info stmt_vinfo, tree *type_out)
1786 {
1787   gimple *last_stmt = stmt_vinfo->stmt;
1788   tree oprnd0, oprnd1;
1789   tree type;
1790   gimple *pattern_stmt;
1791   tree var;
1792
1793   /* Look for the following pattern
1794           DX = (TYPE) X;
1795           sum_1 = DX + sum_0;
1796      In which DX is at least double the size of X, and sum_1 has been
1797      recognized as a reduction variable.
1798    */
1799
1800   /* Starting from LAST_STMT, follow the defs of its uses in search
1801      of the above pattern.  */
1802
1803   if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1804                                        &oprnd0, &oprnd1))
1805     return NULL;
1806
1807   type = TREE_TYPE (gimple_get_lhs (last_stmt));
1808
1809   /* So far so good.  Since last_stmt was detected as a (summation) reduction,
1810      we know that oprnd1 is the reduction variable (defined by a loop-header
1811      phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1812      Left to check that oprnd0 is defined by a cast from type 'type' to type
1813      'TYPE'.  */
1814
1815   vect_unpromoted_value unprom0;
1816   if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
1817       || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
1818     return NULL;
1819
1820   vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
1821
1822   if (!vect_supportable_direct_optab_p (vinfo, type, WIDEN_SUM_EXPR,
1823                                         unprom0.type, type_out))
1824     return NULL;
1825
1826   var = vect_recog_temp_ssa_var (type, NULL);
1827   pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);
1828
1829   return pattern_stmt;
1830 }
1831
1832 /* Recognize cases in which an operation is performed in one type WTYPE
1833    but could be done more efficiently in a narrower type NTYPE.  For example,
1834    if we have:
1835
1836      ATYPE a;  // narrower than NTYPE
1837      BTYPE b;  // narrower than NTYPE
1838      WTYPE aw = (WTYPE) a;
1839      WTYPE bw = (WTYPE) b;
1840      WTYPE res = aw + bw;  // only uses of aw and bw
1841
1842    then it would be more efficient to do:
1843
1844      NTYPE an = (NTYPE) a;
1845      NTYPE bn = (NTYPE) b;
1846      NTYPE resn = an + bn;
1847      WTYPE res = (WTYPE) resn;
1848
1849    Other situations include things like:
1850
1851      ATYPE a;  // NTYPE or narrower
1852      WTYPE aw = (WTYPE) a;
1853      WTYPE res = aw + b;
1854
1855    when only "(NTYPE) res" is significant.  In that case it's more efficient
1856    to truncate "b" and do the operation on NTYPE instead:
1857
1858      NTYPE an = (NTYPE) a;
1859      NTYPE bn = (NTYPE) b;  // truncation
1860      NTYPE resn = an + bn;
1861      WTYPE res = (WTYPE) resn;
1862
1863    All users of "res" should then use "resn" instead, making the final
1864    statement dead (not marked as relevant).  The final statement is still
1865    needed to maintain the type correctness of the IR.
1866
1867    vect_determine_precisions has already determined the minimum
1868    precison of the operation and the minimum precision required
1869    by users of the result.  */
1870
1871 static gimple *
1872 vect_recog_over_widening_pattern (vec_info *vinfo,
1873                                   stmt_vec_info last_stmt_info, tree *type_out)
1874 {
1875   gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1876   if (!last_stmt)
1877     return NULL;
1878
1879   /* See whether we have found that this operation can be done on a
1880      narrower type without changing its semantics.  */
1881   unsigned int new_precision = last_stmt_info->operation_precision;
1882   if (!new_precision)
1883     return NULL;
1884
1885   tree lhs = gimple_assign_lhs (last_stmt);
1886   tree type = TREE_TYPE (lhs);
1887   tree_code code = gimple_assign_rhs_code (last_stmt);
1888
1889   /* Punt for reductions where we don't handle the type conversions.  */
1890   if (STMT_VINFO_DEF_TYPE (last_stmt_info) == vect_reduction_def)
1891     return NULL;
1892
1893   /* Keep the first operand of a COND_EXPR as-is: only the other two
1894      operands are interesting.  */
1895   unsigned int first_op = (code == COND_EXPR ? 2 : 1);
1896
1897   /* Check the operands.  */
1898   unsigned int nops = gimple_num_ops (last_stmt) - first_op;
1899   auto_vec <vect_unpromoted_value, 3> unprom (nops);
1900   unprom.quick_grow (nops);
1901   unsigned int min_precision = 0;
1902   bool single_use_p = false;
1903   for (unsigned int i = 0; i < nops; ++i)
1904     {
1905       tree op = gimple_op (last_stmt, first_op + i);
1906       if (TREE_CODE (op) == INTEGER_CST)
1907         unprom[i].set_op (op, vect_constant_def);
1908       else if (TREE_CODE (op) == SSA_NAME)
1909         {
1910           bool op_single_use_p = true;
1911           if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
1912                                                      &op_single_use_p))
1913             return NULL;
1914           /* If:
1915
1916              (1) N bits of the result are needed;
1917              (2) all inputs are widened from M<N bits; and
1918              (3) one operand OP is a single-use SSA name
1919
1920              we can shift the M->N widening from OP to the output
1921              without changing the number or type of extensions involved.
1922              This then reduces the number of copies of STMT_INFO.
1923
1924              If instead of (3) more than one operand is a single-use SSA name,
1925              shifting the extension to the output is even more of a win.
1926
1927              If instead:
1928
1929              (1) N bits of the result are needed;
1930              (2) one operand OP2 is widened from M2<N bits;
1931              (3) another operand OP1 is widened from M1<M2 bits; and
1932              (4) both OP1 and OP2 are single-use
1933
1934              the choice is between:
1935
1936              (a) truncating OP2 to M1, doing the operation on M1,
1937                  and then widening the result to N
1938
1939              (b) widening OP1 to M2, doing the operation on M2, and then
1940                  widening the result to N
1941
1942              Both shift the M2->N widening of the inputs to the output.
1943              (a) additionally shifts the M1->M2 widening to the output;
1944              it requires fewer copies of STMT_INFO but requires an extra
1945              M2->M1 truncation.
1946
1947              Which is better will depend on the complexity and cost of
1948              STMT_INFO, which is hard to predict at this stage.  However,
1949              a clear tie-breaker in favor of (b) is the fact that the
1950              truncation in (a) increases the length of the operation chain.
1951
1952              If instead of (4) only one of OP1 or OP2 is single-use,
1953              (b) is still a win over doing the operation in N bits:
1954              it still shifts the M2->N widening on the single-use operand
1955              to the output and reduces the number of STMT_INFO copies.
1956
1957              If neither operand is single-use then operating on fewer than
1958              N bits might lead to more extensions overall.  Whether it does
1959              or not depends on global information about the vectorization
1960              region, and whether that's a good trade-off would again
1961              depend on the complexity and cost of the statements involved,
1962              as well as things like register pressure that are not normally
1963              modelled at this stage.  We therefore ignore these cases
1964              and just optimize the clear single-use wins above.
1965
1966              Thus we take the maximum precision of the unpromoted operands
1967              and record whether any operand is single-use.  */
1968           if (unprom[i].dt == vect_internal_def)
1969             {
1970               min_precision = MAX (min_precision,
1971                                    TYPE_PRECISION (unprom[i].type));
1972               single_use_p |= op_single_use_p;
1973             }
1974         }
1975       else
1976         return NULL;
1977     }
1978
1979   /* Although the operation could be done in operation_precision, we have
1980      to balance that against introducing extra truncations or extensions.
1981      Calculate the minimum precision that can be handled efficiently.
1982
1983      The loop above determined that the operation could be handled
1984      efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
1985      extension from the inputs to the output without introducing more
1986      instructions, and would reduce the number of instructions required
1987      for STMT_INFO itself.
1988
1989      vect_determine_precisions has also determined that the result only
1990      needs min_output_precision bits.  Truncating by a factor of N times
1991      requires a tree of N - 1 instructions, so if TYPE is N times wider
1992      than min_output_precision, doing the operation in TYPE and truncating
1993      the result requires N + (N - 1) = 2N - 1 instructions per output vector.
1994      In contrast:
1995
1996      - truncating the input to a unary operation and doing the operation
1997        in the new type requires at most N - 1 + 1 = N instructions per
1998        output vector
1999
2000      - doing the same for a binary operation requires at most
2001        (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
2002
2003      Both unary and binary operations require fewer instructions than
2004      this if the operands were extended from a suitable truncated form.
2005      Thus there is usually nothing to lose by doing operations in
2006      min_output_precision bits, but there can be something to gain.  */
2007   if (!single_use_p)
2008     min_precision = last_stmt_info->min_output_precision;
2009   else
2010     min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
2011
2012   /* Apply the minimum efficient precision we just calculated.  */
2013   if (new_precision < min_precision)
2014     new_precision = min_precision;
2015   new_precision = vect_element_precision (new_precision);
2016   if (new_precision >= TYPE_PRECISION (type))
2017     return NULL;
2018
2019   vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);
2020
2021   *type_out = get_vectype_for_scalar_type (vinfo, type);
2022   if (!*type_out)
2023     return NULL;
2024
2025   /* We've found a viable pattern.  Get the new type of the operation.  */
2026   bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
2027   tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
2028
2029   /* If we're truncating an operation, we need to make sure that we
2030      don't introduce new undefined overflow.  The codes tested here are
2031      a subset of those accepted by vect_truncatable_operation_p.  */
2032   tree op_type = new_type;
2033   if (TYPE_OVERFLOW_UNDEFINED (new_type)
2034       && (code == PLUS_EXPR || code == MINUS_EXPR || code == MULT_EXPR))
2035     op_type = build_nonstandard_integer_type (new_precision, true);
2036
2037   /* We specifically don't check here whether the target supports the
2038      new operation, since it might be something that a later pattern
2039      wants to rewrite anyway.  If targets have a minimum element size
2040      for some optabs, we should pattern-match smaller ops to larger ops
2041      where beneficial.  */
2042   tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
2043   tree op_vectype = get_vectype_for_scalar_type (vinfo, op_type);
2044   if (!new_vectype || !op_vectype)
2045     return NULL;
2046
2047   if (dump_enabled_p ())
2048     dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
2049                      type, new_type);
2050
2051   /* Calculate the rhs operands for an operation on OP_TYPE.  */
2052   tree ops[3] = {};
2053   for (unsigned int i = 1; i < first_op; ++i)
2054     ops[i - 1] = gimple_op (last_stmt, i);
2055   vect_convert_inputs (vinfo, last_stmt_info, nops, &ops[first_op - 1],
2056                        op_type, &unprom[0], op_vectype);
2057
2058   /* Use the operation to produce a result of type OP_TYPE.  */
2059   tree new_var = vect_recog_temp_ssa_var (op_type, NULL);
2060   gimple *pattern_stmt = gimple_build_assign (new_var, code,
2061                                               ops[0], ops[1], ops[2]);
2062   gimple_set_location (pattern_stmt, gimple_location (last_stmt));
2063
2064   if (dump_enabled_p ())
2065     dump_printf_loc (MSG_NOTE, vect_location,
2066                      "created pattern stmt: %G", pattern_stmt);
2067
2068   /* Convert back to the original signedness, if OP_TYPE is different
2069      from NEW_TYPE.  */
2070   if (op_type != new_type)
2071     pattern_stmt = vect_convert_output (vinfo, last_stmt_info, new_type,
2072                                         pattern_stmt, op_vectype);
2073
2074   /* Promote the result to the original type.  */
2075   pattern_stmt = vect_convert_output (vinfo, last_stmt_info, type,
2076                                       pattern_stmt, new_vectype);
2077
2078   return pattern_stmt;
2079 }
2080
2081 /* Recognize the following patterns:
2082
2083      ATYPE a;  // narrower than TYPE
2084      BTYPE b;  // narrower than TYPE
2085
2086    1) Multiply high with scaling
2087      TYPE res = ((TYPE) a * (TYPE) b) >> c;
2088      Here, c is bitsize (TYPE) / 2 - 1.
2089
2090    2) ... or also with rounding
2091      TYPE res = (((TYPE) a * (TYPE) b) >> d + 1) >> 1;
2092      Here, d is bitsize (TYPE) / 2 - 2.
2093
2094    3) Normal multiply high
2095      TYPE res = ((TYPE) a * (TYPE) b) >> e;
2096      Here, e is bitsize (TYPE) / 2.
2097
2098    where only the bottom half of res is used.  */
2099
2100 static gimple *
2101 vect_recog_mulhs_pattern (vec_info *vinfo,
2102                           stmt_vec_info last_stmt_info, tree *type_out)
2103 {
2104   /* Check for a right shift.  */
2105   gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
2106   if (!last_stmt
2107       || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR)
2108     return NULL;
2109
2110   /* Check that the shift result is wider than the users of the
2111      result need (i.e. that narrowing would be a natural choice).  */
2112   tree lhs_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
2113   unsigned int target_precision
2114     = vect_element_precision (last_stmt_info->min_output_precision);
2115   if (!INTEGRAL_TYPE_P (lhs_type)
2116       || target_precision >= TYPE_PRECISION (lhs_type))
2117     return NULL;
2118
2119   /* Look through any change in sign on the outer shift input.  */
2120   vect_unpromoted_value unprom_rshift_input;
2121   tree rshift_input = vect_look_through_possible_promotion
2122     (vinfo, gimple_assign_rhs1 (last_stmt), &unprom_rshift_input);
2123   if (!rshift_input
2124       || TYPE_PRECISION (TREE_TYPE (rshift_input))
2125            != TYPE_PRECISION (lhs_type))
2126     return NULL;
2127
2128   /* Get the definition of the shift input.  */
2129   stmt_vec_info rshift_input_stmt_info
2130     = vect_get_internal_def (vinfo, rshift_input);
2131   if (!rshift_input_stmt_info)
2132     return NULL;
2133   gassign *rshift_input_stmt
2134     = dyn_cast <gassign *> (rshift_input_stmt_info->stmt);
2135   if (!rshift_input_stmt)
2136     return NULL;
2137
2138   stmt_vec_info mulh_stmt_info;
2139   tree scale_term;
2140   bool rounding_p = false;
2141
2142   /* Check for the presence of the rounding term.  */
2143   if (gimple_assign_rhs_code (rshift_input_stmt) == PLUS_EXPR)
2144     {
2145       /* Check that the outer shift was by 1.  */
2146       if (!integer_onep (gimple_assign_rhs2 (last_stmt)))
2147         return NULL;
2148
2149       /* Check that the second operand of the PLUS_EXPR is 1.  */
2150       if (!integer_onep (gimple_assign_rhs2 (rshift_input_stmt)))
2151         return NULL;
2152
2153       /* Look through any change in sign on the addition input.  */
2154       vect_unpromoted_value unprom_plus_input;
2155       tree plus_input = vect_look_through_possible_promotion
2156         (vinfo, gimple_assign_rhs1 (rshift_input_stmt), &unprom_plus_input);
2157       if (!plus_input
2158            || TYPE_PRECISION (TREE_TYPE (plus_input))
2159                 != TYPE_PRECISION (TREE_TYPE (rshift_input)))
2160         return NULL;
2161
2162       /* Get the definition of the multiply-high-scale part.  */
2163       stmt_vec_info plus_input_stmt_info
2164         = vect_get_internal_def (vinfo, plus_input);
2165       if (!plus_input_stmt_info)
2166         return NULL;
2167       gassign *plus_input_stmt
2168         = dyn_cast <gassign *> (plus_input_stmt_info->stmt);
2169       if (!plus_input_stmt
2170           || gimple_assign_rhs_code (plus_input_stmt) != RSHIFT_EXPR)
2171         return NULL;
2172
2173       /* Look through any change in sign on the scaling input.  */
2174       vect_unpromoted_value unprom_scale_input;
2175       tree scale_input = vect_look_through_possible_promotion
2176         (vinfo, gimple_assign_rhs1 (plus_input_stmt), &unprom_scale_input);
2177       if (!scale_input
2178           || TYPE_PRECISION (TREE_TYPE (scale_input))
2179                != TYPE_PRECISION (TREE_TYPE (plus_input)))
2180         return NULL;
2181
2182       /* Get the definition of the multiply-high part.  */
2183       mulh_stmt_info = vect_get_internal_def (vinfo, scale_input);
2184       if (!mulh_stmt_info)
2185         return NULL;
2186
2187       /* Get the scaling term.  */
2188       scale_term = gimple_assign_rhs2 (plus_input_stmt);
2189       rounding_p = true;
2190     }
2191   else
2192     {
2193       mulh_stmt_info = rshift_input_stmt_info;
2194       scale_term = gimple_assign_rhs2 (last_stmt);
2195     }
2196
2197   /* Check that the scaling factor is constant.  */
2198   if (TREE_CODE (scale_term) != INTEGER_CST)
2199     return NULL;
2200
2201   /* Check whether the scaling input term can be seen as two widened
2202      inputs multiplied together.  */
2203   vect_unpromoted_value unprom_mult[2];
2204   tree new_type;
2205   unsigned int nops
2206     = vect_widened_op_tree (vinfo, mulh_stmt_info, MULT_EXPR, WIDEN_MULT_EXPR,
2207                             false, 2, unprom_mult, &new_type);
2208   if (nops != 2)
2209     return NULL;
2210
2211   /* Adjust output precision.  */
2212   if (TYPE_PRECISION (new_type) < target_precision)
2213     new_type = build_nonstandard_integer_type
2214       (target_precision, TYPE_UNSIGNED (new_type));
2215
2216   unsigned mult_precision = TYPE_PRECISION (new_type);
2217   internal_fn ifn;
2218   /* Check that the scaling factor is expected.  Instead of
2219      target_precision, we should use the one that we actually
2220      use for internal function.  */
2221   if (rounding_p)
2222     {
2223       /* Check pattern 2).  */
2224       if (wi::to_widest (scale_term) + mult_precision + 2
2225           != TYPE_PRECISION (lhs_type))
2226         return NULL;
2227
2228       ifn = IFN_MULHRS;
2229     }
2230   else
2231     {
2232       /* Check for pattern 1).  */
2233       if (wi::to_widest (scale_term) + mult_precision + 1
2234           == TYPE_PRECISION (lhs_type))
2235         ifn = IFN_MULHS;
2236       /* Check for pattern 3).  */
2237       else if (wi::to_widest (scale_term) + mult_precision
2238                == TYPE_PRECISION (lhs_type))
2239         ifn = IFN_MULH;
2240       else
2241         return NULL;
2242     }
2243
2244   vect_pattern_detected ("vect_recog_mulhs_pattern", last_stmt);
2245
2246   /* Check for target support.  */
2247   tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
2248   if (!new_vectype
2249       || !direct_internal_fn_supported_p
2250             (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
2251     return NULL;
2252
2253   /* The IR requires a valid vector type for the cast result, even though
2254      it's likely to be discarded.  */
2255   *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
2256   if (!*type_out)
2257     return NULL;
2258
2259   /* Generate the IFN_MULHRS call.  */
2260   tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
2261   tree new_ops[2];
2262   vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
2263                        unprom_mult, new_vectype);
2264   gcall *mulhrs_stmt
2265     = gimple_build_call_internal (ifn, 2, new_ops[0], new_ops[1]);
2266   gimple_call_set_lhs (mulhrs_stmt, new_var);
2267   gimple_set_location (mulhrs_stmt, gimple_location (last_stmt));
2268
2269   if (dump_enabled_p ())
2270     dump_printf_loc (MSG_NOTE, vect_location,
2271                      "created pattern stmt: %G", (gimple *) mulhrs_stmt);
2272
2273   return vect_convert_output (vinfo, last_stmt_info, lhs_type,
2274                               mulhrs_stmt, new_vectype);
2275 }
2276
2277 /* Recognize the patterns:
2278
2279             ATYPE a;  // narrower than TYPE
2280             BTYPE b;  // narrower than TYPE
2281         (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
2282      or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
2283
2284    where only the bottom half of avg is used.  Try to transform them into:
2285
2286         (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
2287      or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
2288
2289   followed by:
2290
2291             TYPE avg = (TYPE) avg';
2292
2293   where NTYPE is no wider than half of TYPE.  Since only the bottom half
2294   of avg is used, all or part of the cast of avg' should become redundant.
2295
2296   If there is no target support available, generate code to distribute rshift
2297   over plus and add a carry.  */
2298
2299 static gimple *
2300 vect_recog_average_pattern (vec_info *vinfo,
2301                             stmt_vec_info last_stmt_info, tree *type_out)
2302 {
2303   /* Check for a shift right by one bit.  */
2304   gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
2305   if (!last_stmt
2306       || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
2307       || !integer_onep (gimple_assign_rhs2 (last_stmt)))
2308     return NULL;
2309
2310   /* Check that the shift result is wider than the users of the
2311      result need (i.e. that narrowing would be a natural choice).  */
2312   tree lhs = gimple_assign_lhs (last_stmt);
2313   tree type = TREE_TYPE (lhs);
2314   unsigned int target_precision
2315     = vect_element_precision (last_stmt_info->min_output_precision);
2316   if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
2317     return NULL;
2318
2319   /* Look through any change in sign on the shift input.  */
2320   tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
2321   vect_unpromoted_value unprom_plus;
2322   rshift_rhs = vect_look_through_possible_promotion (vinfo, rshift_rhs,
2323                                                      &unprom_plus);
2324   if (!rshift_rhs
2325       || TYPE_PRECISION (TREE_TYPE (rshift_rhs)) != TYPE_PRECISION (type))
2326     return NULL;
2327
2328   /* Get the definition of the shift input.  */
2329   stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
2330   if (!plus_stmt_info)
2331     return NULL;
2332
2333   /* Check whether the shift input can be seen as a tree of additions on
2334      2 or 3 widened inputs.
2335
2336      Note that the pattern should be a win even if the result of one or
2337      more additions is reused elsewhere: if the pattern matches, we'd be
2338      replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s.  */
2339   internal_fn ifn = IFN_AVG_FLOOR;
2340   vect_unpromoted_value unprom[3];
2341   tree new_type;
2342   unsigned int nops = vect_widened_op_tree (vinfo, plus_stmt_info, PLUS_EXPR,
2343                                             WIDEN_PLUS_EXPR, false, 3,
2344                                             unprom, &new_type);
2345   if (nops == 0)
2346     return NULL;
2347   if (nops == 3)
2348     {
2349       /* Check that one operand is 1.  */
2350       unsigned int i;
2351       for (i = 0; i < 3; ++i)
2352         if (integer_onep (unprom[i].op))
2353           break;
2354       if (i == 3)
2355         return NULL;
2356       /* Throw away the 1 operand and keep the other two.  */
2357       if (i < 2)
2358         unprom[i] = unprom[2];
2359       ifn = IFN_AVG_CEIL;
2360     }
2361
2362   vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
2363
2364   /* We know that:
2365
2366      (a) the operation can be viewed as:
2367
2368            TYPE widened0 = (TYPE) UNPROM[0];
2369            TYPE widened1 = (TYPE) UNPROM[1];
2370            TYPE tmp1 = widened0 + widened1 {+ 1};
2371            TYPE tmp2 = tmp1 >> 1;   // LAST_STMT_INFO
2372
2373      (b) the first two statements are equivalent to:
2374
2375            TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
2376            TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
2377
2378      (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
2379          where sensible;
2380
2381      (d) all the operations can be performed correctly at twice the width of
2382          NEW_TYPE, due to the nature of the average operation; and
2383
2384      (e) users of the result of the right shift need only TARGET_PRECISION
2385          bits, where TARGET_PRECISION is no more than half of TYPE's
2386          precision.
2387
2388      Under these circumstances, the only situation in which NEW_TYPE
2389      could be narrower than TARGET_PRECISION is if widened0, widened1
2390      and an addition result are all used more than once.  Thus we can
2391      treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
2392      as "free", whereas widening the result of the average instruction
2393      from NEW_TYPE to TARGET_PRECISION would be a new operation.  It's
2394      therefore better not to go narrower than TARGET_PRECISION.  */
2395   if (TYPE_PRECISION (new_type) < target_precision)
2396     new_type = build_nonstandard_integer_type (target_precision,
2397                                                TYPE_UNSIGNED (new_type));
2398
2399   /* Check for target support.  */
2400   tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
2401   if (!new_vectype)
2402     return NULL;
2403
2404   bool fallback_p = false;
2405
2406   if (direct_internal_fn_supported_p (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
2407     ;
2408   else if (TYPE_UNSIGNED (new_type)
2409            && optab_for_tree_code (RSHIFT_EXPR, new_vectype, optab_scalar)
2410            && optab_for_tree_code (PLUS_EXPR, new_vectype, optab_default)
2411            && optab_for_tree_code (BIT_IOR_EXPR, new_vectype, optab_default)
2412            && optab_for_tree_code (BIT_AND_EXPR, new_vectype, optab_default))
2413     fallback_p = true;
2414   else
2415     return NULL;
2416
2417   /* The IR requires a valid vector type for the cast result, even though
2418      it's likely to be discarded.  */
2419   *type_out = get_vectype_for_scalar_type (vinfo, type);
2420   if (!*type_out)
2421     return NULL;
2422
2423   tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
2424   tree new_ops[2];
2425   vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
2426                        unprom, new_vectype);
2427
2428   if (fallback_p)
2429     {
2430       /* As a fallback, generate code for following sequence:
2431
2432          shifted_op0 = new_ops[0] >> 1;
2433          shifted_op1 = new_ops[1] >> 1;
2434          sum_of_shifted = shifted_op0 + shifted_op1;
2435          unmasked_carry = new_ops[0] and/or new_ops[1];
2436          carry = unmasked_carry & 1;
2437          new_var = sum_of_shifted + carry;
2438       */
2439
2440       tree one_cst = build_one_cst (new_type);
2441       gassign *g;
2442
2443       tree shifted_op0 = vect_recog_temp_ssa_var (new_type, NULL);
2444       g = gimple_build_assign (shifted_op0, RSHIFT_EXPR, new_ops[0], one_cst);
2445       append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2446
2447       tree shifted_op1 = vect_recog_temp_ssa_var (new_type, NULL);
2448       g = gimple_build_assign (shifted_op1, RSHIFT_EXPR, new_ops[1], one_cst);
2449       append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2450
2451       tree sum_of_shifted = vect_recog_temp_ssa_var (new_type, NULL);
2452       g = gimple_build_assign (sum_of_shifted, PLUS_EXPR,
2453                                shifted_op0, shifted_op1);
2454       append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2455
2456       tree unmasked_carry = vect_recog_temp_ssa_var (new_type, NULL);
2457       tree_code c = (ifn == IFN_AVG_CEIL) ? BIT_IOR_EXPR : BIT_AND_EXPR;
2458       g = gimple_build_assign (unmasked_carry, c, new_ops[0], new_ops[1]);
2459       append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2460
2461       tree carry = vect_recog_temp_ssa_var (new_type, NULL);
2462       g = gimple_build_assign (carry, BIT_AND_EXPR, unmasked_carry, one_cst);
2463       append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2464
2465       g = gimple_build_assign (new_var, PLUS_EXPR, sum_of_shifted, carry);
2466       return vect_convert_output (vinfo, last_stmt_info, type, g, new_vectype);
2467     }
2468
2469   /* Generate the IFN_AVG* call.  */
2470   gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
2471                                                     new_ops[1]);
2472   gimple_call_set_lhs (average_stmt, new_var);
2473   gimple_set_location (average_stmt, gimple_location (last_stmt));
2474
2475   if (dump_enabled_p ())
2476     dump_printf_loc (MSG_NOTE, vect_location,
2477                      "created pattern stmt: %G", (gimple *) average_stmt);
2478
2479   return vect_convert_output (vinfo, last_stmt_info,
2480                               type, average_stmt, new_vectype);
2481 }
2482
2483 /* Recognize cases in which the input to a cast is wider than its
2484    output, and the input is fed by a widening operation.  Fold this
2485    by removing the unnecessary intermediate widening.  E.g.:
2486
2487      unsigned char a;
2488      unsigned int b = (unsigned int) a;
2489      unsigned short c = (unsigned short) b;
2490
2491    -->
2492
2493      unsigned short c = (unsigned short) a;
2494
2495    Although this is rare in input IR, it is an expected side-effect
2496    of the over-widening pattern above.
2497
2498    This is beneficial also for integer-to-float conversions, if the
2499    widened integer has more bits than the float, and if the unwidened
2500    input doesn't.  */
2501
2502 static gimple *
2503 vect_recog_cast_forwprop_pattern (vec_info *vinfo,
2504                                   stmt_vec_info last_stmt_info, tree *type_out)
2505 {
2506   /* Check for a cast, including an integer-to-float conversion.  */
2507   gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
2508   if (!last_stmt)
2509     return NULL;
2510   tree_code code = gimple_assign_rhs_code (last_stmt);
2511   if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
2512     return NULL;
2513
2514   /* Make sure that the rhs is a scalar with a natural bitsize.  */
2515   tree lhs = gimple_assign_lhs (last_stmt);
2516   if (!lhs)
2517     return NULL;
2518   tree lhs_type = TREE_TYPE (lhs);
2519   scalar_mode lhs_mode;
2520   if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
2521       || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
2522     return NULL;
2523
2524   /* Check for a narrowing operation (from a vector point of view).  */
2525   tree rhs = gimple_assign_rhs1 (last_stmt);
2526   tree rhs_type = TREE_TYPE (rhs);
2527   if (!INTEGRAL_TYPE_P (rhs_type)
2528       || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
2529       || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
2530     return NULL;
2531
2532   /* Try to find an unpromoted input.  */
2533   vect_unpromoted_value unprom;
2534   if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
2535       || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
2536     return NULL;
2537
2538   /* If the bits above RHS_TYPE matter, make sure that they're the
2539      same when extending from UNPROM as they are when extending from RHS.  */
2540   if (!INTEGRAL_TYPE_P (lhs_type)
2541       && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
2542     return NULL;
2543
2544   /* We can get the same result by casting UNPROM directly, to avoid
2545      the unnecessary widening and narrowing.  */
2546   vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
2547
2548   *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
2549   if (!*type_out)
2550     return NULL;
2551
2552   tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2553   gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
2554   gimple_set_location (pattern_stmt, gimple_location (last_stmt));
2555
2556   return pattern_stmt;
2557 }
2558
2559 /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
2560    to WIDEN_LSHIFT_EXPR.  See vect_recog_widen_op_pattern for details.  */
2561
2562 static gimple *
2563 vect_recog_widen_shift_pattern (vec_info *vinfo,
2564                                 stmt_vec_info last_stmt_info, tree *type_out)
2565 {
2566   return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
2567                                       LSHIFT_EXPR, WIDEN_LSHIFT_EXPR, true,
2568                                       "vect_recog_widen_shift_pattern");
2569 }
2570
2571 /* Detect a rotate pattern wouldn't be otherwise vectorized:
2572
2573    type a_t, b_t, c_t;
2574
2575    S0 a_t = b_t r<< c_t;
2576
2577   Input/Output:
2578
2579   * STMT_VINFO: The stmt from which the pattern search begins,
2580     i.e. the shift/rotate stmt.  The original stmt (S0) is replaced
2581     with a sequence:
2582
2583    S1 d_t = -c_t;
2584    S2 e_t = d_t & (B - 1);
2585    S3 f_t = b_t << c_t;
2586    S4 g_t = b_t >> e_t;
2587    S0 a_t = f_t | g_t;
2588
2589     where B is element bitsize of type.
2590
2591   Output:
2592
2593   * TYPE_OUT: The type of the output of this pattern.
2594
2595   * Return value: A new stmt that will be used to replace the rotate
2596     S0 stmt.  */
2597
2598 static gimple *
2599 vect_recog_rotate_pattern (vec_info *vinfo,
2600                            stmt_vec_info stmt_vinfo, tree *type_out)
2601 {
2602   gimple *last_stmt = stmt_vinfo->stmt;
2603   tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
2604   gimple *pattern_stmt, *def_stmt;
2605   enum tree_code rhs_code;
2606   enum vect_def_type dt;
2607   optab optab1, optab2;
2608   edge ext_def = NULL;
2609   bool bswap16_p = false;
2610
2611   if (is_gimple_assign (last_stmt))
2612     {
2613       rhs_code = gimple_assign_rhs_code (last_stmt);
2614       switch (rhs_code)
2615         {
2616         case LROTATE_EXPR:
2617         case RROTATE_EXPR:
2618           break;
2619         default:
2620           return NULL;
2621         }
2622
2623       lhs = gimple_assign_lhs (last_stmt);
2624       oprnd0 = gimple_assign_rhs1 (last_stmt);
2625       type = TREE_TYPE (oprnd0);
2626       oprnd1 = gimple_assign_rhs2 (last_stmt);
2627     }
2628   else if (gimple_call_builtin_p (last_stmt, BUILT_IN_BSWAP16))
2629     {
2630       /* __builtin_bswap16 (x) is another form of x r>> 8.
2631          The vectorizer has bswap support, but only if the argument isn't
2632          promoted.  */
2633       lhs = gimple_call_lhs (last_stmt);
2634       oprnd0 = gimple_call_arg (last_stmt, 0);
2635       type = TREE_TYPE (oprnd0);
2636       if (!lhs
2637           || TYPE_PRECISION (TREE_TYPE (lhs)) != 16
2638           || TYPE_PRECISION (type) <= 16
2639           || TREE_CODE (oprnd0) != SSA_NAME
2640           || BITS_PER_UNIT != 8)
2641         return NULL;
2642
2643       stmt_vec_info def_stmt_info;
2644       if (!vect_is_simple_use (oprnd0, vinfo, &dt, &def_stmt_info, &def_stmt))
2645         return NULL;
2646
2647       if (dt != vect_internal_def)
2648         return NULL;
2649
2650       if (gimple_assign_cast_p (def_stmt))
2651         {
2652           def = gimple_assign_rhs1 (def_stmt);
2653           if (INTEGRAL_TYPE_P (TREE_TYPE (def))
2654               && TYPE_PRECISION (TREE_TYPE (def)) == 16)
2655             oprnd0 = def;
2656         }
2657
2658       type = TREE_TYPE (lhs);
2659       vectype = get_vectype_for_scalar_type (vinfo, type);
2660       if (vectype == NULL_TREE)
2661         return NULL;
2662
2663       if (tree char_vectype = get_same_sized_vectype (char_type_node, vectype))
2664         {
2665           /* The encoding uses one stepped pattern for each byte in the
2666              16-bit word.  */
2667           vec_perm_builder elts (TYPE_VECTOR_SUBPARTS (char_vectype), 2, 3);
2668           for (unsigned i = 0; i < 3; ++i)
2669             for (unsigned j = 0; j < 2; ++j)
2670               elts.quick_push ((i + 1) * 2 - j - 1);
2671
2672           vec_perm_indices indices (elts, 1,
2673                                     TYPE_VECTOR_SUBPARTS (char_vectype));
2674           machine_mode vmode = TYPE_MODE (char_vectype);
2675           if (can_vec_perm_const_p (vmode, vmode, indices))
2676             {
2677               /* vectorizable_bswap can handle the __builtin_bswap16 if we
2678                  undo the argument promotion.  */
2679               if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2680                 {
2681                   def = vect_recog_temp_ssa_var (type, NULL);
2682                   def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2683                   append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2684                   oprnd0 = def;
2685                 }
2686
2687               /* Pattern detected.  */
2688               vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2689
2690               *type_out = vectype;
2691
2692               /* Pattern supported.  Create a stmt to be used to replace the
2693                  pattern, with the unpromoted argument.  */
2694               var = vect_recog_temp_ssa_var (type, NULL);
2695               pattern_stmt = gimple_build_call (gimple_call_fndecl (last_stmt),
2696                                                 1, oprnd0);
2697               gimple_call_set_lhs (pattern_stmt, var);
2698               gimple_call_set_fntype (as_a <gcall *> (pattern_stmt),
2699                                       gimple_call_fntype (last_stmt));
2700               return pattern_stmt;
2701             }
2702         }
2703
2704       oprnd1 = build_int_cst (integer_type_node, 8);
2705       rhs_code = LROTATE_EXPR;
2706       bswap16_p = true;
2707     }
2708   else
2709     return NULL;
2710
2711   if (TREE_CODE (oprnd0) != SSA_NAME
2712       || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
2713       || !INTEGRAL_TYPE_P (type))
2714     return NULL;
2715
2716   stmt_vec_info def_stmt_info;
2717   if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt))
2718     return NULL;
2719
2720   if (dt != vect_internal_def
2721       && dt != vect_constant_def
2722       && dt != vect_external_def)
2723     return NULL;
2724
2725   vectype = get_vectype_for_scalar_type (vinfo, type);
2726   if (vectype == NULL_TREE)
2727     return NULL;
2728
2729   /* If vector/vector or vector/scalar rotate is supported by the target,
2730      don't do anything here.  */
2731   optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
2732   if (optab1
2733       && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2734     {
2735      use_rotate:
2736       if (bswap16_p)
2737         {
2738           if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2739             {
2740               def = vect_recog_temp_ssa_var (type, NULL);
2741               def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2742               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2743               oprnd0 = def;
2744             }
2745
2746           /* Pattern detected.  */
2747           vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2748
2749           *type_out = vectype;
2750
2751           /* Pattern supported.  Create a stmt to be used to replace the
2752              pattern.  */
2753           var = vect_recog_temp_ssa_var (type, NULL);
2754           pattern_stmt = gimple_build_assign (var, LROTATE_EXPR, oprnd0,
2755                                               oprnd1);
2756           return pattern_stmt;
2757         }
2758       return NULL;
2759     }
2760
2761   if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
2762     {
2763       optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
2764       if (optab2
2765           && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2766         goto use_rotate;
2767     }
2768
2769   tree utype = unsigned_type_for (type);
2770   tree uvectype = get_vectype_for_scalar_type (vinfo, utype);
2771   if (!uvectype)
2772     return NULL;
2773
2774   /* If vector/vector or vector/scalar shifts aren't supported by the target,
2775      don't do anything here either.  */
2776   optab1 = optab_for_tree_code (LSHIFT_EXPR, uvectype, optab_vector);
2777   optab2 = optab_for_tree_code (RSHIFT_EXPR, uvectype, optab_vector);
2778   if (!optab1
2779       || optab_handler (optab1, TYPE_MODE (uvectype)) == CODE_FOR_nothing
2780       || !optab2
2781       || optab_handler (optab2, TYPE_MODE (uvectype)) == CODE_FOR_nothing)
2782     {
2783       if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
2784         return NULL;
2785       optab1 = optab_for_tree_code (LSHIFT_EXPR, uvectype, optab_scalar);
2786       optab2 = optab_for_tree_code (RSHIFT_EXPR, uvectype, optab_scalar);
2787       if (!optab1
2788           || optab_handler (optab1, TYPE_MODE (uvectype)) == CODE_FOR_nothing
2789           || !optab2
2790           || optab_handler (optab2, TYPE_MODE (uvectype)) == CODE_FOR_nothing)
2791         return NULL;
2792     }
2793
2794   *type_out = vectype;
2795
2796   if (!useless_type_conversion_p (utype, TREE_TYPE (oprnd0)))
2797     {
2798       def = vect_recog_temp_ssa_var (utype, NULL);
2799       def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2800       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2801       oprnd0 = def;
2802     }
2803
2804   if (dt == vect_external_def && TREE_CODE (oprnd1) == SSA_NAME)
2805     ext_def = vect_get_external_def_edge (vinfo, oprnd1);
2806
2807   def = NULL_TREE;
2808   scalar_int_mode mode = SCALAR_INT_TYPE_MODE (utype);
2809   if (dt != vect_internal_def || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
2810     def = oprnd1;
2811   else if (def_stmt && gimple_assign_cast_p (def_stmt))
2812     {
2813       tree rhs1 = gimple_assign_rhs1 (def_stmt);
2814       if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
2815           && TYPE_PRECISION (TREE_TYPE (rhs1))
2816              == TYPE_PRECISION (type))
2817         def = rhs1;
2818     }
2819
2820   if (def == NULL_TREE)
2821     {
2822       def = vect_recog_temp_ssa_var (utype, NULL);
2823       def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2824       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2825     }
2826   stype = TREE_TYPE (def);
2827
2828   if (TREE_CODE (def) == INTEGER_CST)
2829     {
2830       if (!tree_fits_uhwi_p (def)
2831           || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
2832           || integer_zerop (def))
2833         return NULL;
2834       def2 = build_int_cst (stype,
2835                             GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
2836     }
2837   else
2838     {
2839       tree vecstype = get_vectype_for_scalar_type (vinfo, stype);
2840
2841       if (vecstype == NULL_TREE)
2842         return NULL;
2843       def2 = vect_recog_temp_ssa_var (stype, NULL);
2844       def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
2845       if (ext_def)
2846         {
2847           basic_block new_bb
2848             = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2849           gcc_assert (!new_bb);
2850         }
2851       else
2852         append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
2853
2854       def2 = vect_recog_temp_ssa_var (stype, NULL);
2855       tree mask = build_int_cst (stype, GET_MODE_PRECISION (mode) - 1);
2856       def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
2857                                       gimple_assign_lhs (def_stmt), mask);
2858       if (ext_def)
2859         {
2860           basic_block new_bb
2861             = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2862           gcc_assert (!new_bb);
2863         }
2864       else
2865         append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
2866     }
2867
2868   var1 = vect_recog_temp_ssa_var (utype, NULL);
2869   def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
2870                                         ? LSHIFT_EXPR : RSHIFT_EXPR,
2871                                   oprnd0, def);
2872   append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2873
2874   var2 = vect_recog_temp_ssa_var (utype, NULL);
2875   def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
2876                                         ? RSHIFT_EXPR : LSHIFT_EXPR,
2877                                   oprnd0, def2);
2878   append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2879
2880   /* Pattern detected.  */
2881   vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2882
2883   /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
2884   var = vect_recog_temp_ssa_var (utype, NULL);
2885   pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
2886
2887   if (!useless_type_conversion_p (type, utype))
2888     {
2889       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
2890       tree result = vect_recog_temp_ssa_var (type, NULL);
2891       pattern_stmt = gimple_build_assign (result, NOP_EXPR, var);
2892     }
2893   return pattern_stmt;
2894 }
2895
2896 /* Detect a vector by vector shift pattern that wouldn't be otherwise
2897    vectorized:
2898
2899    type a_t;
2900    TYPE b_T, res_T;
2901
2902    S1 a_t = ;
2903    S2 b_T = ;
2904    S3 res_T = b_T op a_t;
2905
2906   where type 'TYPE' is a type with different size than 'type',
2907   and op is <<, >> or rotate.
2908
2909   Also detect cases:
2910
2911    type a_t;
2912    TYPE b_T, c_T, res_T;
2913
2914    S0 c_T = ;
2915    S1 a_t = (type) c_T;
2916    S2 b_T = ;
2917    S3 res_T = b_T op a_t;
2918
2919   Input/Output:
2920
2921   * STMT_VINFO: The stmt from which the pattern search begins,
2922     i.e. the shift/rotate stmt.  The original stmt (S3) is replaced
2923     with a shift/rotate which has same type on both operands, in the
2924     second case just b_T op c_T, in the first case with added cast
2925     from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2926
2927   Output:
2928
2929   * TYPE_OUT: The type of the output of this pattern.
2930
2931   * Return value: A new stmt that will be used to replace the shift/rotate
2932     S3 stmt.  */
2933
2934 static gimple *
2935 vect_recog_vector_vector_shift_pattern (vec_info *vinfo,
2936                                         stmt_vec_info stmt_vinfo,
2937                                         tree *type_out)
2938 {
2939   gimple *last_stmt = stmt_vinfo->stmt;
2940   tree oprnd0, oprnd1, lhs, var;
2941   gimple *pattern_stmt;
2942   enum tree_code rhs_code;
2943
2944   if (!is_gimple_assign (last_stmt))
2945     return NULL;
2946
2947   rhs_code = gimple_assign_rhs_code (last_stmt);
2948   switch (rhs_code)
2949     {
2950     case LSHIFT_EXPR:
2951     case RSHIFT_EXPR:
2952     case LROTATE_EXPR:
2953     case RROTATE_EXPR:
2954       break;
2955     default:
2956       return NULL;
2957     }
2958
2959   lhs = gimple_assign_lhs (last_stmt);
2960   oprnd0 = gimple_assign_rhs1 (last_stmt);
2961   oprnd1 = gimple_assign_rhs2 (last_stmt);
2962   if (TREE_CODE (oprnd0) != SSA_NAME
2963       || TREE_CODE (oprnd1) != SSA_NAME
2964       || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2965       || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
2966       || TYPE_PRECISION (TREE_TYPE (lhs))
2967          != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2968     return NULL;
2969
2970   stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
2971   if (!def_vinfo)
2972     return NULL;
2973
2974   *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (oprnd0));
2975   if (*type_out == NULL_TREE)
2976     return NULL;
2977
2978   tree def = NULL_TREE;
2979   gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
2980   if (def_stmt && gimple_assign_cast_p (def_stmt))
2981     {
2982       tree rhs1 = gimple_assign_rhs1 (def_stmt);
2983       if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2984           && TYPE_PRECISION (TREE_TYPE (rhs1))
2985              == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2986         {
2987           if (TYPE_PRECISION (TREE_TYPE (oprnd1))
2988               >= TYPE_PRECISION (TREE_TYPE (rhs1)))
2989             def = rhs1;
2990           else
2991             {
2992               tree mask
2993                 = build_low_bits_mask (TREE_TYPE (rhs1),
2994                                        TYPE_PRECISION (TREE_TYPE (oprnd1)));
2995               def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
2996               def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
2997               tree vecstype = get_vectype_for_scalar_type (vinfo,
2998                                                            TREE_TYPE (rhs1));
2999               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
3000             }
3001         }
3002     }
3003
3004   if (def == NULL_TREE)
3005     {
3006       def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
3007       def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
3008       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3009     }
3010
3011   /* Pattern detected.  */
3012   vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
3013
3014   /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
3015   var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
3016   pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
3017
3018   return pattern_stmt;
3019 }
3020
3021 /* Return true iff the target has a vector optab implementing the operation
3022    CODE on type VECTYPE.  */
3023
3024 static bool
3025 target_has_vecop_for_code (tree_code code, tree vectype)
3026 {
3027   optab voptab = optab_for_tree_code (code, vectype, optab_vector);
3028   return voptab
3029          && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
3030 }
3031
3032 /* Verify that the target has optabs of VECTYPE to perform all the steps
3033    needed by the multiplication-by-immediate synthesis algorithm described by
3034    ALG and VAR.  If SYNTH_SHIFT_P is true ensure that vector addition is
3035    present.  Return true iff the target supports all the steps.  */
3036
3037 static bool
3038 target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
3039                                  tree vectype, bool synth_shift_p)
3040 {
3041   if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
3042     return false;
3043
3044   bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
3045   bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
3046
3047   if (var == negate_variant
3048       && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
3049     return false;
3050
3051   /* If we must synthesize shifts with additions make sure that vector
3052      addition is available.  */
3053   if ((var == add_variant || synth_shift_p) && !supports_vplus)
3054     return false;
3055
3056   for (int i = 1; i < alg->ops; i++)
3057     {
3058       switch (alg->op[i])
3059         {
3060         case alg_shift:
3061           break;
3062         case alg_add_t_m2:
3063         case alg_add_t2_m:
3064         case alg_add_factor:
3065           if (!supports_vplus)
3066             return false;
3067           break;
3068         case alg_sub_t_m2:
3069         case alg_sub_t2_m:
3070         case alg_sub_factor:
3071           if (!supports_vminus)
3072             return false;
3073           break;
3074         case alg_unknown:
3075         case alg_m:
3076         case alg_zero:
3077         case alg_impossible:
3078           return false;
3079         default:
3080           gcc_unreachable ();
3081         }
3082     }
3083
3084   return true;
3085 }
3086
3087 /* Synthesize a left shift of OP by AMNT bits using a series of additions and
3088    putting the final result in DEST.  Append all statements but the last into
3089    VINFO.  Return the last statement.  */
3090
3091 static gimple *
3092 synth_lshift_by_additions (vec_info *vinfo,
3093                            tree dest, tree op, HOST_WIDE_INT amnt,
3094                            stmt_vec_info stmt_info)
3095 {
3096   HOST_WIDE_INT i;
3097   tree itype = TREE_TYPE (op);
3098   tree prev_res = op;
3099   gcc_assert (amnt >= 0);
3100   for (i = 0; i < amnt; i++)
3101     {
3102       tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
3103                       : dest;
3104       gimple *stmt
3105         = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
3106       prev_res = tmp_var;
3107       if (i < amnt - 1)
3108         append_pattern_def_seq (vinfo, stmt_info, stmt);
3109       else
3110         return stmt;
3111     }
3112   gcc_unreachable ();
3113   return NULL;
3114 }
3115
3116 /* Helper for vect_synth_mult_by_constant.  Apply a binary operation
3117    CODE to operands OP1 and OP2, creating a new temporary SSA var in
3118    the process if necessary.  Append the resulting assignment statements
3119    to the sequence in STMT_VINFO.  Return the SSA variable that holds the
3120    result of the binary operation.  If SYNTH_SHIFT_P is true synthesize
3121    left shifts using additions.  */
3122
3123 static tree
3124 apply_binop_and_append_stmt (vec_info *vinfo,
3125                              tree_code code, tree op1, tree op2,
3126                              stmt_vec_info stmt_vinfo, bool synth_shift_p)
3127 {
3128   if (integer_zerop (op2)
3129       && (code == LSHIFT_EXPR
3130           || code == PLUS_EXPR))
3131     {
3132       gcc_assert (TREE_CODE (op1) == SSA_NAME);
3133       return op1;
3134     }
3135
3136   gimple *stmt;
3137   tree itype = TREE_TYPE (op1);
3138   tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
3139
3140   if (code == LSHIFT_EXPR
3141       && synth_shift_p)
3142     {
3143       stmt = synth_lshift_by_additions (vinfo, tmp_var, op1,
3144                                         TREE_INT_CST_LOW (op2), stmt_vinfo);
3145       append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3146       return tmp_var;
3147     }
3148
3149   stmt = gimple_build_assign (tmp_var, code, op1, op2);
3150   append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3151   return tmp_var;
3152 }
3153
3154 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
3155    and simple arithmetic operations to be vectorized.  Record the statements
3156    produced in STMT_VINFO and return the last statement in the sequence or
3157    NULL if it's not possible to synthesize such a multiplication.
3158    This function mirrors the behavior of expand_mult_const in expmed.cc but
3159    works on tree-ssa form.  */
3160
3161 static gimple *
3162 vect_synth_mult_by_constant (vec_info *vinfo, tree op, tree val,
3163                              stmt_vec_info stmt_vinfo)
3164 {
3165   tree itype = TREE_TYPE (op);
3166   machine_mode mode = TYPE_MODE (itype);
3167   struct algorithm alg;
3168   mult_variant variant;
3169   if (!tree_fits_shwi_p (val))
3170     return NULL;
3171
3172   /* Multiplication synthesis by shifts, adds and subs can introduce
3173      signed overflow where the original operation didn't.  Perform the
3174      operations on an unsigned type and cast back to avoid this.
3175      In the future we may want to relax this for synthesis algorithms
3176      that we can prove do not cause unexpected overflow.  */
3177   bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
3178
3179   tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
3180   tree vectype = get_vectype_for_scalar_type (vinfo, multtype);
3181   if (!vectype)
3182     return NULL;
3183
3184   /* Targets that don't support vector shifts but support vector additions
3185      can synthesize shifts that way.  */
3186   bool synth_shift_p = !vect_supportable_shift (vinfo, LSHIFT_EXPR, multtype);
3187
3188   HOST_WIDE_INT hwval = tree_to_shwi (val);
3189   /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
3190      The vectorizer's benefit analysis will decide whether it's beneficial
3191      to do this.  */
3192   bool possible = choose_mult_variant (VECTOR_MODE_P (TYPE_MODE (vectype))
3193                                        ? TYPE_MODE (vectype) : mode,
3194                                        hwval, &alg, &variant, MAX_COST);
3195   if (!possible)
3196     return NULL;
3197
3198   if (!target_supports_mult_synth_alg (&alg, variant, vectype, synth_shift_p))
3199     return NULL;
3200
3201   tree accumulator;
3202
3203   /* Clear out the sequence of statements so we can populate it below.  */
3204   gimple *stmt = NULL;
3205
3206   if (cast_to_unsigned_p)
3207     {
3208       tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
3209       stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
3210       append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3211       op = tmp_op;
3212     }
3213
3214   if (alg.op[0] == alg_zero)
3215     accumulator = build_int_cst (multtype, 0);
3216   else
3217     accumulator = op;
3218
3219   bool needs_fixup = (variant == negate_variant)
3220                       || (variant == add_variant);
3221
3222   for (int i = 1; i < alg.ops; i++)
3223     {
3224       tree shft_log = build_int_cst (multtype, alg.log[i]);
3225       tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
3226       tree tmp_var = NULL_TREE;
3227
3228       switch (alg.op[i])
3229         {
3230         case alg_shift:
3231           if (synth_shift_p)
3232             stmt
3233               = synth_lshift_by_additions (vinfo, accum_tmp, accumulator,
3234                                            alg.log[i], stmt_vinfo);
3235           else
3236             stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
3237                                          shft_log);
3238           break;
3239         case alg_add_t_m2:
3240           tmp_var
3241             = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op, shft_log,
3242                                            stmt_vinfo, synth_shift_p);
3243           stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
3244                                        tmp_var);
3245           break;
3246         case alg_sub_t_m2:
3247           tmp_var = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op,
3248                                                  shft_log, stmt_vinfo,
3249                                                  synth_shift_p);
3250           /* In some algorithms the first step involves zeroing the
3251              accumulator.  If subtracting from such an accumulator
3252              just emit the negation directly.  */
3253           if (integer_zerop (accumulator))
3254             stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
3255           else
3256             stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
3257                                         tmp_var);
3258           break;
3259         case alg_add_t2_m:
3260           tmp_var
3261             = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
3262                                            shft_log, stmt_vinfo, synth_shift_p);
3263           stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
3264           break;
3265         case alg_sub_t2_m:
3266           tmp_var
3267             = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
3268                                            shft_log, stmt_vinfo, synth_shift_p);
3269           stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
3270           break;
3271         case alg_add_factor:
3272           tmp_var
3273             = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
3274                                            shft_log, stmt_vinfo, synth_shift_p);
3275           stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
3276                                        tmp_var);
3277           break;
3278         case alg_sub_factor:
3279           tmp_var
3280             = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
3281                                            shft_log, stmt_vinfo, synth_shift_p);
3282           stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
3283                                       accumulator);
3284           break;
3285         default:
3286           gcc_unreachable ();
3287         }
3288       /* We don't want to append the last stmt in the sequence to stmt_vinfo
3289          but rather return it directly.  */
3290
3291       if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
3292         append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3293       accumulator = accum_tmp;
3294     }
3295   if (variant == negate_variant)
3296     {
3297       tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
3298       stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
3299       accumulator = accum_tmp;
3300       if (cast_to_unsigned_p)
3301         append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3302     }
3303   else if (variant == add_variant)
3304     {
3305       tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
3306       stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
3307       accumulator = accum_tmp;
3308       if (cast_to_unsigned_p)
3309         append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3310     }
3311   /* Move back to a signed if needed.  */
3312   if (cast_to_unsigned_p)
3313     {
3314       tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
3315       stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
3316     }
3317
3318   return stmt;
3319 }
3320
3321 /* Detect multiplication by constant and convert it into a sequence of
3322    shifts and additions, subtractions, negations.  We reuse the
3323    choose_mult_variant algorithms from expmed.cc
3324
3325    Input/Output:
3326
3327    STMT_VINFO: The stmt from which the pattern search begins,
3328    i.e. the mult stmt.
3329
3330  Output:
3331
3332   * TYPE_OUT: The type of the output of this pattern.
3333
3334   * Return value: A new stmt that will be used to replace
3335     the multiplication.  */
3336
3337 static gimple *
3338 vect_recog_mult_pattern (vec_info *vinfo,
3339                          stmt_vec_info stmt_vinfo, tree *type_out)
3340 {
3341   gimple *last_stmt = stmt_vinfo->stmt;
3342   tree oprnd0, oprnd1, vectype, itype;
3343   gimple *pattern_stmt;
3344
3345   if (!is_gimple_assign (last_stmt))
3346     return NULL;
3347
3348   if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
3349     return NULL;
3350
3351   oprnd0 = gimple_assign_rhs1 (last_stmt);
3352   oprnd1 = gimple_assign_rhs2 (last_stmt);
3353   itype = TREE_TYPE (oprnd0);
3354
3355   if (TREE_CODE (oprnd0) != SSA_NAME
3356       || TREE_CODE (oprnd1) != INTEGER_CST
3357       || !INTEGRAL_TYPE_P (itype)
3358       || !type_has_mode_precision_p (itype))
3359     return NULL;
3360
3361   vectype = get_vectype_for_scalar_type (vinfo, itype);
3362   if (vectype == NULL_TREE)
3363     return NULL;
3364
3365   /* If the target can handle vectorized multiplication natively,
3366      don't attempt to optimize this.  */
3367   optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
3368   if (mul_optab != unknown_optab)
3369     {
3370       machine_mode vec_mode = TYPE_MODE (vectype);
3371       int icode = (int) optab_handler (mul_optab, vec_mode);
3372       if (icode != CODE_FOR_nothing)
3373        return NULL;
3374     }
3375
3376   pattern_stmt = vect_synth_mult_by_constant (vinfo,
3377                                               oprnd0, oprnd1, stmt_vinfo);
3378   if (!pattern_stmt)
3379     return NULL;
3380
3381   /* Pattern detected.  */
3382   vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
3383
3384   *type_out = vectype;
3385
3386   return pattern_stmt;
3387 }
3388
3389 /* Detect a signed division by a constant that wouldn't be
3390    otherwise vectorized:
3391
3392    type a_t, b_t;
3393
3394    S1 a_t = b_t / N;
3395
3396   where type 'type' is an integral type and N is a constant.
3397
3398   Similarly handle modulo by a constant:
3399
3400    S4 a_t = b_t % N;
3401
3402   Input/Output:
3403
3404   * STMT_VINFO: The stmt from which the pattern search begins,
3405     i.e. the division stmt.  S1 is replaced by if N is a power
3406     of two constant and type is signed:
3407   S3  y_t = b_t < 0 ? N - 1 : 0;
3408   S2  x_t = b_t + y_t;
3409   S1' a_t = x_t >> log2 (N);
3410
3411     S4 is replaced if N is a power of two constant and
3412     type is signed by (where *_T temporaries have unsigned type):
3413   S9  y_T = b_t < 0 ? -1U : 0U;
3414   S8  z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
3415   S7  z_t = (type) z_T;
3416   S6  w_t = b_t + z_t;
3417   S5  x_t = w_t & (N - 1);
3418   S4' a_t = x_t - z_t;
3419
3420   Output:
3421
3422   * TYPE_OUT: The type of the output of this pattern.
3423
3424   * Return value: A new stmt that will be used to replace the division
3425     S1 or modulo S4 stmt.  */
3426
3427 static gimple *
3428 vect_recog_divmod_pattern (vec_info *vinfo,
3429                            stmt_vec_info stmt_vinfo, tree *type_out)
3430 {
3431   gimple *last_stmt = stmt_vinfo->stmt;
3432   tree oprnd0, oprnd1, vectype, itype, cond;
3433   gimple *pattern_stmt, *def_stmt;
3434   enum tree_code rhs_code;
3435   optab optab;
3436   tree q;
3437   int dummy_int, prec;
3438
3439   if (!is_gimple_assign (last_stmt))
3440     return NULL;
3441
3442   rhs_code = gimple_assign_rhs_code (last_stmt);
3443   switch (rhs_code)
3444     {
3445     case TRUNC_DIV_EXPR:
3446     case EXACT_DIV_EXPR:
3447     case TRUNC_MOD_EXPR:
3448       break;
3449     default:
3450       return NULL;
3451     }
3452
3453   oprnd0 = gimple_assign_rhs1 (last_stmt);
3454   oprnd1 = gimple_assign_rhs2 (last_stmt);
3455   itype = TREE_TYPE (oprnd0);
3456   if (TREE_CODE (oprnd0) != SSA_NAME
3457       || TREE_CODE (oprnd1) != INTEGER_CST
3458       || TREE_CODE (itype) != INTEGER_TYPE
3459       || !type_has_mode_precision_p (itype))
3460     return NULL;
3461
3462   scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
3463   vectype = get_vectype_for_scalar_type (vinfo, itype);
3464   if (vectype == NULL_TREE)
3465     return NULL;
3466
3467   if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
3468     {
3469       /* If the target can handle vectorized division or modulo natively,
3470          don't attempt to optimize this, since native division is likely
3471          to give smaller code.  */
3472       optab = optab_for_tree_code (rhs_code, vectype, optab_default);
3473       if (optab != unknown_optab)
3474         {
3475           machine_mode vec_mode = TYPE_MODE (vectype);
3476           int icode = (int) optab_handler (optab, vec_mode);
3477           if (icode != CODE_FOR_nothing)
3478             return NULL;
3479         }
3480     }
3481
3482   prec = TYPE_PRECISION (itype);
3483   if (integer_pow2p (oprnd1))
3484     {
3485       if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
3486         return NULL;
3487
3488       /* Pattern detected.  */
3489       vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
3490
3491       *type_out = vectype;
3492
3493       /* Check if the target supports this internal function.  */
3494       internal_fn ifn = IFN_DIV_POW2;
3495       if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
3496         {
3497           tree shift = build_int_cst (itype, tree_log2 (oprnd1));
3498
3499           tree var_div = vect_recog_temp_ssa_var (itype, NULL);
3500           gimple *div_stmt = gimple_build_call_internal (ifn, 2, oprnd0, shift);
3501           gimple_call_set_lhs (div_stmt, var_div);
3502
3503           if (rhs_code == TRUNC_MOD_EXPR)
3504             {
3505               append_pattern_def_seq (vinfo, stmt_vinfo, div_stmt);
3506               def_stmt
3507                 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3508                                        LSHIFT_EXPR, var_div, shift);
3509               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3510               pattern_stmt
3511                 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3512                                        MINUS_EXPR, oprnd0,
3513                                        gimple_assign_lhs (def_stmt));
3514             }
3515           else
3516             pattern_stmt = div_stmt;
3517           gimple_set_location (pattern_stmt, gimple_location (last_stmt));
3518
3519           return pattern_stmt;
3520         }
3521
3522       cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
3523                      build_int_cst (itype, 0));
3524       if (rhs_code == TRUNC_DIV_EXPR
3525           || rhs_code == EXACT_DIV_EXPR)
3526         {
3527           tree var = vect_recog_temp_ssa_var (itype, NULL);
3528           tree shift;
3529           def_stmt
3530             = gimple_build_assign (var, COND_EXPR, cond,
3531                                    fold_build2 (MINUS_EXPR, itype, oprnd1,
3532                                                 build_int_cst (itype, 1)),
3533                                    build_int_cst (itype, 0));
3534           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3535           var = vect_recog_temp_ssa_var (itype, NULL);
3536           def_stmt
3537             = gimple_build_assign (var, PLUS_EXPR, oprnd0,
3538                                    gimple_assign_lhs (def_stmt));
3539           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3540
3541           shift = build_int_cst (itype, tree_log2 (oprnd1));
3542           pattern_stmt
3543             = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3544                                    RSHIFT_EXPR, var, shift);
3545         }
3546       else
3547         {
3548           tree signmask;
3549           if (compare_tree_int (oprnd1, 2) == 0)
3550             {
3551               signmask = vect_recog_temp_ssa_var (itype, NULL);
3552               def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
3553                                               build_int_cst (itype, 1),
3554                                               build_int_cst (itype, 0));
3555               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3556             }
3557           else
3558             {
3559               tree utype
3560                 = build_nonstandard_integer_type (prec, 1);
3561               tree vecutype = get_vectype_for_scalar_type (vinfo, utype);
3562               tree shift
3563                 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
3564                                         - tree_log2 (oprnd1));
3565               tree var = vect_recog_temp_ssa_var (utype, NULL);
3566
3567               def_stmt = gimple_build_assign (var, COND_EXPR, cond,
3568                                               build_int_cst (utype, -1),
3569                                               build_int_cst (utype, 0));
3570               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
3571               var = vect_recog_temp_ssa_var (utype, NULL);
3572               def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
3573                                               gimple_assign_lhs (def_stmt),
3574                                               shift);
3575               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
3576               signmask = vect_recog_temp_ssa_var (itype, NULL);
3577               def_stmt
3578                 = gimple_build_assign (signmask, NOP_EXPR, var);
3579               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3580             }
3581           def_stmt
3582             = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3583                                    PLUS_EXPR, oprnd0, signmask);
3584           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3585           def_stmt
3586             = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3587                                    BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
3588                                    fold_build2 (MINUS_EXPR, itype, oprnd1,
3589                                                 build_int_cst (itype, 1)));
3590           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3591
3592           pattern_stmt
3593             = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3594                                    MINUS_EXPR, gimple_assign_lhs (def_stmt),
3595                                    signmask);
3596         }
3597
3598       return pattern_stmt;
3599     }
3600
3601   if (prec > HOST_BITS_PER_WIDE_INT
3602       || integer_zerop (oprnd1))
3603     return NULL;
3604
3605   if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
3606     return NULL;
3607
3608   if (TYPE_UNSIGNED (itype))
3609     {
3610       unsigned HOST_WIDE_INT mh, ml;
3611       int pre_shift, post_shift;
3612       unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
3613                                   & GET_MODE_MASK (itype_mode));
3614       tree t1, t2, t3, t4;
3615
3616       if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
3617         /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0.  */
3618         return NULL;
3619
3620       /* Find a suitable multiplier and right shift count
3621          instead of multiplying with D.  */
3622       mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
3623
3624       /* If the suggested multiplier is more than SIZE bits, we can do better
3625          for even divisors, using an initial right shift.  */
3626       if (mh != 0 && (d & 1) == 0)
3627         {
3628           pre_shift = ctz_or_zero (d);
3629           mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
3630                                   &ml, &post_shift, &dummy_int);
3631           gcc_assert (!mh);
3632         }
3633       else
3634         pre_shift = 0;
3635
3636       if (mh != 0)
3637         {
3638           if (post_shift - 1 >= prec)
3639             return NULL;
3640
3641           /* t1 = oprnd0 h* ml;
3642              t2 = oprnd0 - t1;
3643              t3 = t2 >> 1;
3644              t4 = t1 + t3;
3645              q = t4 >> (post_shift - 1);  */
3646           t1 = vect_recog_temp_ssa_var (itype, NULL);
3647           def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
3648                                           build_int_cst (itype, ml));
3649           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3650
3651           t2 = vect_recog_temp_ssa_var (itype, NULL);
3652           def_stmt
3653             = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
3654           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3655
3656           t3 = vect_recog_temp_ssa_var (itype, NULL);
3657           def_stmt
3658             = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
3659           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3660
3661           t4 = vect_recog_temp_ssa_var (itype, NULL);
3662           def_stmt
3663             = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
3664
3665           if (post_shift != 1)
3666             {
3667               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3668
3669               q = vect_recog_temp_ssa_var (itype, NULL);
3670               pattern_stmt
3671                 = gimple_build_assign (q, RSHIFT_EXPR, t4,
3672                                        build_int_cst (itype, post_shift - 1));
3673             }
3674           else
3675             {
3676               q = t4;
3677               pattern_stmt = def_stmt;
3678             }
3679         }
3680       else
3681         {
3682           if (pre_shift >= prec || post_shift >= prec)
3683             return NULL;
3684
3685           /* t1 = oprnd0 >> pre_shift;
3686              t2 = t1 h* ml;
3687              q = t2 >> post_shift;  */
3688           if (pre_shift)
3689             {
3690               t1 = vect_recog_temp_ssa_var (itype, NULL);
3691               def_stmt
3692                 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
3693                                        build_int_cst (NULL, pre_shift));
3694               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3695             }
3696           else
3697             t1 = oprnd0;
3698
3699           t2 = vect_recog_temp_ssa_var (itype, NULL);
3700           def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
3701                                           build_int_cst (itype, ml));
3702
3703           if (post_shift)
3704             {
3705               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3706
3707               q = vect_recog_temp_ssa_var (itype, NULL);
3708               def_stmt
3709                 = gimple_build_assign (q, RSHIFT_EXPR, t2,
3710                                        build_int_cst (itype, post_shift));
3711             }
3712           else
3713             q = t2;
3714
3715           pattern_stmt = def_stmt;
3716         }
3717     }
3718   else
3719     {
3720       unsigned HOST_WIDE_INT ml;
3721       int post_shift;
3722       HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
3723       unsigned HOST_WIDE_INT abs_d;
3724       bool add = false;
3725       tree t1, t2, t3, t4;
3726
3727       /* Give up for -1.  */
3728       if (d == -1)
3729         return NULL;
3730
3731       /* Since d might be INT_MIN, we have to cast to
3732          unsigned HOST_WIDE_INT before negating to avoid
3733          undefined signed overflow.  */
3734       abs_d = (d >= 0
3735                ? (unsigned HOST_WIDE_INT) d
3736                : - (unsigned HOST_WIDE_INT) d);
3737
3738       /* n rem d = n rem -d */
3739       if (rhs_code == TRUNC_MOD_EXPR && d < 0)
3740         {
3741           d = abs_d;
3742           oprnd1 = build_int_cst (itype, abs_d);
3743         }
3744       if (HOST_BITS_PER_WIDE_INT >= prec
3745           && abs_d == HOST_WIDE_INT_1U << (prec - 1))
3746         /* This case is not handled correctly below.  */
3747         return NULL;
3748
3749       choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
3750       if (ml >= HOST_WIDE_INT_1U << (prec - 1))
3751         {
3752           add = true;
3753           ml |= HOST_WIDE_INT_M1U << (prec - 1);
3754         }
3755       if (post_shift >= prec)
3756         return NULL;
3757
3758       /* t1 = oprnd0 h* ml;  */
3759       t1 = vect_recog_temp_ssa_var (itype, NULL);
3760       def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
3761                                       build_int_cst (itype, ml));
3762
3763       if (add)
3764         {
3765           /* t2 = t1 + oprnd0;  */
3766           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3767           t2 = vect_recog_temp_ssa_var (itype, NULL);
3768           def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
3769         }
3770       else
3771         t2 = t1;
3772
3773       if (post_shift)
3774         {
3775           /* t3 = t2 >> post_shift;  */
3776           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3777           t3 = vect_recog_temp_ssa_var (itype, NULL);
3778           def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
3779                                           build_int_cst (itype, post_shift));
3780         }
3781       else
3782         t3 = t2;
3783
3784       int msb = 1;
3785       value_range r;
3786       get_range_query (cfun)->range_of_expr (r, oprnd0);
3787       if (r.kind () == VR_RANGE)
3788         {
3789           if (!wi::neg_p (r.lower_bound (), TYPE_SIGN (itype)))
3790             msb = 0;
3791           else if (wi::neg_p (r.upper_bound (), TYPE_SIGN (itype)))
3792             msb = -1;
3793         }
3794
3795       if (msb == 0 && d >= 0)
3796         {
3797           /* q = t3;  */
3798           q = t3;
3799           pattern_stmt = def_stmt;
3800         }
3801       else
3802         {
3803           /* t4 = oprnd0 >> (prec - 1);
3804              or if we know from VRP that oprnd0 >= 0
3805              t4 = 0;
3806              or if we know from VRP that oprnd0 < 0
3807              t4 = -1;  */
3808           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3809           t4 = vect_recog_temp_ssa_var (itype, NULL);
3810           if (msb != 1)
3811             def_stmt = gimple_build_assign (t4, INTEGER_CST,
3812                                             build_int_cst (itype, msb));
3813           else
3814             def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
3815                                             build_int_cst (itype, prec - 1));
3816           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3817
3818           /* q = t3 - t4;  or q = t4 - t3;  */
3819           q = vect_recog_temp_ssa_var (itype, NULL);
3820           pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
3821                                               d < 0 ? t3 : t4);
3822         }
3823     }
3824
3825   if (rhs_code == TRUNC_MOD_EXPR)
3826     {
3827       tree r, t1;
3828
3829       /* We divided.  Now finish by:
3830          t1 = q * oprnd1;
3831          r = oprnd0 - t1;  */
3832       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
3833
3834       t1 = vect_recog_temp_ssa_var (itype, NULL);
3835       def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
3836       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3837
3838       r = vect_recog_temp_ssa_var (itype, NULL);
3839       pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
3840     }
3841
3842   /* Pattern detected.  */
3843   vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
3844
3845   *type_out = vectype;
3846   return pattern_stmt;
3847 }
3848
3849 /* Function vect_recog_mixed_size_cond_pattern
3850
3851    Try to find the following pattern:
3852
3853      type x_t, y_t;
3854      TYPE a_T, b_T, c_T;
3855    loop:
3856      S1  a_T = x_t CMP y_t ? b_T : c_T;
3857
3858    where type 'TYPE' is an integral type which has different size
3859    from 'type'.  b_T and c_T are either constants (and if 'TYPE' is wider
3860    than 'type', the constants need to fit into an integer type
3861    with the same width as 'type') or results of conversion from 'type'.
3862
3863    Input:
3864
3865    * STMT_VINFO: The stmt from which the pattern search begins.
3866
3867    Output:
3868
3869    * TYPE_OUT: The type of the output of this pattern.
3870
3871    * Return value: A new stmt that will be used to replace the pattern.
3872         Additionally a def_stmt is added.
3873
3874         a_it = x_t CMP y_t ? b_it : c_it;
3875         a_T = (TYPE) a_it;  */
3876
3877 static gimple *
3878 vect_recog_mixed_size_cond_pattern (vec_info *vinfo,
3879                                     stmt_vec_info stmt_vinfo, tree *type_out)
3880 {
3881   gimple *last_stmt = stmt_vinfo->stmt;
3882   tree cond_expr, then_clause, else_clause;
3883   tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
3884   gimple *pattern_stmt, *def_stmt;
3885   tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
3886   gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
3887   bool promotion;
3888   tree comp_scalar_type;
3889
3890   if (!is_gimple_assign (last_stmt)
3891       || gimple_assign_rhs_code (last_stmt) != COND_EXPR
3892       || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
3893     return NULL;
3894
3895   cond_expr = gimple_assign_rhs1 (last_stmt);
3896   then_clause = gimple_assign_rhs2 (last_stmt);
3897   else_clause = gimple_assign_rhs3 (last_stmt);
3898
3899   if (!COMPARISON_CLASS_P (cond_expr))
3900     return NULL;
3901
3902   comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
3903   comp_vectype = get_vectype_for_scalar_type (vinfo, comp_scalar_type);
3904   if (comp_vectype == NULL_TREE)
3905     return NULL;
3906
3907   type = TREE_TYPE (gimple_assign_lhs (last_stmt));
3908   if (types_compatible_p (type, comp_scalar_type)
3909       || ((TREE_CODE (then_clause) != INTEGER_CST
3910            || TREE_CODE (else_clause) != INTEGER_CST)
3911           && !INTEGRAL_TYPE_P (comp_scalar_type))
3912       || !INTEGRAL_TYPE_P (type))
3913     return NULL;
3914
3915   if ((TREE_CODE (then_clause) != INTEGER_CST
3916        && !type_conversion_p (vinfo, then_clause, false,
3917                               &orig_type0, &def_stmt0, &promotion))
3918       || (TREE_CODE (else_clause) != INTEGER_CST
3919           && !type_conversion_p (vinfo, else_clause, false,
3920                                  &orig_type1, &def_stmt1, &promotion)))
3921     return NULL;
3922
3923   if (orig_type0 && orig_type1
3924       && !types_compatible_p (orig_type0, orig_type1))
3925     return NULL;
3926
3927   if (orig_type0)
3928     {
3929       if (!types_compatible_p (orig_type0, comp_scalar_type))
3930         return NULL;
3931       then_clause = gimple_assign_rhs1 (def_stmt0);
3932       itype = orig_type0;
3933     }
3934
3935   if (orig_type1)
3936     {
3937       if (!types_compatible_p (orig_type1, comp_scalar_type))
3938         return NULL;
3939       else_clause = gimple_assign_rhs1 (def_stmt1);
3940       itype = orig_type1;
3941     }
3942
3943
3944   HOST_WIDE_INT cmp_mode_size
3945     = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
3946
3947   scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
3948   if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
3949     return NULL;
3950
3951   vectype = get_vectype_for_scalar_type (vinfo, type);
3952   if (vectype == NULL_TREE)
3953     return NULL;
3954
3955   if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
3956     return NULL;
3957
3958   if (itype == NULL_TREE)
3959     itype = build_nonstandard_integer_type (cmp_mode_size,
3960                                             TYPE_UNSIGNED (type));
3961
3962   if (itype == NULL_TREE
3963       || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
3964     return NULL;
3965
3966   vecitype = get_vectype_for_scalar_type (vinfo, itype);
3967   if (vecitype == NULL_TREE)
3968     return NULL;
3969
3970   if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
3971     return NULL;
3972
3973   if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
3974     {
3975       if ((TREE_CODE (then_clause) == INTEGER_CST
3976            && !int_fits_type_p (then_clause, itype))
3977           || (TREE_CODE (else_clause) == INTEGER_CST
3978               && !int_fits_type_p (else_clause, itype)))
3979         return NULL;
3980     }
3981
3982   def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3983                                   COND_EXPR, unshare_expr (cond_expr),
3984                                   fold_convert (itype, then_clause),
3985                                   fold_convert (itype, else_clause));
3986   pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3987                                       NOP_EXPR, gimple_assign_lhs (def_stmt));
3988
3989   append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecitype);
3990   *type_out = vectype;
3991
3992   vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt);
3993
3994   return pattern_stmt;
3995 }
3996
3997
3998 /* Helper function of vect_recog_bool_pattern.  Called recursively, return
3999    true if bool VAR can and should be optimized that way.  Assume it shouldn't
4000    in case it's a result of a comparison which can be directly vectorized into
4001    a vector comparison.  Fills in STMTS with all stmts visited during the
4002    walk.  */
4003
4004 static bool
4005 check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
4006 {
4007   tree rhs1;
4008   enum tree_code rhs_code;
4009
4010   stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
4011   if (!def_stmt_info)
4012     return false;
4013
4014   gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
4015   if (!def_stmt)
4016     return false;
4017
4018   if (stmts.contains (def_stmt))
4019     return true;
4020
4021   rhs1 = gimple_assign_rhs1 (def_stmt);
4022   rhs_code = gimple_assign_rhs_code (def_stmt);
4023   switch (rhs_code)
4024     {
4025     case SSA_NAME:
4026       if (! check_bool_pattern (rhs1, vinfo, stmts))
4027         return false;
4028       break;
4029
4030     CASE_CONVERT:
4031       if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
4032         return false;
4033       if (! check_bool_pattern (rhs1, vinfo, stmts))
4034         return false;
4035       break;
4036
4037     case BIT_NOT_EXPR:
4038       if (! check_bool_pattern (rhs1, vinfo, stmts))
4039         return false;
4040       break;
4041
4042     case BIT_AND_EXPR:
4043     case BIT_IOR_EXPR:
4044     case BIT_XOR_EXPR:
4045       if (! check_bool_pattern (rhs1, vinfo, stmts)
4046           || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
4047         return false;
4048       break;
4049
4050     default:
4051       if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
4052         {
4053           tree vecitype, comp_vectype;
4054
4055           /* If the comparison can throw, then is_gimple_condexpr will be
4056              false and we can't make a COND_EXPR/VEC_COND_EXPR out of it.  */
4057           if (stmt_could_throw_p (cfun, def_stmt))
4058             return false;
4059
4060           comp_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1));
4061           if (comp_vectype == NULL_TREE)
4062             return false;
4063
4064           tree mask_type = get_mask_type_for_scalar_type (vinfo,
4065                                                           TREE_TYPE (rhs1));
4066           if (mask_type
4067               && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
4068             return false;
4069
4070           if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
4071             {
4072               scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
4073               tree itype
4074                 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
4075               vecitype = get_vectype_for_scalar_type (vinfo, itype);
4076               if (vecitype == NULL_TREE)
4077                 return false;
4078             }
4079           else
4080             vecitype = comp_vectype;
4081           if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
4082             return false;
4083         }
4084       else
4085         return false;
4086       break;
4087     }
4088
4089   bool res = stmts.add (def_stmt);
4090   /* We can't end up recursing when just visiting SSA defs but not PHIs.  */
4091   gcc_assert (!res);
4092
4093   return true;
4094 }
4095
4096
4097 /* Helper function of adjust_bool_pattern.  Add a cast to TYPE to a previous
4098    stmt (SSA_NAME_DEF_STMT of VAR) adding a cast to STMT_INFOs
4099    pattern sequence.  */
4100
4101 static tree
4102 adjust_bool_pattern_cast (vec_info *vinfo,
4103                           tree type, tree var, stmt_vec_info stmt_info)
4104 {
4105   gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
4106                                            NOP_EXPR, var);
4107   append_pattern_def_seq (vinfo, stmt_info, cast_stmt,
4108                           get_vectype_for_scalar_type (vinfo, type));
4109   return gimple_assign_lhs (cast_stmt);
4110 }
4111
4112 /* Helper function of vect_recog_bool_pattern.  Do the actual transformations.
4113    VAR is an SSA_NAME that should be transformed from bool to a wider integer
4114    type, OUT_TYPE is the desired final integer type of the whole pattern.
4115    STMT_INFO is the info of the pattern root and is where pattern stmts should
4116    be associated with.  DEFS is a map of pattern defs.  */
4117
4118 static void
4119 adjust_bool_pattern (vec_info *vinfo, tree var, tree out_type,
4120                      stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
4121 {
4122   gimple *stmt = SSA_NAME_DEF_STMT (var);
4123   enum tree_code rhs_code, def_rhs_code;
4124   tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
4125   location_t loc;
4126   gimple *pattern_stmt, *def_stmt;
4127   tree trueval = NULL_TREE;
4128
4129   rhs1 = gimple_assign_rhs1 (stmt);
4130   rhs2 = gimple_assign_rhs2 (stmt);
4131   rhs_code = gimple_assign_rhs_code (stmt);
4132   loc = gimple_location (stmt);
4133   switch (rhs_code)
4134     {
4135     case SSA_NAME:
4136     CASE_CONVERT:
4137       irhs1 = *defs.get (rhs1);
4138       itype = TREE_TYPE (irhs1);
4139       pattern_stmt
4140         = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4141                                SSA_NAME, irhs1);
4142       break;
4143
4144     case BIT_NOT_EXPR:
4145       irhs1 = *defs.get (rhs1);
4146       itype = TREE_TYPE (irhs1);
4147       pattern_stmt
4148         = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4149                                BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
4150       break;
4151
4152     case BIT_AND_EXPR:
4153       /* Try to optimize x = y & (a < b ? 1 : 0); into
4154          x = (a < b ? y : 0);
4155
4156          E.g. for:
4157            bool a_b, b_b, c_b;
4158            TYPE d_T;
4159
4160            S1  a_b = x1 CMP1 y1;
4161            S2  b_b = x2 CMP2 y2;
4162            S3  c_b = a_b & b_b;
4163            S4  d_T = (TYPE) c_b;
4164
4165          we would normally emit:
4166
4167            S1'  a_T = x1 CMP1 y1 ? 1 : 0;
4168            S2'  b_T = x2 CMP2 y2 ? 1 : 0;
4169            S3'  c_T = a_T & b_T;
4170            S4'  d_T = c_T;
4171
4172          but we can save one stmt by using the
4173          result of one of the COND_EXPRs in the other COND_EXPR and leave
4174          BIT_AND_EXPR stmt out:
4175
4176            S1'  a_T = x1 CMP1 y1 ? 1 : 0;
4177            S3'  c_T = x2 CMP2 y2 ? a_T : 0;
4178            S4'  f_T = c_T;
4179
4180          At least when VEC_COND_EXPR is implemented using masks
4181          cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
4182          computes the comparison masks and ands it, in one case with
4183          all ones vector, in the other case with a vector register.
4184          Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
4185          often more expensive.  */
4186       def_stmt = SSA_NAME_DEF_STMT (rhs2);
4187       def_rhs_code = gimple_assign_rhs_code (def_stmt);
4188       if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
4189         {
4190           irhs1 = *defs.get (rhs1);
4191           tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
4192           if (TYPE_PRECISION (TREE_TYPE (irhs1))
4193               == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
4194             {
4195               rhs_code = def_rhs_code;
4196               rhs1 = def_rhs1;
4197               rhs2 = gimple_assign_rhs2 (def_stmt);
4198               trueval = irhs1;
4199               goto do_compare;
4200             }
4201           else
4202             irhs2 = *defs.get (rhs2);
4203           goto and_ior_xor;
4204         }
4205       def_stmt = SSA_NAME_DEF_STMT (rhs1);
4206       def_rhs_code = gimple_assign_rhs_code (def_stmt);
4207       if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
4208         {
4209           irhs2 = *defs.get (rhs2);
4210           tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
4211           if (TYPE_PRECISION (TREE_TYPE (irhs2))
4212               == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
4213             {
4214               rhs_code = def_rhs_code;
4215               rhs1 = def_rhs1;
4216               rhs2 = gimple_assign_rhs2 (def_stmt);
4217               trueval = irhs2;
4218               goto do_compare;
4219             }
4220           else
4221             irhs1 = *defs.get (rhs1);
4222           goto and_ior_xor;
4223         }
4224       /* FALLTHRU */
4225     case BIT_IOR_EXPR:
4226     case BIT_XOR_EXPR:
4227       irhs1 = *defs.get (rhs1);
4228       irhs2 = *defs.get (rhs2);
4229     and_ior_xor:
4230       if (TYPE_PRECISION (TREE_TYPE (irhs1))
4231           != TYPE_PRECISION (TREE_TYPE (irhs2)))
4232         {
4233           int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
4234           int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
4235           int out_prec = TYPE_PRECISION (out_type);
4236           if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
4237             irhs2 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs1), irhs2,
4238                                               stmt_info);
4239           else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
4240             irhs1 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs2), irhs1,
4241                                               stmt_info);
4242           else
4243             {
4244               irhs1 = adjust_bool_pattern_cast (vinfo,
4245                                                 out_type, irhs1, stmt_info);
4246               irhs2 = adjust_bool_pattern_cast (vinfo,
4247                                                 out_type, irhs2, stmt_info);
4248             }
4249         }
4250       itype = TREE_TYPE (irhs1);
4251       pattern_stmt
4252         = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4253                                rhs_code, irhs1, irhs2);
4254       break;
4255
4256     default:
4257     do_compare:
4258       gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
4259       if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
4260           || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
4261           || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
4262                        GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
4263         {
4264           scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
4265           itype
4266             = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
4267         }
4268       else
4269         itype = TREE_TYPE (rhs1);
4270       cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
4271       if (trueval == NULL_TREE)
4272         trueval = build_int_cst (itype, 1);
4273       else
4274         gcc_checking_assert (useless_type_conversion_p (itype,
4275                                                         TREE_TYPE (trueval)));
4276       pattern_stmt
4277         = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4278                                COND_EXPR, cond_expr, trueval,
4279                                build_int_cst (itype, 0));
4280       break;
4281     }
4282
4283   gimple_set_location (pattern_stmt, loc);
4284   append_pattern_def_seq (vinfo, stmt_info, pattern_stmt,
4285                           get_vectype_for_scalar_type (vinfo, itype));
4286   defs.put (var, gimple_assign_lhs (pattern_stmt));
4287 }
4288
4289 /* Comparison function to qsort a vector of gimple stmts after UID.  */
4290
4291 static int
4292 sort_after_uid (const void *p1, const void *p2)
4293 {
4294   const gimple *stmt1 = *(const gimple * const *)p1;
4295   const gimple *stmt2 = *(const gimple * const *)p2;
4296   return gimple_uid (stmt1) - gimple_uid (stmt2);
4297 }
4298
4299 /* Create pattern stmts for all stmts participating in the bool pattern
4300    specified by BOOL_STMT_SET and its root STMT_INFO with the desired type
4301    OUT_TYPE.  Return the def of the pattern root.  */
4302
4303 static tree
4304 adjust_bool_stmts (vec_info *vinfo, hash_set <gimple *> &bool_stmt_set,
4305                    tree out_type, stmt_vec_info stmt_info)
4306 {
4307   /* Gather original stmts in the bool pattern in their order of appearance
4308      in the IL.  */
4309   auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
4310   for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
4311        i != bool_stmt_set.end (); ++i)
4312     bool_stmts.quick_push (*i);
4313   bool_stmts.qsort (sort_after_uid);
4314
4315   /* Now process them in that order, producing pattern stmts.  */
4316   hash_map <tree, tree> defs;
4317   for (unsigned i = 0; i < bool_stmts.length (); ++i)
4318     adjust_bool_pattern (vinfo, gimple_assign_lhs (bool_stmts[i]),
4319                          out_type, stmt_info, defs);
4320
4321   /* Pop the last pattern seq stmt and install it as pattern root for STMT.  */
4322   gimple *pattern_stmt
4323     = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
4324   return gimple_assign_lhs (pattern_stmt);
4325 }
4326
4327 /* Return the proper type for converting bool VAR into
4328    an integer value or NULL_TREE if no such type exists.
4329    The type is chosen so that the converted value has the
4330    same number of elements as VAR's vector type.  */
4331
4332 static tree
4333 integer_type_for_mask (tree var, vec_info *vinfo)
4334 {
4335   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
4336     return NULL_TREE;
4337
4338   stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
4339   if (!def_stmt_info || !vect_use_mask_type_p (def_stmt_info))
4340     return NULL_TREE;
4341
4342   return build_nonstandard_integer_type (def_stmt_info->mask_precision, 1);
4343 }
4344
4345 /* Function vect_recog_bool_pattern
4346
4347    Try to find pattern like following:
4348
4349      bool a_b, b_b, c_b, d_b, e_b;
4350      TYPE f_T;
4351    loop:
4352      S1  a_b = x1 CMP1 y1;
4353      S2  b_b = x2 CMP2 y2;
4354      S3  c_b = a_b & b_b;
4355      S4  d_b = x3 CMP3 y3;
4356      S5  e_b = c_b | d_b;
4357      S6  f_T = (TYPE) e_b;
4358
4359    where type 'TYPE' is an integral type.  Or a similar pattern
4360    ending in
4361
4362      S6  f_Y = e_b ? r_Y : s_Y;
4363
4364    as results from if-conversion of a complex condition.
4365
4366    Input:
4367
4368    * STMT_VINFO: The stmt at the end from which the pattern
4369                  search begins, i.e. cast of a bool to
4370                  an integer type.
4371
4372    Output:
4373
4374    * TYPE_OUT: The type of the output of this pattern.
4375
4376    * Return value: A new stmt that will be used to replace the pattern.
4377
4378         Assuming size of TYPE is the same as size of all comparisons
4379         (otherwise some casts would be added where needed), the above
4380         sequence we create related pattern stmts:
4381         S1'  a_T = x1 CMP1 y1 ? 1 : 0;
4382         S3'  c_T = x2 CMP2 y2 ? a_T : 0;
4383         S4'  d_T = x3 CMP3 y3 ? 1 : 0;
4384         S5'  e_T = c_T | d_T;
4385         S6'  f_T = e_T;
4386
4387         Instead of the above S3' we could emit:
4388         S2'  b_T = x2 CMP2 y2 ? 1 : 0;
4389         S3'  c_T = a_T | b_T;
4390         but the above is more efficient.  */
4391
4392 static gimple *
4393 vect_recog_bool_pattern (vec_info *vinfo,
4394                          stmt_vec_info stmt_vinfo, tree *type_out)
4395 {
4396   gimple *last_stmt = stmt_vinfo->stmt;
4397   enum tree_code rhs_code;
4398   tree var, lhs, rhs, vectype;
4399   gimple *pattern_stmt;
4400
4401   if (!is_gimple_assign (last_stmt))
4402     return NULL;
4403
4404   var = gimple_assign_rhs1 (last_stmt);
4405   lhs = gimple_assign_lhs (last_stmt);
4406   rhs_code = gimple_assign_rhs_code (last_stmt);
4407
4408   if (rhs_code == VIEW_CONVERT_EXPR)
4409     var = TREE_OPERAND (var, 0);
4410
4411   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
4412     return NULL;
4413
4414   hash_set<gimple *> bool_stmts;
4415
4416   if (CONVERT_EXPR_CODE_P (rhs_code)
4417       || rhs_code == VIEW_CONVERT_EXPR)
4418     {
4419       if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4420           || VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4421         return NULL;
4422       vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4423
4424       if (check_bool_pattern (var, vinfo, bool_stmts))
4425         {
4426           rhs = adjust_bool_stmts (vinfo, bool_stmts,
4427                                    TREE_TYPE (lhs), stmt_vinfo);
4428           lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4429           if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
4430             pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
4431           else
4432             pattern_stmt
4433               = gimple_build_assign (lhs, NOP_EXPR, rhs);
4434         }
4435       else
4436         {
4437           tree type = integer_type_for_mask (var, vinfo);
4438           tree cst0, cst1, tmp;
4439
4440           if (!type)
4441             return NULL;
4442
4443           /* We may directly use cond with narrowed type to avoid
4444              multiple cond exprs with following result packing and
4445              perform single cond with packed mask instead.  In case
4446              of widening we better make cond first and then extract
4447              results.  */
4448           if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
4449             type = TREE_TYPE (lhs);
4450
4451           cst0 = build_int_cst (type, 0);
4452           cst1 = build_int_cst (type, 1);
4453           tmp = vect_recog_temp_ssa_var (type, NULL);
4454           pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
4455
4456           if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
4457             {
4458               tree new_vectype = get_vectype_for_scalar_type (vinfo, type);
4459               append_pattern_def_seq (vinfo, stmt_vinfo,
4460                                       pattern_stmt, new_vectype);
4461
4462               lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4463               pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
4464             }
4465         }
4466
4467       *type_out = vectype;
4468       vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4469
4470       return pattern_stmt;
4471     }
4472   else if (rhs_code == COND_EXPR
4473            && TREE_CODE (var) == SSA_NAME)
4474     {
4475       vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4476       if (vectype == NULL_TREE)
4477         return NULL;
4478
4479       /* Build a scalar type for the boolean result that when
4480          vectorized matches the vector type of the result in
4481          size and number of elements.  */
4482       unsigned prec
4483         = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
4484                                TYPE_VECTOR_SUBPARTS (vectype));
4485
4486       tree type
4487         = build_nonstandard_integer_type (prec,
4488                                           TYPE_UNSIGNED (TREE_TYPE (var)));
4489       if (get_vectype_for_scalar_type (vinfo, type) == NULL_TREE)
4490         return NULL;
4491
4492       if (check_bool_pattern (var, vinfo, bool_stmts))
4493         var = adjust_bool_stmts (vinfo, bool_stmts, type, stmt_vinfo);
4494       else if (integer_type_for_mask (var, vinfo))
4495         return NULL;
4496
4497       lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4498       pattern_stmt
4499         = gimple_build_assign (lhs, COND_EXPR,
4500                                build2 (NE_EXPR, boolean_type_node,
4501                                        var, build_int_cst (TREE_TYPE (var), 0)),
4502                                gimple_assign_rhs2 (last_stmt),
4503                                gimple_assign_rhs3 (last_stmt));
4504       *type_out = vectype;
4505       vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4506
4507       return pattern_stmt;
4508     }
4509   else if (rhs_code == SSA_NAME
4510            && STMT_VINFO_DATA_REF (stmt_vinfo))
4511     {
4512       stmt_vec_info pattern_stmt_info;
4513       vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4514       if (!vectype || !VECTOR_MODE_P (TYPE_MODE (vectype)))
4515         return NULL;
4516
4517       if (check_bool_pattern (var, vinfo, bool_stmts))
4518         rhs = adjust_bool_stmts (vinfo, bool_stmts,
4519                                  TREE_TYPE (vectype), stmt_vinfo);
4520       else
4521         {
4522           tree type = integer_type_for_mask (var, vinfo);
4523           tree cst0, cst1, new_vectype;
4524
4525           if (!type)
4526             return NULL;
4527
4528           if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
4529             type = TREE_TYPE (vectype);
4530
4531           cst0 = build_int_cst (type, 0);
4532           cst1 = build_int_cst (type, 1);
4533           new_vectype = get_vectype_for_scalar_type (vinfo, type);
4534
4535           rhs = vect_recog_temp_ssa_var (type, NULL);
4536           pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
4537           append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, new_vectype);
4538         }
4539
4540       lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
4541       if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
4542         {
4543           tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4544           gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
4545           append_pattern_def_seq (vinfo, stmt_vinfo, cast_stmt);
4546           rhs = rhs2;
4547         }
4548       pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
4549       pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
4550       vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
4551       *type_out = vectype;
4552       vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4553
4554       return pattern_stmt;
4555     }
4556   else
4557     return NULL;
4558 }
4559
4560
4561 /* A helper for vect_recog_mask_conversion_pattern.  Build
4562    conversion of MASK to a type suitable for masking VECTYPE.
4563    Built statement gets required vectype and is appended to
4564    a pattern sequence of STMT_VINFO.
4565
4566    Return converted mask.  */
4567
4568 static tree
4569 build_mask_conversion (vec_info *vinfo,
4570                        tree mask, tree vectype, stmt_vec_info stmt_vinfo)
4571 {
4572   gimple *stmt;
4573   tree masktype, tmp;
4574
4575   masktype = truth_type_for (vectype);
4576   tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
4577   stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
4578   append_pattern_def_seq (vinfo, stmt_vinfo,
4579                           stmt, masktype, TREE_TYPE (vectype));
4580
4581   return tmp;
4582 }
4583
4584
4585 /* Function vect_recog_mask_conversion_pattern
4586
4587    Try to find statements which require boolean type
4588    converison.  Additional conversion statements are
4589    added to handle such cases.  For example:
4590
4591    bool m_1, m_2, m_3;
4592    int i_4, i_5;
4593    double d_6, d_7;
4594    char c_1, c_2, c_3;
4595
4596    S1   m_1 = i_4 > i_5;
4597    S2   m_2 = d_6 < d_7;
4598    S3   m_3 = m_1 & m_2;
4599    S4   c_1 = m_3 ? c_2 : c_3;
4600
4601    Will be transformed into:
4602
4603    S1   m_1 = i_4 > i_5;
4604    S2   m_2 = d_6 < d_7;
4605    S3'' m_2' = (_Bool[bitsize=32])m_2
4606    S3'  m_3' = m_1 & m_2';
4607    S4'' m_3'' = (_Bool[bitsize=8])m_3'
4608    S4'  c_1' = m_3'' ? c_2 : c_3;  */
4609
4610 static gimple *
4611 vect_recog_mask_conversion_pattern (vec_info *vinfo,
4612                                     stmt_vec_info stmt_vinfo, tree *type_out)
4613 {
4614   gimple *last_stmt = stmt_vinfo->stmt;
4615   enum tree_code rhs_code;
4616   tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
4617   tree vectype1, vectype2;
4618   stmt_vec_info pattern_stmt_info;
4619   tree rhs1_op0 = NULL_TREE, rhs1_op1 = NULL_TREE;
4620   tree rhs1_op0_type = NULL_TREE, rhs1_op1_type = NULL_TREE;
4621
4622   /* Check for MASK_LOAD ans MASK_STORE calls requiring mask conversion.  */
4623   if (is_gimple_call (last_stmt)
4624       && gimple_call_internal_p (last_stmt))
4625     {
4626       gcall *pattern_stmt;
4627
4628       internal_fn ifn = gimple_call_internal_fn (last_stmt);
4629       int mask_argno = internal_fn_mask_index (ifn);
4630       if (mask_argno < 0)
4631         return NULL;
4632
4633       bool store_p = internal_store_fn_p (ifn);
4634       if (store_p)
4635         {
4636           int rhs_index = internal_fn_stored_value_index (ifn);
4637           tree rhs = gimple_call_arg (last_stmt, rhs_index);
4638           vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs));
4639         }
4640       else
4641         {
4642           lhs = gimple_call_lhs (last_stmt);
4643           vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4644         }
4645
4646       tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
4647       tree mask_arg_type = integer_type_for_mask (mask_arg, vinfo);
4648       if (!mask_arg_type)
4649         return NULL;
4650       vectype2 = get_mask_type_for_scalar_type (vinfo, mask_arg_type);
4651
4652       if (!vectype1 || !vectype2
4653           || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4654                        TYPE_VECTOR_SUBPARTS (vectype2)))
4655         return NULL;
4656
4657       tmp = build_mask_conversion (vinfo, mask_arg, vectype1, stmt_vinfo);
4658
4659       auto_vec<tree, 8> args;
4660       unsigned int nargs = gimple_call_num_args (last_stmt);
4661       args.safe_grow (nargs, true);
4662       for (unsigned int i = 0; i < nargs; ++i)
4663         args[i] = ((int) i == mask_argno
4664                    ? tmp
4665                    : gimple_call_arg (last_stmt, i));
4666       pattern_stmt = gimple_build_call_internal_vec (ifn, args);
4667
4668       if (!store_p)
4669         {
4670           lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4671           gimple_call_set_lhs (pattern_stmt, lhs);
4672         }
4673       gimple_call_set_nothrow (pattern_stmt, true);
4674
4675       pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
4676       if (STMT_VINFO_DATA_REF (stmt_vinfo))
4677         vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
4678
4679       *type_out = vectype1;
4680       vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4681
4682       return pattern_stmt;
4683     }
4684
4685   if (!is_gimple_assign (last_stmt))
4686     return NULL;
4687
4688   gimple *pattern_stmt;
4689   lhs = gimple_assign_lhs (last_stmt);
4690   rhs1 = gimple_assign_rhs1 (last_stmt);
4691   rhs_code = gimple_assign_rhs_code (last_stmt);
4692
4693   /* Check for cond expression requiring mask conversion.  */
4694   if (rhs_code == COND_EXPR)
4695     {
4696       vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4697
4698       if (TREE_CODE (rhs1) == SSA_NAME)
4699         {
4700           rhs1_type = integer_type_for_mask (rhs1, vinfo);
4701           if (!rhs1_type)
4702             return NULL;
4703         }
4704       else if (COMPARISON_CLASS_P (rhs1))
4705         {
4706           /* Check whether we're comparing scalar booleans and (if so)
4707              whether a better mask type exists than the mask associated
4708              with boolean-sized elements.  This avoids unnecessary packs
4709              and unpacks if the booleans are set from comparisons of
4710              wider types.  E.g. in:
4711
4712                int x1, x2, x3, x4, y1, y1;
4713                ...
4714                bool b1 = (x1 == x2);
4715                bool b2 = (x3 == x4);
4716                ... = b1 == b2 ? y1 : y2;
4717
4718              it is better for b1 and b2 to use the mask type associated
4719              with int elements rather bool (byte) elements.  */
4720           rhs1_op0 = TREE_OPERAND (rhs1, 0);
4721           rhs1_op1 = TREE_OPERAND (rhs1, 1);
4722           if (!rhs1_op0 || !rhs1_op1)
4723             return NULL;
4724           rhs1_op0_type = integer_type_for_mask (rhs1_op0, vinfo);
4725           rhs1_op1_type = integer_type_for_mask (rhs1_op1, vinfo);
4726
4727           if (!rhs1_op0_type)
4728             rhs1_type = TREE_TYPE (rhs1_op0);
4729           else if (!rhs1_op1_type)
4730             rhs1_type = TREE_TYPE (rhs1_op1);
4731           else if (TYPE_PRECISION (rhs1_op0_type)
4732                    != TYPE_PRECISION (rhs1_op1_type))
4733             {
4734               int tmp0 = (int) TYPE_PRECISION (rhs1_op0_type)
4735                          - (int) TYPE_PRECISION (TREE_TYPE (lhs));
4736               int tmp1 = (int) TYPE_PRECISION (rhs1_op1_type)
4737                          - (int) TYPE_PRECISION (TREE_TYPE (lhs));
4738               if ((tmp0 > 0 && tmp1 > 0) || (tmp0 < 0 && tmp1 < 0))
4739                 {
4740                   if (abs (tmp0) > abs (tmp1))
4741                     rhs1_type = rhs1_op1_type;
4742                   else
4743                     rhs1_type = rhs1_op0_type;
4744                 }
4745               else
4746                 rhs1_type = build_nonstandard_integer_type
4747                   (TYPE_PRECISION (TREE_TYPE (lhs)), 1);
4748             }
4749           else
4750             rhs1_type = rhs1_op0_type;
4751         }
4752       else
4753         return NULL;
4754
4755       vectype2 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
4756
4757       if (!vectype1 || !vectype2)
4758         return NULL;
4759
4760       /* Continue if a conversion is needed.  Also continue if we have
4761          a comparison whose vector type would normally be different from
4762          VECTYPE2 when considered in isolation.  In that case we'll
4763          replace the comparison with an SSA name (so that we can record
4764          its vector type) and behave as though the comparison was an SSA
4765          name from the outset.  */
4766       if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4767                     TYPE_VECTOR_SUBPARTS (vectype2))
4768           && !rhs1_op0_type
4769           && !rhs1_op1_type)
4770         return NULL;
4771
4772       /* If rhs1 is invariant and we can promote it leave the COND_EXPR
4773          in place, we can handle it in vectorizable_condition.  This avoids
4774          unnecessary promotion stmts and increased vectorization factor.  */
4775       if (COMPARISON_CLASS_P (rhs1)
4776           && INTEGRAL_TYPE_P (rhs1_type)
4777           && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
4778                        TYPE_VECTOR_SUBPARTS (vectype2)))
4779         {
4780           enum vect_def_type dt;
4781           if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
4782               && dt == vect_external_def
4783               && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
4784               && (dt == vect_external_def
4785                   || dt == vect_constant_def))
4786             {
4787               tree wide_scalar_type = build_nonstandard_integer_type
4788                 (vector_element_bits (vectype1), TYPE_UNSIGNED (rhs1_type));
4789               tree vectype3 = get_vectype_for_scalar_type (vinfo,
4790                                                            wide_scalar_type);
4791               if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
4792                 return NULL;
4793             }
4794         }
4795
4796       /* If rhs1 is a comparison we need to move it into a
4797          separate statement.  */
4798       if (TREE_CODE (rhs1) != SSA_NAME)
4799         {
4800           tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
4801           if (rhs1_op0_type
4802               && TYPE_PRECISION (rhs1_op0_type) != TYPE_PRECISION (rhs1_type))
4803             rhs1_op0 = build_mask_conversion (vinfo, rhs1_op0,
4804                                               vectype2, stmt_vinfo);
4805           if (rhs1_op1_type
4806               && TYPE_PRECISION (rhs1_op1_type) != TYPE_PRECISION (rhs1_type))
4807             rhs1_op1 = build_mask_conversion (vinfo, rhs1_op1,
4808                                       vectype2, stmt_vinfo);
4809           pattern_stmt = gimple_build_assign (tmp, TREE_CODE (rhs1),
4810                                               rhs1_op0, rhs1_op1);
4811           rhs1 = tmp;
4812           append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vectype2,
4813                                   rhs1_type);
4814         }
4815
4816       if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
4817                     TYPE_VECTOR_SUBPARTS (vectype2)))
4818         tmp = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
4819       else
4820         tmp = rhs1;
4821
4822       lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4823       pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
4824                                           gimple_assign_rhs2 (last_stmt),
4825                                           gimple_assign_rhs3 (last_stmt));
4826
4827       *type_out = vectype1;
4828       vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4829
4830       return pattern_stmt;
4831     }
4832
4833   /* Now check for binary boolean operations requiring conversion for
4834      one of operands.  */
4835   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4836     return NULL;
4837
4838   if (rhs_code != BIT_IOR_EXPR
4839       && rhs_code != BIT_XOR_EXPR
4840       && rhs_code != BIT_AND_EXPR
4841       && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
4842     return NULL;
4843
4844   rhs2 = gimple_assign_rhs2 (last_stmt);
4845
4846   rhs1_type = integer_type_for_mask (rhs1, vinfo);
4847   rhs2_type = integer_type_for_mask (rhs2, vinfo);
4848
4849   if (!rhs1_type || !rhs2_type
4850       || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
4851     return NULL;
4852
4853   if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
4854     {
4855       vectype1 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
4856       if (!vectype1)
4857         return NULL;
4858       rhs2 = build_mask_conversion (vinfo, rhs2, vectype1, stmt_vinfo);
4859     }
4860   else
4861     {
4862       vectype1 = get_mask_type_for_scalar_type (vinfo, rhs2_type);
4863       if (!vectype1)
4864         return NULL;
4865       rhs1 = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
4866     }
4867
4868   lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4869   pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
4870
4871   *type_out = vectype1;
4872   vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4873
4874   return pattern_stmt;
4875 }
4876
4877 /* STMT_INFO is a load or store.  If the load or store is conditional, return
4878    the boolean condition under which it occurs, otherwise return null.  */
4879
4880 static tree
4881 vect_get_load_store_mask (stmt_vec_info stmt_info)
4882 {
4883   if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt))
4884     {
4885       gcc_assert (gimple_assign_single_p (def_assign));
4886       return NULL_TREE;
4887     }
4888
4889   if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt))
4890     {
4891       internal_fn ifn = gimple_call_internal_fn (def_call);
4892       int mask_index = internal_fn_mask_index (ifn);
4893       return gimple_call_arg (def_call, mask_index);
4894     }
4895
4896   gcc_unreachable ();
4897 }
4898
4899 /* Return MASK if MASK is suitable for masking an operation on vectors
4900    of type VECTYPE, otherwise convert it into such a form and return
4901    the result.  Associate any conversion statements with STMT_INFO's
4902    pattern.  */
4903
4904 static tree
4905 vect_convert_mask_for_vectype (tree mask, tree vectype,
4906                                stmt_vec_info stmt_info, vec_info *vinfo)
4907 {
4908   tree mask_type = integer_type_for_mask (mask, vinfo);
4909   if (mask_type)
4910     {
4911       tree mask_vectype = get_mask_type_for_scalar_type (vinfo, mask_type);
4912       if (mask_vectype
4913           && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
4914                        TYPE_VECTOR_SUBPARTS (mask_vectype)))
4915         mask = build_mask_conversion (vinfo, mask, vectype, stmt_info);
4916     }
4917   return mask;
4918 }
4919
4920 /* Return the equivalent of:
4921
4922      fold_convert (TYPE, VALUE)
4923
4924    with the expectation that the operation will be vectorized.
4925    If new statements are needed, add them as pattern statements
4926    to STMT_INFO.  */
4927
4928 static tree
4929 vect_add_conversion_to_pattern (vec_info *vinfo,
4930                                 tree type, tree value, stmt_vec_info stmt_info)
4931 {
4932   if (useless_type_conversion_p (type, TREE_TYPE (value)))
4933     return value;
4934
4935   tree new_value = vect_recog_temp_ssa_var (type, NULL);
4936   gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
4937   append_pattern_def_seq (vinfo, stmt_info, conversion,
4938                           get_vectype_for_scalar_type (vinfo, type));
4939   return new_value;
4940 }
4941
4942 /* Try to convert STMT_INFO into a call to a gather load or scatter store
4943    internal function.  Return the final statement on success and set
4944    *TYPE_OUT to the vector type being loaded or stored.
4945
4946    This function only handles gathers and scatters that were recognized
4947    as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P).  */
4948
4949 static gimple *
4950 vect_recog_gather_scatter_pattern (vec_info *vinfo,
4951                                    stmt_vec_info stmt_info, tree *type_out)
4952 {
4953   /* Currently we only support this for loop vectorization.  */
4954   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4955   if (!loop_vinfo)
4956     return NULL;
4957
4958   /* Make sure that we're looking at a gather load or scatter store.  */
4959   data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4960   if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
4961     return NULL;
4962
4963   /* Get the boolean that controls whether the load or store happens.
4964      This is null if the operation is unconditional.  */
4965   tree mask = vect_get_load_store_mask (stmt_info);
4966
4967   /* Make sure that the target supports an appropriate internal
4968      function for the gather/scatter operation.  */
4969   gather_scatter_info gs_info;
4970   if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info)
4971       || gs_info.ifn == IFN_LAST)
4972     return NULL;
4973
4974   /* Convert the mask to the right form.  */
4975   tree gs_vectype = get_vectype_for_scalar_type (loop_vinfo,
4976                                                  gs_info.element_type);
4977   if (mask)
4978     mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
4979                                           loop_vinfo);
4980   else if (gs_info.ifn == IFN_MASK_SCATTER_STORE
4981            || gs_info.ifn == IFN_MASK_GATHER_LOAD)
4982     mask = build_int_cst (TREE_TYPE (truth_type_for (gs_vectype)), -1);
4983
4984   /* Get the invariant base and non-invariant offset, converting the
4985      latter to the same width as the vector elements.  */
4986   tree base = gs_info.base;
4987   tree offset_type = TREE_TYPE (gs_info.offset_vectype);
4988   tree offset = vect_add_conversion_to_pattern (vinfo, offset_type,
4989                                                 gs_info.offset, stmt_info);
4990
4991   /* Build the new pattern statement.  */
4992   tree scale = size_int (gs_info.scale);
4993   gcall *pattern_stmt;
4994   if (DR_IS_READ (dr))
4995     {
4996       tree zero = build_zero_cst (gs_info.element_type);
4997       if (mask != NULL)
4998         pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base,
4999                                                    offset, scale, zero, mask);
5000       else
5001         pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
5002                                                    offset, scale, zero);
5003       tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
5004       gimple_call_set_lhs (pattern_stmt, load_lhs);
5005     }
5006   else
5007     {
5008       tree rhs = vect_get_store_rhs (stmt_info);
5009       if (mask != NULL)
5010         pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5,
5011                                                    base, offset, scale, rhs,
5012                                                    mask);
5013       else
5014         pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4,
5015                                                    base, offset, scale, rhs);
5016     }
5017   gimple_call_set_nothrow (pattern_stmt, true);
5018
5019   /* Copy across relevant vectorization info and associate DR with the
5020      new pattern statement instead of the original statement.  */
5021   stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
5022   loop_vinfo->move_dr (pattern_stmt_info, stmt_info);
5023
5024   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5025   *type_out = vectype;
5026   vect_pattern_detected ("gather/scatter pattern", stmt_info->stmt);
5027
5028   return pattern_stmt;
5029 }
5030
5031 /* Return true if TYPE is a non-boolean integer type.  These are the types
5032    that we want to consider for narrowing.  */
5033
5034 static bool
5035 vect_narrowable_type_p (tree type)
5036 {
5037   return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
5038 }
5039
5040 /* Return true if the operation given by CODE can be truncated to N bits
5041    when only N bits of the output are needed.  This is only true if bit N+1
5042    of the inputs has no effect on the low N bits of the result.  */
5043
5044 static bool
5045 vect_truncatable_operation_p (tree_code code)
5046 {
5047   switch (code)
5048     {
5049     case PLUS_EXPR:
5050     case MINUS_EXPR:
5051     case MULT_EXPR:
5052     case BIT_AND_EXPR:
5053     case BIT_IOR_EXPR:
5054     case BIT_XOR_EXPR:
5055     case COND_EXPR:
5056       return true;
5057
5058     default:
5059       return false;
5060     }
5061 }
5062
5063 /* Record that STMT_INFO could be changed from operating on TYPE to
5064    operating on a type with the precision and sign given by PRECISION
5065    and SIGN respectively.  PRECISION is an arbitrary bit precision;
5066    it might not be a whole number of bytes.  */
5067
5068 static void
5069 vect_set_operation_type (stmt_vec_info stmt_info, tree type,
5070                          unsigned int precision, signop sign)
5071 {
5072   /* Round the precision up to a whole number of bytes.  */
5073   precision = vect_element_precision (precision);
5074   if (precision < TYPE_PRECISION (type)
5075       && (!stmt_info->operation_precision
5076           || stmt_info->operation_precision > precision))
5077     {
5078       stmt_info->operation_precision = precision;
5079       stmt_info->operation_sign = sign;
5080     }
5081 }
5082
5083 /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
5084    non-boolean inputs, all of which have type TYPE.  MIN_INPUT_PRECISION
5085    is an arbitrary bit precision; it might not be a whole number of bytes.  */
5086
5087 static void
5088 vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
5089                               unsigned int min_input_precision)
5090 {
5091   /* This operation in isolation only requires the inputs to have
5092      MIN_INPUT_PRECISION of precision,  However, that doesn't mean
5093      that MIN_INPUT_PRECISION is a natural precision for the chain
5094      as a whole.  E.g. consider something like:
5095
5096          unsigned short *x, *y;
5097          *y = ((*x & 0xf0) >> 4) | (*y << 4);
5098
5099      The right shift can be done on unsigned chars, and only requires the
5100      result of "*x & 0xf0" to be done on unsigned chars.  But taking that
5101      approach would mean turning a natural chain of single-vector unsigned
5102      short operations into one that truncates "*x" and then extends
5103      "(*x & 0xf0) >> 4", with two vectors for each unsigned short
5104      operation and one vector for each unsigned char operation.
5105      This would be a significant pessimization.
5106
5107      Instead only propagate the maximum of this precision and the precision
5108      required by the users of the result.  This means that we don't pessimize
5109      the case above but continue to optimize things like:
5110
5111          unsigned char *y;
5112          unsigned short *x;
5113          *y = ((*x & 0xf0) >> 4) | (*y << 4);
5114
5115      Here we would truncate two vectors of *x to a single vector of
5116      unsigned chars and use single-vector unsigned char operations for
5117      everything else, rather than doing two unsigned short copies of
5118      "(*x & 0xf0) >> 4" and then truncating the result.  */
5119   min_input_precision = MAX (min_input_precision,
5120                              stmt_info->min_output_precision);
5121
5122   if (min_input_precision < TYPE_PRECISION (type)
5123       && (!stmt_info->min_input_precision
5124           || stmt_info->min_input_precision > min_input_precision))
5125     stmt_info->min_input_precision = min_input_precision;
5126 }
5127
5128 /* Subroutine of vect_determine_min_output_precision.  Return true if
5129    we can calculate a reduced number of output bits for STMT_INFO,
5130    whose result is LHS.  */
5131
5132 static bool
5133 vect_determine_min_output_precision_1 (vec_info *vinfo,
5134                                        stmt_vec_info stmt_info, tree lhs)
5135 {
5136   /* Take the maximum precision required by users of the result.  */
5137   unsigned int precision = 0;
5138   imm_use_iterator iter;
5139   use_operand_p use;
5140   FOR_EACH_IMM_USE_FAST (use, iter, lhs)
5141     {
5142       gimple *use_stmt = USE_STMT (use);
5143       if (is_gimple_debug (use_stmt))
5144         continue;
5145       stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
5146       if (!use_stmt_info || !use_stmt_info->min_input_precision)
5147         return false;
5148       /* The input precision recorded for COND_EXPRs applies only to the
5149          "then" and "else" values.  */
5150       gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
5151       if (assign
5152           && gimple_assign_rhs_code (assign) == COND_EXPR
5153           && use->use != gimple_assign_rhs2_ptr (assign)
5154           && use->use != gimple_assign_rhs3_ptr (assign))
5155         return false;
5156       precision = MAX (precision, use_stmt_info->min_input_precision);
5157     }
5158
5159   if (dump_enabled_p ())
5160     dump_printf_loc (MSG_NOTE, vect_location,
5161                      "only the low %d bits of %T are significant\n",
5162                      precision, lhs);
5163   stmt_info->min_output_precision = precision;
5164   return true;
5165 }
5166
5167 /* Calculate min_output_precision for STMT_INFO.  */
5168
5169 static void
5170 vect_determine_min_output_precision (vec_info *vinfo, stmt_vec_info stmt_info)
5171 {
5172   /* We're only interested in statements with a narrowable result.  */
5173   tree lhs = gimple_get_lhs (stmt_info->stmt);
5174   if (!lhs
5175       || TREE_CODE (lhs) != SSA_NAME
5176       || !vect_narrowable_type_p (TREE_TYPE (lhs)))
5177     return;
5178
5179   if (!vect_determine_min_output_precision_1 (vinfo, stmt_info, lhs))
5180     stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
5181 }
5182
5183 /* Use range information to decide whether STMT (described by STMT_INFO)
5184    could be done in a narrower type.  This is effectively a forward
5185    propagation, since it uses context-independent information that applies
5186    to all users of an SSA name.  */
5187
5188 static void
5189 vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
5190 {
5191   tree lhs = gimple_assign_lhs (stmt);
5192   if (!lhs || TREE_CODE (lhs) != SSA_NAME)
5193     return;
5194
5195   tree type = TREE_TYPE (lhs);
5196   if (!vect_narrowable_type_p (type))
5197     return;
5198
5199   /* First see whether we have any useful range information for the result.  */
5200   unsigned int precision = TYPE_PRECISION (type);
5201   signop sign = TYPE_SIGN (type);
5202   wide_int min_value, max_value;
5203   if (!vect_get_range_info (lhs, &min_value, &max_value))
5204     return;
5205
5206   tree_code code = gimple_assign_rhs_code (stmt);
5207   unsigned int nops = gimple_num_ops (stmt);
5208
5209   if (!vect_truncatable_operation_p (code))
5210     /* Check that all relevant input operands are compatible, and update
5211        [MIN_VALUE, MAX_VALUE] to include their ranges.  */
5212     for (unsigned int i = 1; i < nops; ++i)
5213       {
5214         tree op = gimple_op (stmt, i);
5215         if (TREE_CODE (op) == INTEGER_CST)
5216           {
5217             /* Don't require the integer to have RHS_TYPE (which it might
5218                not for things like shift amounts, etc.), but do require it
5219                to fit the type.  */
5220             if (!int_fits_type_p (op, type))
5221               return;
5222
5223             min_value = wi::min (min_value, wi::to_wide (op, precision), sign);
5224             max_value = wi::max (max_value, wi::to_wide (op, precision), sign);
5225           }
5226         else if (TREE_CODE (op) == SSA_NAME)
5227           {
5228             /* Ignore codes that don't take uniform arguments.  */
5229             if (!types_compatible_p (TREE_TYPE (op), type))
5230               return;
5231
5232             wide_int op_min_value, op_max_value;
5233             if (!vect_get_range_info (op, &op_min_value, &op_max_value))
5234               return;
5235
5236             min_value = wi::min (min_value, op_min_value, sign);
5237             max_value = wi::max (max_value, op_max_value, sign);
5238           }
5239         else
5240           return;
5241       }
5242
5243   /* Try to switch signed types for unsigned types if we can.
5244      This is better for two reasons.  First, unsigned ops tend
5245      to be cheaper than signed ops.  Second, it means that we can
5246      handle things like:
5247
5248         signed char c;
5249         int res = (int) c & 0xff00; // range [0x0000, 0xff00]
5250
5251      as:
5252
5253         signed char c;
5254         unsigned short res_1 = (unsigned short) c & 0xff00;
5255         int res = (int) res_1;
5256
5257      where the intermediate result res_1 has unsigned rather than
5258      signed type.  */
5259   if (sign == SIGNED && !wi::neg_p (min_value))
5260     sign = UNSIGNED;
5261
5262   /* See what precision is required for MIN_VALUE and MAX_VALUE.  */
5263   unsigned int precision1 = wi::min_precision (min_value, sign);
5264   unsigned int precision2 = wi::min_precision (max_value, sign);
5265   unsigned int value_precision = MAX (precision1, precision2);
5266   if (value_precision >= precision)
5267     return;
5268
5269   if (dump_enabled_p ())
5270     dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
5271                      " without loss of precision: %G",
5272                      sign == SIGNED ? "signed" : "unsigned",
5273                      value_precision, (gimple *) stmt);
5274
5275   vect_set_operation_type (stmt_info, type, value_precision, sign);
5276   vect_set_min_input_precision (stmt_info, type, value_precision);
5277 }
5278
5279 /* Use information about the users of STMT's result to decide whether
5280    STMT (described by STMT_INFO) could be done in a narrower type.
5281    This is effectively a backward propagation.  */
5282
5283 static void
5284 vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
5285 {
5286   tree_code code = gimple_assign_rhs_code (stmt);
5287   unsigned int opno = (code == COND_EXPR ? 2 : 1);
5288   tree type = TREE_TYPE (gimple_op (stmt, opno));
5289   if (!vect_narrowable_type_p (type))
5290     return;
5291
5292   unsigned int precision = TYPE_PRECISION (type);
5293   unsigned int operation_precision, min_input_precision;
5294   switch (code)
5295     {
5296     CASE_CONVERT:
5297       /* Only the bits that contribute to the output matter.  Don't change
5298          the precision of the operation itself.  */
5299       operation_precision = precision;
5300       min_input_precision = stmt_info->min_output_precision;
5301       break;
5302
5303     case LSHIFT_EXPR:
5304     case RSHIFT_EXPR:
5305       {
5306         tree shift = gimple_assign_rhs2 (stmt);
5307         if (TREE_CODE (shift) != INTEGER_CST
5308             || !wi::ltu_p (wi::to_widest (shift), precision))
5309           return;
5310         unsigned int const_shift = TREE_INT_CST_LOW (shift);
5311         if (code == LSHIFT_EXPR)
5312           {
5313             /* Avoid creating an undefined shift.
5314
5315                ??? We could instead use min_output_precision as-is and
5316                optimize out-of-range shifts to zero.  However, only
5317                degenerate testcases shift away all their useful input data,
5318                and it isn't natural to drop input operations in the middle
5319                of vectorization.  This sort of thing should really be
5320                handled before vectorization.  */
5321             operation_precision = MAX (stmt_info->min_output_precision,
5322                                        const_shift + 1);
5323             /* We need CONST_SHIFT fewer bits of the input.  */
5324             min_input_precision = (MAX (operation_precision, const_shift)
5325                                    - const_shift);
5326           }
5327         else
5328           {
5329             /* We need CONST_SHIFT extra bits to do the operation.  */
5330             operation_precision = (stmt_info->min_output_precision
5331                                    + const_shift);
5332             min_input_precision = operation_precision;
5333           }
5334         break;
5335       }
5336
5337     default:
5338       if (vect_truncatable_operation_p (code))
5339         {
5340           /* Input bit N has no effect on output bits N-1 and lower.  */
5341           operation_precision = stmt_info->min_output_precision;
5342           min_input_precision = operation_precision;
5343           break;
5344         }
5345       return;
5346     }
5347
5348   if (operation_precision < precision)
5349     {
5350       if (dump_enabled_p ())
5351         dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
5352                          " without affecting users: %G",
5353                          TYPE_UNSIGNED (type) ? "unsigned" : "signed",
5354                          operation_precision, (gimple *) stmt);
5355       vect_set_operation_type (stmt_info, type, operation_precision,
5356                                TYPE_SIGN (type));
5357     }
5358   vect_set_min_input_precision (stmt_info, type, min_input_precision);
5359 }
5360
5361 /* Return true if the statement described by STMT_INFO sets a boolean
5362    SSA_NAME and if we know how to vectorize this kind of statement using
5363    vector mask types.  */
5364
5365 static bool
5366 possible_vector_mask_operation_p (stmt_vec_info stmt_info)
5367 {
5368   tree lhs = gimple_get_lhs (stmt_info->stmt);
5369   if (!lhs
5370       || TREE_CODE (lhs) != SSA_NAME
5371       || !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
5372     return false;
5373
5374   if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
5375     {
5376       tree_code rhs_code = gimple_assign_rhs_code (assign);
5377       switch (rhs_code)
5378         {
5379         CASE_CONVERT:
5380         case SSA_NAME:
5381         case BIT_NOT_EXPR:
5382         case BIT_IOR_EXPR:
5383         case BIT_XOR_EXPR:
5384         case BIT_AND_EXPR:
5385           return true;
5386
5387         default:
5388           return TREE_CODE_CLASS (rhs_code) == tcc_comparison;
5389         }
5390     }
5391   else if (is_a <gphi *> (stmt_info->stmt))
5392     return true;
5393   return false;
5394 }
5395
5396 /* If STMT_INFO sets a boolean SSA_NAME, see whether we should use
5397    a vector mask type instead of a normal vector type.  Record the
5398    result in STMT_INFO->mask_precision.  */
5399
5400 static void
5401 vect_determine_mask_precision (vec_info *vinfo, stmt_vec_info stmt_info)
5402 {
5403   if (!possible_vector_mask_operation_p (stmt_info))
5404     return;
5405
5406   /* If at least one boolean input uses a vector mask type,
5407      pick the mask type with the narrowest elements.
5408
5409      ??? This is the traditional behavior.  It should always produce
5410      the smallest number of operations, but isn't necessarily the
5411      optimal choice.  For example, if we have:
5412
5413        a = b & c
5414
5415      where:
5416
5417        - the user of a wants it to have a mask type for 16-bit elements (M16)
5418        - b also uses M16
5419        - c uses a mask type for 8-bit elements (M8)
5420
5421      then picking M8 gives:
5422
5423        - 1 M16->M8 pack for b
5424        - 1 M8 AND for a
5425        - 2 M8->M16 unpacks for the user of a
5426
5427      whereas picking M16 would have given:
5428
5429        - 2 M8->M16 unpacks for c
5430        - 2 M16 ANDs for a
5431
5432      The number of operations are equal, but M16 would have given
5433      a shorter dependency chain and allowed more ILP.  */
5434   unsigned int precision = ~0U;
5435   if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
5436     {
5437       unsigned int nops = gimple_num_ops (assign);
5438       for (unsigned int i = 1; i < nops; ++i)
5439         {
5440           tree rhs = gimple_op (assign, i);
5441           if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs)))
5442             continue;
5443
5444           stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
5445           if (!def_stmt_info)
5446             /* Don't let external or constant operands influence the choice.
5447                We can convert them to whichever vector type we pick.  */
5448             continue;
5449
5450           if (def_stmt_info->mask_precision)
5451             {
5452               if (precision > def_stmt_info->mask_precision)
5453                 precision = def_stmt_info->mask_precision;
5454             }
5455         }
5456
5457       /* If the statement compares two values that shouldn't use vector masks,
5458          try comparing the values as normal scalars instead.  */
5459       tree_code rhs_code = gimple_assign_rhs_code (assign);
5460       if (precision == ~0U
5461           && TREE_CODE_CLASS (rhs_code) == tcc_comparison)
5462         {
5463           tree rhs1_type = TREE_TYPE (gimple_assign_rhs1 (assign));
5464           scalar_mode mode;
5465           tree vectype, mask_type;
5466           if (is_a <scalar_mode> (TYPE_MODE (rhs1_type), &mode)
5467               && (vectype = get_vectype_for_scalar_type (vinfo, rhs1_type))
5468               && (mask_type = get_mask_type_for_scalar_type (vinfo, rhs1_type))
5469               && expand_vec_cmp_expr_p (vectype, mask_type, rhs_code))
5470             precision = GET_MODE_BITSIZE (mode);
5471         }
5472     }
5473   else
5474     {
5475       gphi *phi = as_a <gphi *> (stmt_info->stmt);
5476       for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
5477         {
5478           tree rhs = gimple_phi_arg_def (phi, i);
5479
5480           stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
5481           if (!def_stmt_info)
5482             /* Don't let external or constant operands influence the choice.
5483                We can convert them to whichever vector type we pick.  */
5484             continue;
5485
5486           if (def_stmt_info->mask_precision)
5487             {
5488               if (precision > def_stmt_info->mask_precision)
5489                 precision = def_stmt_info->mask_precision;
5490             }
5491         }
5492     }
5493
5494   if (dump_enabled_p ())
5495     {
5496       if (precision == ~0U)
5497         dump_printf_loc (MSG_NOTE, vect_location,
5498                          "using normal nonmask vectors for %G",
5499                          stmt_info->stmt);
5500       else
5501         dump_printf_loc (MSG_NOTE, vect_location,
5502                          "using boolean precision %d for %G",
5503                          precision, stmt_info->stmt);
5504     }
5505
5506   stmt_info->mask_precision = precision;
5507 }
5508
5509 /* Handle vect_determine_precisions for STMT_INFO, given that we
5510    have already done so for the users of its result.  */
5511
5512 void
5513 vect_determine_stmt_precisions (vec_info *vinfo, stmt_vec_info stmt_info)
5514 {
5515   vect_determine_min_output_precision (vinfo, stmt_info);
5516   if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
5517     {
5518       vect_determine_precisions_from_range (stmt_info, stmt);
5519       vect_determine_precisions_from_users (stmt_info, stmt);
5520     }
5521 }
5522
5523 /* Walk backwards through the vectorizable region to determine the
5524    values of these fields:
5525
5526    - min_output_precision
5527    - min_input_precision
5528    - operation_precision
5529    - operation_sign.  */
5530
5531 void
5532 vect_determine_precisions (vec_info *vinfo)
5533 {
5534   DUMP_VECT_SCOPE ("vect_determine_precisions");
5535
5536   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
5537     {
5538       class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5539       basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5540       unsigned int nbbs = loop->num_nodes;
5541
5542       for (unsigned int i = 0; i < nbbs; i++)
5543         {
5544           basic_block bb = bbs[i];
5545           for (auto gsi = gsi_start_phis (bb);
5546                !gsi_end_p (gsi); gsi_next (&gsi))
5547             {
5548               stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
5549               if (stmt_info)
5550                 vect_determine_mask_precision (vinfo, stmt_info);
5551             }
5552           for (auto si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5553             if (!is_gimple_debug (gsi_stmt (si)))
5554               vect_determine_mask_precision
5555                 (vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
5556         }
5557       for (unsigned int i = 0; i < nbbs; i++)
5558         {
5559           basic_block bb = bbs[nbbs - i - 1];
5560           for (gimple_stmt_iterator si = gsi_last_bb (bb);
5561                !gsi_end_p (si); gsi_prev (&si))
5562             if (!is_gimple_debug (gsi_stmt (si)))
5563               vect_determine_stmt_precisions
5564                 (vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
5565           for (auto gsi = gsi_start_phis (bb);
5566                !gsi_end_p (gsi); gsi_next (&gsi))
5567             {
5568               stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
5569               if (stmt_info)
5570                 vect_determine_stmt_precisions (vinfo, stmt_info);
5571             }
5572         }
5573     }
5574   else
5575     {
5576       bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
5577       for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
5578         {
5579           basic_block bb = bb_vinfo->bbs[i];
5580           for (auto gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5581             {
5582               stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
5583               if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5584                 vect_determine_mask_precision (vinfo, stmt_info);
5585             }
5586           for (auto gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5587             {
5588               stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
5589               if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5590                 vect_determine_mask_precision (vinfo, stmt_info);
5591             }
5592         }
5593       for (int i = bb_vinfo->bbs.length () - 1; i != -1; --i)
5594         {
5595           for (gimple_stmt_iterator gsi = gsi_last_bb (bb_vinfo->bbs[i]);
5596                !gsi_end_p (gsi); gsi_prev (&gsi))
5597             {
5598               stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
5599               if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5600                 vect_determine_stmt_precisions (vinfo, stmt_info);
5601             }
5602           for (auto gsi = gsi_start_phis (bb_vinfo->bbs[i]);
5603                !gsi_end_p (gsi); gsi_next (&gsi))
5604             {
5605               stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
5606               if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5607                 vect_determine_stmt_precisions (vinfo, stmt_info);
5608             }
5609         }
5610     }
5611 }
5612
5613 typedef gimple *(*vect_recog_func_ptr) (vec_info *, stmt_vec_info, tree *);
5614
5615 struct vect_recog_func
5616 {
5617   vect_recog_func_ptr fn;
5618   const char *name;
5619 };
5620
5621 /* Note that ordering matters - the first pattern matching on a stmt is
5622    taken which means usually the more complex one needs to preceed the
5623    less comples onex (widen_sum only after dot_prod or sad for example).  */
5624 static vect_recog_func vect_vect_recog_func_ptrs[] = {
5625   { vect_recog_over_widening_pattern, "over_widening" },
5626   /* Must come after over_widening, which narrows the shift as much as
5627      possible beforehand.  */
5628   { vect_recog_average_pattern, "average" },
5629   { vect_recog_cond_expr_convert_pattern, "cond_expr_convert" },
5630   { vect_recog_mulhs_pattern, "mult_high" },
5631   { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
5632   { vect_recog_widen_mult_pattern, "widen_mult" },
5633   { vect_recog_dot_prod_pattern, "dot_prod" },
5634   { vect_recog_sad_pattern, "sad" },
5635   { vect_recog_widen_sum_pattern, "widen_sum" },
5636   { vect_recog_pow_pattern, "pow" },
5637   { vect_recog_popcount_pattern, "popcount" },
5638   { vect_recog_widen_shift_pattern, "widen_shift" },
5639   { vect_recog_rotate_pattern, "rotate" },
5640   { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
5641   { vect_recog_divmod_pattern, "divmod" },
5642   { vect_recog_mult_pattern, "mult" },
5643   { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
5644   { vect_recog_bool_pattern, "bool" },
5645   /* This must come before mask conversion, and includes the parts
5646      of mask conversion that are needed for gather and scatter
5647      internal functions.  */
5648   { vect_recog_gather_scatter_pattern, "gather_scatter" },
5649   { vect_recog_mask_conversion_pattern, "mask_conversion" },
5650   { vect_recog_widen_plus_pattern, "widen_plus" },
5651   { vect_recog_widen_minus_pattern, "widen_minus" },
5652 };
5653
5654 const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);
5655
5656 /* Mark statements that are involved in a pattern.  */
5657
5658 void
5659 vect_mark_pattern_stmts (vec_info *vinfo,
5660                          stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
5661                          tree pattern_vectype)
5662 {
5663   stmt_vec_info orig_stmt_info_saved = orig_stmt_info;
5664   gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
5665
5666   gimple *orig_pattern_stmt = NULL;
5667   if (is_pattern_stmt_p (orig_stmt_info))
5668     {
5669       /* We're replacing a statement in an existing pattern definition
5670          sequence.  */
5671       orig_pattern_stmt = orig_stmt_info->stmt;
5672       if (dump_enabled_p ())
5673         dump_printf_loc (MSG_NOTE, vect_location,
5674                          "replacing earlier pattern %G", orig_pattern_stmt);
5675
5676       /* To keep the book-keeping simple, just swap the lhs of the
5677          old and new statements, so that the old one has a valid but
5678          unused lhs.  */
5679       tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
5680       gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
5681       gimple_set_lhs (pattern_stmt, old_lhs);
5682
5683       if (dump_enabled_p ())
5684         dump_printf_loc (MSG_NOTE, vect_location, "with %G", pattern_stmt);
5685
5686       /* Switch to the statement that ORIG replaces.  */
5687       orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);
5688
5689       /* We shouldn't be replacing the main pattern statement.  */
5690       gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
5691                   != orig_pattern_stmt);
5692     }
5693
5694   if (def_seq)
5695     for (gimple_stmt_iterator si = gsi_start (def_seq);
5696          !gsi_end_p (si); gsi_next (&si))
5697       {
5698         if (dump_enabled_p ())
5699           dump_printf_loc (MSG_NOTE, vect_location,
5700                            "extra pattern stmt: %G", gsi_stmt (si));
5701         stmt_vec_info pattern_stmt_info
5702           = vect_init_pattern_stmt (vinfo, gsi_stmt (si),
5703                                     orig_stmt_info, pattern_vectype);
5704         /* Stmts in the def sequence are not vectorizable cycle or
5705            induction defs, instead they should all be vect_internal_def
5706            feeding the main pattern stmt which retains this def type.  */
5707         STMT_VINFO_DEF_TYPE (pattern_stmt_info) = vect_internal_def;
5708       }
5709
5710   if (orig_pattern_stmt)
5711     {
5712       vect_init_pattern_stmt (vinfo, pattern_stmt,
5713                               orig_stmt_info, pattern_vectype);
5714
5715       /* Insert all the new pattern statements before the original one.  */
5716       gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
5717       gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
5718                                                orig_def_seq);
5719       gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
5720       gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);
5721
5722       /* Remove the pattern statement that this new pattern replaces.  */
5723       gsi_remove (&gsi, false);
5724     }
5725   else
5726     vect_set_pattern_stmt (vinfo,
5727                            pattern_stmt, orig_stmt_info, pattern_vectype);
5728
5729   /* Transfer reduction path info to the pattern.  */
5730   if (STMT_VINFO_REDUC_IDX (orig_stmt_info_saved) != -1)
5731     {
5732       gimple_match_op op;
5733       if (!gimple_extract_op (orig_stmt_info_saved->stmt, &op))
5734         gcc_unreachable ();
5735       tree lookfor = op.ops[STMT_VINFO_REDUC_IDX (orig_stmt_info)];
5736       /* Search the pattern def sequence and the main pattern stmt.  Note
5737          we may have inserted all into a containing pattern def sequence
5738          so the following is a bit awkward.  */
5739       gimple_stmt_iterator si;
5740       gimple *s;
5741       if (def_seq)
5742         {
5743           si = gsi_start (def_seq);
5744           s = gsi_stmt (si);
5745           gsi_next (&si);
5746         }
5747       else
5748         {
5749           si = gsi_none ();
5750           s = pattern_stmt;
5751         }
5752       do
5753         {
5754           bool found = false;
5755           if (gimple_extract_op (s, &op))
5756             for (unsigned i = 0; i < op.num_ops; ++i)
5757               if (op.ops[i] == lookfor)
5758                 {
5759                   STMT_VINFO_REDUC_IDX (vinfo->lookup_stmt (s)) = i;
5760                   lookfor = gimple_get_lhs (s);
5761                   found = true;
5762                   break;
5763                 }
5764           if (s == pattern_stmt)
5765             {
5766               if (!found && dump_enabled_p ())
5767                 dump_printf_loc (MSG_NOTE, vect_location,
5768                                  "failed to update reduction index.\n");
5769               break;
5770             }
5771           if (gsi_end_p (si))
5772             s = pattern_stmt;
5773           else
5774             {
5775               s = gsi_stmt (si);
5776               if (s == pattern_stmt)
5777                 /* Found the end inside a bigger pattern def seq.  */
5778                 si = gsi_none ();
5779               else
5780                 gsi_next (&si);
5781             }
5782         } while (1);
5783     }
5784 }
5785
5786 /* Function vect_pattern_recog_1
5787
5788    Input:
5789    PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
5790         computation pattern.
5791    STMT_INFO: A stmt from which the pattern search should start.
5792
5793    If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
5794    a sequence of statements that has the same functionality and can be
5795    used to replace STMT_INFO.  It returns the last statement in the sequence
5796    and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
5797    PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
5798    statement, having first checked that the target supports the new operation
5799    in that type.
5800
5801    This function also does some bookkeeping, as explained in the documentation
5802    for vect_recog_pattern.  */
5803
5804 static void
5805 vect_pattern_recog_1 (vec_info *vinfo,
5806                       vect_recog_func *recog_func, stmt_vec_info stmt_info)
5807 {
5808   gimple *pattern_stmt;
5809   loop_vec_info loop_vinfo;
5810   tree pattern_vectype;
5811
5812   /* If this statement has already been replaced with pattern statements,
5813      leave the original statement alone, since the first match wins.
5814      Instead try to match against the definition statements that feed
5815      the main pattern statement.  */
5816   if (STMT_VINFO_IN_PATTERN_P (stmt_info))
5817     {
5818       gimple_stmt_iterator gsi;
5819       for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
5820            !gsi_end_p (gsi); gsi_next (&gsi))
5821         vect_pattern_recog_1 (vinfo, recog_func,
5822                               vinfo->lookup_stmt (gsi_stmt (gsi)));
5823       return;
5824     }
5825
5826   gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
5827   pattern_stmt = recog_func->fn (vinfo, stmt_info, &pattern_vectype);
5828   if (!pattern_stmt)
5829     {
5830       /* Clear any half-formed pattern definition sequence.  */
5831       STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
5832       return;
5833     }
5834
5835   loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5836
5837   /* Found a vectorizable pattern.  */
5838   if (dump_enabled_p ())
5839     dump_printf_loc (MSG_NOTE, vect_location,
5840                      "%s pattern recognized: %G",
5841                      recog_func->name, pattern_stmt);
5842
5843   /* Mark the stmts that are involved in the pattern. */
5844   vect_mark_pattern_stmts (vinfo, stmt_info, pattern_stmt, pattern_vectype);
5845
5846   /* Patterns cannot be vectorized using SLP, because they change the order of
5847      computation.  */
5848   if (loop_vinfo)
5849     {
5850       unsigned ix, ix2;
5851       stmt_vec_info *elem_ptr;
5852       VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
5853                              elem_ptr, *elem_ptr == stmt_info);
5854     }
5855 }
5856
5857
5858 /* Function vect_pattern_recog
5859
5860    Input:
5861    LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
5862         computation idioms.
5863
5864    Output - for each computation idiom that is detected we create a new stmt
5865         that provides the same functionality and that can be vectorized.  We
5866         also record some information in the struct_stmt_info of the relevant
5867         stmts, as explained below:
5868
5869    At the entry to this function we have the following stmts, with the
5870    following initial value in the STMT_VINFO fields:
5871
5872          stmt                     in_pattern_p  related_stmt    vec_stmt
5873          S1: a_i = ....                 -       -               -
5874          S2: a_2 = ..use(a_i)..         -       -               -
5875          S3: a_1 = ..use(a_2)..         -       -               -
5876          S4: a_0 = ..use(a_1)..         -       -               -
5877          S5: ... = ..use(a_0)..         -       -               -
5878
5879    Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
5880    represented by a single stmt.  We then:
5881    - create a new stmt S6 equivalent to the pattern (the stmt is not
5882      inserted into the code)
5883    - fill in the STMT_VINFO fields as follows:
5884
5885                                   in_pattern_p  related_stmt    vec_stmt
5886          S1: a_i = ....                 -       -               -
5887          S2: a_2 = ..use(a_i)..         -       -               -
5888          S3: a_1 = ..use(a_2)..         -       -               -
5889          S4: a_0 = ..use(a_1)..         true    S6              -
5890           '---> S6: a_new = ....        -       S4              -
5891          S5: ... = ..use(a_0)..         -       -               -
5892
5893    (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
5894    to each other through the RELATED_STMT field).
5895
5896    S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
5897    of S4 because it will replace all its uses.  Stmts {S1,S2,S3} will
5898    remain irrelevant unless used by stmts other than S4.
5899
5900    If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
5901    (because they are marked as irrelevant).  It will vectorize S6, and record
5902    a pointer to the new vector stmt VS6 from S6 (as usual).
5903    S4 will be skipped, and S5 will be vectorized as usual:
5904
5905                                   in_pattern_p  related_stmt    vec_stmt
5906          S1: a_i = ....                 -       -               -
5907          S2: a_2 = ..use(a_i)..         -       -               -
5908          S3: a_1 = ..use(a_2)..         -       -               -
5909        > VS6: va_new = ....             -       -               -
5910          S4: a_0 = ..use(a_1)..         true    S6              VS6
5911           '---> S6: a_new = ....        -       S4              VS6
5912        > VS5: ... = ..vuse(va_new)..    -       -               -
5913          S5: ... = ..use(a_0)..         -       -               -
5914
5915    DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
5916    elsewhere), and we'll end up with:
5917
5918         VS6: va_new = ....
5919         VS5: ... = ..vuse(va_new)..
5920
5921    In case of more than one pattern statements, e.g., widen-mult with
5922    intermediate type:
5923
5924      S1  a_t = ;
5925      S2  a_T = (TYPE) a_t;
5926            '--> S3: a_it = (interm_type) a_t;
5927      S4  prod_T = a_T * CONST;
5928            '--> S5: prod_T' = a_it w* CONST;
5929
5930    there may be other users of a_T outside the pattern.  In that case S2 will
5931    be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
5932    and vectorized.  The vector stmt VS2 will be recorded in S2, and VS3 will
5933    be recorded in S3.  */
5934
5935 void
5936 vect_pattern_recog (vec_info *vinfo)
5937 {
5938   class loop *loop;
5939   basic_block *bbs;
5940   unsigned int nbbs;
5941   gimple_stmt_iterator si;
5942   unsigned int i, j;
5943
5944   vect_determine_precisions (vinfo);
5945
5946   DUMP_VECT_SCOPE ("vect_pattern_recog");
5947
5948   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
5949     {
5950       loop = LOOP_VINFO_LOOP (loop_vinfo);
5951       bbs = LOOP_VINFO_BBS (loop_vinfo);
5952       nbbs = loop->num_nodes;
5953
5954       /* Scan through the loop stmts, applying the pattern recognition
5955          functions starting at each stmt visited:  */
5956       for (i = 0; i < nbbs; i++)
5957         {
5958           basic_block bb = bbs[i];
5959           for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5960             {
5961               if (is_gimple_debug (gsi_stmt (si)))
5962                 continue;
5963               stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
5964               /* Scan over all generic vect_recog_xxx_pattern functions.  */
5965               for (j = 0; j < NUM_PATTERNS; j++)
5966                 vect_pattern_recog_1 (vinfo, &vect_vect_recog_func_ptrs[j],
5967                                       stmt_info);
5968             }
5969         }
5970     }
5971   else
5972     {
5973       bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
5974       for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
5975         for (gimple_stmt_iterator gsi = gsi_start_bb (bb_vinfo->bbs[i]);
5976              !gsi_end_p (gsi); gsi_next (&gsi))
5977           {
5978             stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (gsi_stmt (gsi));
5979             if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info))
5980               continue;
5981
5982             /* Scan over all generic vect_recog_xxx_pattern functions.  */
5983             for (j = 0; j < NUM_PATTERNS; j++)
5984               vect_pattern_recog_1 (vinfo,
5985                                     &vect_vect_recog_func_ptrs[j], stmt_info);
5986           }
5987     }
5988
5989   /* After this no more add_stmt calls are allowed.  */
5990   vinfo->stmt_vec_info_ro = true;
5991 }