gcc/tree-vect-patterns.cc

   1 /* Analysis Utilities for Loop Vectorization.
   2    Copyright (C) 2006-2022 Free Software Foundation, Inc.
   3    Contributed by Dorit Nuzman <dorit@il.ibm.com>
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "backend.h"
  25 #include "rtl.h"
  26 #include "tree.h"
  27 #include "gimple.h"
  28 #include "ssa.h"
  29 #include "expmed.h"
  30 #include "optabs-tree.h"
  31 #include "insn-config.h"
  32 #include "recog.h"              /* FIXME: for insn_data */
  33 #include "fold-const.h"
  34 #include "stor-layout.h"
  35 #include "tree-eh.h"
  36 #include "gimplify.h"
  37 #include "gimple-iterator.h"
  38 #include "cfgloop.h"
  39 #include "tree-vectorizer.h"
  40 #include "dumpfile.h"
  41 #include "builtins.h"
  42 #include "internal-fn.h"
  43 #include "case-cfn-macros.h"
  44 #include "fold-const-call.h"
  45 #include "attribs.h"
  46 #include "cgraph.h"
  47 #include "omp-simd-clone.h"
  48 #include "predict.h"
  49 #include "tree-vector-builder.h"
  50 #include "vec-perm-indices.h"
  51 #include "gimple-range.h"
  52
  53 /* Return true if we have a useful VR_RANGE range for VAR, storing it
  54    in *MIN_VALUE and *MAX_VALUE if so.  Note the range in the dump files.  */
  55
  56 static bool
  57 vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
  58 {
  59   value_range vr;
  60   get_range_query (cfun)->range_of_expr (vr, var);
  61   if (vr.undefined_p ())
  62     vr.set_varying (TREE_TYPE (var));
  63   *min_value = wi::to_wide (vr.min ());
  64   *max_value = wi::to_wide (vr.max ());
  65   value_range_kind vr_type = vr.kind ();
  66   wide_int nonzero = get_nonzero_bits (var);
  67   signop sgn = TYPE_SIGN (TREE_TYPE (var));
  68   if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
  69                                          nonzero, sgn) == VR_RANGE)
  70     {
  71       if (dump_enabled_p ())
  72         {
  73           dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
  74           dump_printf (MSG_NOTE, " has range [");
  75           dump_hex (MSG_NOTE, *min_value);
  76           dump_printf (MSG_NOTE, ", ");
  77           dump_hex (MSG_NOTE, *max_value);
  78           dump_printf (MSG_NOTE, "]\n");
  79         }
  80       return true;
  81     }
  82   else
  83     {
  84       if (dump_enabled_p ())
  85         {
  86           dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
  87           dump_printf (MSG_NOTE, " has no range info\n");
  88         }
  89       return false;
  90     }
  91 }
  92
  93 /* Report that we've found an instance of pattern PATTERN in
  94    statement STMT.  */
  95
  96 static void
  97 vect_pattern_detected (const char *name, gimple *stmt)
  98 {
  99   if (dump_enabled_p ())
 100     dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt);
 101 }
 102
 103 /* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
 104    return the pattern statement's stmt_vec_info.  Set its vector type to
 105    VECTYPE if it doesn't have one already.  */
 106
 107 static stmt_vec_info
 108 vect_init_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
 109                         stmt_vec_info orig_stmt_info, tree vectype)
 110 {
 111   stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
 112   if (pattern_stmt_info == NULL)
 113     pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
 114   gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));
 115
 116   pattern_stmt_info->pattern_stmt_p = true;
 117   STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
 118   STMT_VINFO_DEF_TYPE (pattern_stmt_info)
 119     = STMT_VINFO_DEF_TYPE (orig_stmt_info);
 120   if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
 121     {
 122       gcc_assert (!vectype
 123                   || (VECTOR_BOOLEAN_TYPE_P (vectype)
 124                       == vect_use_mask_type_p (orig_stmt_info)));
 125       STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
 126       pattern_stmt_info->mask_precision = orig_stmt_info->mask_precision;
 127     }
 128   return pattern_stmt_info;
 129 }
 130
 131 /* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
 132    Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
 133    have one already.  */
 134
 135 static void
 136 vect_set_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
 137                        stmt_vec_info orig_stmt_info, tree vectype)
 138 {
 139   STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
 140   STMT_VINFO_RELATED_STMT (orig_stmt_info)
 141     = vect_init_pattern_stmt (vinfo, pattern_stmt, orig_stmt_info, vectype);
 142 }
 143
 144 /* Add NEW_STMT to STMT_INFO's pattern definition statements.  If VECTYPE
 145    is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
 146    be different from the vector type of the final pattern statement.
 147    If VECTYPE is a mask type, SCALAR_TYPE_FOR_MASK is the scalar type
 148    from which it was derived.  */
 149
 150 static inline void
 151 append_pattern_def_seq (vec_info *vinfo,
 152                         stmt_vec_info stmt_info, gimple *new_stmt,
 153                         tree vectype = NULL_TREE,
 154                         tree scalar_type_for_mask = NULL_TREE)
 155 {
 156   gcc_assert (!scalar_type_for_mask
 157               == (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype)));
 158   if (vectype)
 159     {
 160       stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
 161       STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
 162       if (scalar_type_for_mask)
 163         new_stmt_info->mask_precision
 164           = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (scalar_type_for_mask));
 165     }
 166   gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
 167                                       new_stmt);
 168 }
 169
 170 /* The caller wants to perform new operations on vect_external variable
 171    VAR, so that the result of the operations would also be vect_external.
 172    Return the edge on which the operations can be performed, if one exists.
 173    Return null if the operations should instead be treated as part of
 174    the pattern that needs them.  */
 175
 176 static edge
 177 vect_get_external_def_edge (vec_info *vinfo, tree var)
 178 {
 179   edge e = NULL;
 180   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
 181     {
 182       e = loop_preheader_edge (loop_vinfo->loop);
 183       if (!SSA_NAME_IS_DEFAULT_DEF (var))
 184         {
 185           basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
 186           if (bb == NULL
 187               || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
 188             e = NULL;
 189         }
 190     }
 191   return e;
 192 }
 193
 194 /* Return true if the target supports a vector version of CODE,
 195    where CODE is known to map to a direct optab with the given SUBTYPE.
 196    ITYPE specifies the type of (some of) the scalar inputs and OTYPE
 197    specifies the type of the scalar result.
 198
 199    If CODE allows the inputs and outputs to have different type
 200    (such as for WIDEN_SUM_EXPR), it is the input mode rather
 201    than the output mode that determines the appropriate target pattern.
 202    Operand 0 of the target pattern then specifies the mode that the output
 203    must have.
 204
 205    When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
 206    Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
 207    is nonnull.  */
 208
 209 static bool
 210 vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code,
 211                                  tree itype, tree *vecotype_out,
 212                                  tree *vecitype_out = NULL,
 213                                  enum optab_subtype subtype = optab_default)
 214 {
 215   tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
 216   if (!vecitype)
 217     return false;
 218
 219   tree vecotype = get_vectype_for_scalar_type (vinfo, otype);
 220   if (!vecotype)
 221     return false;
 222
 223   optab optab = optab_for_tree_code (code, vecitype, subtype);
 224   if (!optab)
 225     return false;
 226
 227   insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
 228   if (icode == CODE_FOR_nothing
 229       || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
 230     return false;
 231
 232   *vecotype_out = vecotype;
 233   if (vecitype_out)
 234     *vecitype_out = vecitype;
 235   return true;
 236 }
 237
 238 /* Round bit precision PRECISION up to a full element.  */
 239
 240 static unsigned int
 241 vect_element_precision (unsigned int precision)
 242 {
 243   precision = 1 << ceil_log2 (precision);
 244   return MAX (precision, BITS_PER_UNIT);
 245 }
 246
 247 /* If OP is defined by a statement that's being considered for vectorization,
 248    return information about that statement, otherwise return NULL.  */
 249
 250 static stmt_vec_info
 251 vect_get_internal_def (vec_info *vinfo, tree op)
 252 {
 253   stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
 254   if (def_stmt_info
 255       && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
 256     return def_stmt_info;
 257   return NULL;
 258 }
 259
 260 /* Check whether NAME, an ssa-name used in STMT_VINFO,
 261    is a result of a type promotion, such that:
 262      DEF_STMT: NAME = NOP (name0)
 263    If CHECK_SIGN is TRUE, check that either both types are signed or both are
 264    unsigned.  */
 265
 266 static bool
 267 type_conversion_p (vec_info *vinfo, tree name, bool check_sign,
 268                    tree *orig_type, gimple **def_stmt, bool *promotion)
 269 {
 270   tree type = TREE_TYPE (name);
 271   tree oprnd0;
 272   enum vect_def_type dt;
 273
 274   stmt_vec_info def_stmt_info;
 275   if (!vect_is_simple_use (name, vinfo, &dt, &def_stmt_info, def_stmt))
 276     return false;
 277
 278   if (dt != vect_internal_def
 279       && dt != vect_external_def && dt != vect_constant_def)
 280     return false;
 281
 282   if (!*def_stmt)
 283     return false;
 284
 285   if (!is_gimple_assign (*def_stmt))
 286     return false;
 287
 288   if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
 289     return false;
 290
 291   oprnd0 = gimple_assign_rhs1 (*def_stmt);
 292
 293   *orig_type = TREE_TYPE (oprnd0);
 294   if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
 295       || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
 296     return false;
 297
 298   if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
 299     *promotion = true;
 300   else
 301     *promotion = false;
 302
 303   if (!vect_is_simple_use (oprnd0, vinfo, &dt))
 304     return false;
 305
 306   return true;
 307 }
 308
 309 /* Holds information about an input operand after some sign changes
 310    and type promotions have been peeled away.  */
 311 class vect_unpromoted_value {
 312 public:
 313   vect_unpromoted_value ();
 314
 315   void set_op (tree, vect_def_type, stmt_vec_info = NULL);
 316
 317   /* The value obtained after peeling away zero or more casts.  */
 318   tree op;
 319
 320   /* The type of OP.  */
 321   tree type;
 322
 323   /* The definition type of OP.  */
 324   vect_def_type dt;
 325
 326   /* If OP is the result of peeling at least one cast, and if the cast
 327      of OP itself is a vectorizable statement, CASTER identifies that
 328      statement, otherwise it is null.  */
 329   stmt_vec_info caster;
 330 };
 331
 332 inline vect_unpromoted_value::vect_unpromoted_value ()
 333   : op (NULL_TREE),
 334     type (NULL_TREE),
 335     dt (vect_uninitialized_def),
 336     caster (NULL)
 337 {
 338 }
 339
 340 /* Set the operand to OP_IN, its definition type to DT_IN, and the
 341    statement that casts it to CASTER_IN.  */
 342
 343 inline void
 344 vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
 345                                stmt_vec_info caster_in)
 346 {
 347   op = op_in;
 348   type = TREE_TYPE (op);
 349   dt = dt_in;
 350   caster = caster_in;
 351 }
 352
 353 /* If OP is a vectorizable SSA name, strip a sequence of integer conversions
 354    to reach some vectorizable inner operand OP', continuing as long as it
 355    is possible to convert OP' back to OP using a possible sign change
 356    followed by a possible promotion P.  Return this OP', or null if OP is
 357    not a vectorizable SSA name.  If there is a promotion P, describe its
 358    input in UNPROM, otherwise describe OP' in UNPROM.  If SINGLE_USE_P
 359    is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
 360    have more than one user.
 361
 362    A successful return means that it is possible to go from OP' to OP
 363    via UNPROM.  The cast from OP' to UNPROM is at most a sign change,
 364    whereas the cast from UNPROM to OP might be a promotion, a sign
 365    change, or a nop.
 366
 367    E.g. say we have:
 368
 369        signed short *ptr = ...;
 370        signed short C = *ptr;
 371        unsigned short B = (unsigned short) C;    // sign change
 372        signed int A = (signed int) B;            // unsigned promotion
 373        ...possible other uses of A...
 374        unsigned int OP = (unsigned int) A;       // sign change
 375
 376    In this case it's possible to go directly from C to OP using:
 377
 378        OP = (unsigned int) (unsigned short) C;
 379             +------------+ +--------------+
 380                promotion      sign change
 381
 382    so OP' would be C.  The input to the promotion is B, so UNPROM
 383    would describe B.  */
 384
 385 static tree
 386 vect_look_through_possible_promotion (vec_info *vinfo, tree op,
 387                                       vect_unpromoted_value *unprom,
 388                                       bool *single_use_p = NULL)
 389 {
 390   tree res = NULL_TREE;
 391   tree op_type = TREE_TYPE (op);
 392   unsigned int orig_precision = TYPE_PRECISION (op_type);
 393   unsigned int min_precision = orig_precision;
 394   stmt_vec_info caster = NULL;
 395   while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
 396     {
 397       /* See whether OP is simple enough to vectorize.  */
 398       stmt_vec_info def_stmt_info;
 399       gimple *def_stmt;
 400       vect_def_type dt;
 401       if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
 402         break;
 403
 404       /* If OP is the input of a demotion, skip over it to see whether
 405          OP is itself the result of a promotion.  If so, the combined
 406          effect of the promotion and the demotion might fit the required
 407          pattern, otherwise neither operation fits.
 408
 409          This copes with cases such as the result of an arithmetic
 410          operation being truncated before being stored, and where that
 411          arithmetic operation has been recognized as an over-widened one.  */
 412       if (TYPE_PRECISION (op_type) <= min_precision)
 413         {
 414           /* Use OP as the UNPROM described above if we haven't yet
 415              found a promotion, or if using the new input preserves the
 416              sign of the previous promotion.  */
 417           if (!res
 418               || TYPE_PRECISION (unprom->type) == orig_precision
 419               || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type))
 420             {
 421               unprom->set_op (op, dt, caster);
 422               min_precision = TYPE_PRECISION (op_type);
 423             }
 424           /* Stop if we've already seen a promotion and if this
 425              conversion does more than change the sign.  */
 426           else if (TYPE_PRECISION (op_type)
 427                    != TYPE_PRECISION (unprom->type))
 428             break;
 429
 430           /* The sequence now extends to OP.  */
 431           res = op;
 432         }
 433
 434       /* See whether OP is defined by a cast.  Record it as CASTER if
 435          the cast is potentially vectorizable.  */
 436       if (!def_stmt)
 437         break;
 438       caster = def_stmt_info;
 439
 440       /* Ignore pattern statements, since we don't link uses for them.  */
 441       if (caster
 442           && single_use_p
 443           && !STMT_VINFO_RELATED_STMT (caster)
 444           && !has_single_use (res))
 445         *single_use_p = false;
 446
 447       gassign *assign = dyn_cast <gassign *> (def_stmt);
 448       if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
 449         break;
 450
 451       /* Continue with the input to the cast.  */
 452       op = gimple_assign_rhs1 (def_stmt);
 453       op_type = TREE_TYPE (op);
 454     }
 455   return res;
 456 }
 457
 458 /* OP is an integer operand to an operation that returns TYPE, and we
 459    want to treat the operation as a widening one.  So far we can treat
 460    it as widening from *COMMON_TYPE.
 461
 462    Return true if OP is suitable for such a widening operation,
 463    either widening from *COMMON_TYPE or from some supertype of it.
 464    Update *COMMON_TYPE to the supertype in the latter case.
 465
 466    SHIFT_P is true if OP is a shift amount.  */
 467
 468 static bool
 469 vect_joust_widened_integer (tree type, bool shift_p, tree op,
 470                             tree *common_type)
 471 {
 472   /* Calculate the minimum precision required by OP, without changing
 473      the sign of either operand.  */
 474   unsigned int precision;
 475   if (shift_p)
 476     {
 477       if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
 478         return false;
 479       precision = TREE_INT_CST_LOW (op);
 480     }
 481   else
 482     {
 483       precision = wi::min_precision (wi::to_widest (op),
 484                                      TYPE_SIGN (*common_type));
 485       if (precision * 2 > TYPE_PRECISION (type))
 486         return false;
 487     }
 488
 489   /* If OP requires a wider type, switch to that type.  The checks
 490      above ensure that this is still narrower than the result.  */
 491   precision = vect_element_precision (precision);
 492   if (TYPE_PRECISION (*common_type) < precision)
 493     *common_type = build_nonstandard_integer_type
 494       (precision, TYPE_UNSIGNED (*common_type));
 495   return true;
 496 }
 497
 498 /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
 499    is narrower than type, storing the supertype in *COMMON_TYPE if so.  */
 500
 501 static bool
 502 vect_joust_widened_type (tree type, tree new_type, tree *common_type)
 503 {
 504   if (types_compatible_p (*common_type, new_type))
 505     return true;
 506
 507   /* See if *COMMON_TYPE can hold all values of NEW_TYPE.  */
 508   if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
 509       && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
 510     return true;
 511
 512   /* See if NEW_TYPE can hold all values of *COMMON_TYPE.  */
 513   if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
 514       && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
 515     {
 516       *common_type = new_type;
 517       return true;
 518     }
 519
 520   /* We have mismatched signs, with the signed type being
 521      no wider than the unsigned type.  In this case we need
 522      a wider signed type.  */
 523   unsigned int precision = MAX (TYPE_PRECISION (*common_type),
 524                                 TYPE_PRECISION (new_type));
 525   precision *= 2;
 526
 527   if (precision * 2 > TYPE_PRECISION (type))
 528     return false;
 529
 530   *common_type = build_nonstandard_integer_type (precision, false);
 531   return true;
 532 }
 533
 534 /* Check whether STMT_INFO can be viewed as a tree of integer operations
 535    in which each node either performs CODE or WIDENED_CODE, and where
 536    each leaf operand is narrower than the result of STMT_INFO.  MAX_NOPS
 537    specifies the maximum number of leaf operands.  SHIFT_P says whether
 538    CODE and WIDENED_CODE are some sort of shift.
 539
 540    If STMT_INFO is such a tree, return the number of leaf operands
 541    and describe them in UNPROM[0] onwards.  Also set *COMMON_TYPE
 542    to a type that (a) is narrower than the result of STMT_INFO and
 543    (b) can hold all leaf operand values.
 544
 545    If SUBTYPE then allow that the signs of the operands
 546    may differ in signs but not in precision.  SUBTYPE is updated to reflect
 547    this.
 548
 549    Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
 550    exists.  */
 551
 552 static unsigned int
 553 vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code,
 554                       tree_code widened_code, bool shift_p,
 555                       unsigned int max_nops,
 556                       vect_unpromoted_value *unprom, tree *common_type,
 557                       enum optab_subtype *subtype = NULL)
 558 {
 559   /* Check for an integer operation with the right code.  */
 560   gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
 561   if (!assign)
 562     return 0;
 563
 564   tree_code rhs_code = gimple_assign_rhs_code (assign);
 565   if (rhs_code != code && rhs_code != widened_code)
 566     return 0;
 567
 568   tree type = TREE_TYPE (gimple_assign_lhs (assign));
 569   if (!INTEGRAL_TYPE_P (type))
 570     return 0;
 571
 572   /* Assume that both operands will be leaf operands.  */
 573   max_nops -= 2;
 574
 575   /* Check the operands.  */
 576   unsigned int next_op = 0;
 577   for (unsigned int i = 0; i < 2; ++i)
 578     {
 579       vect_unpromoted_value *this_unprom = &unprom[next_op];
 580       unsigned int nops = 1;
 581       tree op = gimple_op (assign, i + 1);
 582       if (i == 1 && TREE_CODE (op) == INTEGER_CST)
 583         {
 584           /* We already have a common type from earlier operands.
 585              Update it to account for OP.  */
 586           this_unprom->set_op (op, vect_constant_def);
 587           if (!vect_joust_widened_integer (type, shift_p, op, common_type))
 588             return 0;
 589         }
 590       else
 591         {
 592           /* Only allow shifts by constants.  */
 593           if (shift_p && i == 1)
 594             return 0;
 595
 596           if (!vect_look_through_possible_promotion (vinfo, op, this_unprom))
 597             return 0;
 598
 599           if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
 600             {
 601               /* The operand isn't widened.  If STMT_INFO has the code
 602                  for an unwidened operation, recursively check whether
 603                  this operand is a node of the tree.  */
 604               if (rhs_code != code
 605                   || max_nops == 0
 606                   || this_unprom->dt != vect_internal_def)
 607                 return 0;
 608
 609               /* Give back the leaf slot allocated above now that we're
 610                  not treating this as a leaf operand.  */
 611               max_nops += 1;
 612
 613               /* Recursively process the definition of the operand.  */
 614               stmt_vec_info def_stmt_info
 615                 = vinfo->lookup_def (this_unprom->op);
 616               nops = vect_widened_op_tree (vinfo, def_stmt_info, code,
 617                                            widened_code, shift_p, max_nops,
 618                                            this_unprom, common_type,
 619                                            subtype);
 620               if (nops == 0)
 621                 return 0;
 622
 623               max_nops -= nops;
 624             }
 625           else
 626             {
 627               /* Make sure that the operand is narrower than the result.  */
 628               if (TYPE_PRECISION (this_unprom->type) * 2
 629                   > TYPE_PRECISION (type))
 630                 return 0;
 631
 632               /* Update COMMON_TYPE for the new operand.  */
 633               if (i == 0)
 634                 *common_type = this_unprom->type;
 635               else if (!vect_joust_widened_type (type, this_unprom->type,
 636                                                  common_type))
 637                 {
 638                   if (subtype)
 639                     {
 640                       /* See if we can sign extend the smaller type.  */
 641                       if (TYPE_PRECISION (this_unprom->type)
 642                           > TYPE_PRECISION (*common_type))
 643                         *common_type = this_unprom->type;
 644                       *subtype = optab_vector_mixed_sign;
 645                     }
 646                   else
 647                     return 0;
 648                 }
 649             }
 650         }
 651       next_op += nops;
 652     }
 653   return next_op;
 654 }
 655
 656 /* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
 657    is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
 658
 659 static tree
 660 vect_recog_temp_ssa_var (tree type, gimple *stmt)
 661 {
 662   return make_temp_ssa_name (type, stmt, "patt");
 663 }
 664
 665 /* STMT2_INFO describes a type conversion that could be split into STMT1
 666    followed by a version of STMT2_INFO that takes NEW_RHS as its first
 667    input.  Try to do this using pattern statements, returning true on
 668    success.  */
 669
 670 static bool
 671 vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
 672                       gimple *stmt1, tree vectype)
 673 {
 674   if (is_pattern_stmt_p (stmt2_info))
 675     {
 676       /* STMT2_INFO is part of a pattern.  Get the statement to which
 677          the pattern is attached.  */
 678       stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
 679       vect_init_pattern_stmt (vinfo, stmt1, orig_stmt2_info, vectype);
 680
 681       if (dump_enabled_p ())
 682         dump_printf_loc (MSG_NOTE, vect_location,
 683                          "Splitting pattern statement: %G", stmt2_info->stmt);
 684
 685       /* Since STMT2_INFO is a pattern statement, we can change it
 686          in-situ without worrying about changing the code for the
 687          containing block.  */
 688       gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);
 689
 690       if (dump_enabled_p ())
 691         {
 692           dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
 693           dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
 694                            stmt2_info->stmt);
 695         }
 696
 697       gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
 698       if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
 699         /* STMT2_INFO is the actual pattern statement.  Add STMT1
 700            to the end of the definition sequence.  */
 701         gimple_seq_add_stmt_without_update (def_seq, stmt1);
 702       else
 703         {
 704           /* STMT2_INFO belongs to the definition sequence.  Insert STMT1
 705              before it.  */
 706           gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
 707           gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
 708         }
 709       return true;
 710     }
 711   else
 712     {
 713       /* STMT2_INFO doesn't yet have a pattern.  Try to create a
 714          two-statement pattern now.  */
 715       gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
 716       tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
 717       tree lhs_vectype = get_vectype_for_scalar_type (vinfo, lhs_type);
 718       if (!lhs_vectype)
 719         return false;
 720
 721       if (dump_enabled_p ())
 722         dump_printf_loc (MSG_NOTE, vect_location,
 723                          "Splitting statement: %G", stmt2_info->stmt);
 724
 725       /* Add STMT1 as a singleton pattern definition sequence.  */
 726       gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
 727       vect_init_pattern_stmt (vinfo, stmt1, stmt2_info, vectype);
 728       gimple_seq_add_stmt_without_update (def_seq, stmt1);
 729
 730       /* Build the second of the two pattern statements.  */
 731       tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
 732       gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
 733       vect_set_pattern_stmt (vinfo, new_stmt2, stmt2_info, lhs_vectype);
 734
 735       if (dump_enabled_p ())
 736         {
 737           dump_printf_loc (MSG_NOTE, vect_location,
 738                            "into pattern statements: %G", stmt1);
 739           dump_printf_loc (MSG_NOTE, vect_location, "and: %G", new_stmt2);
 740         }
 741
 742       return true;
 743     }
 744 }
 745
 746 /* Convert UNPROM to TYPE and return the result, adding new statements
 747    to STMT_INFO's pattern definition statements if no better way is
 748    available.  VECTYPE is the vector form of TYPE.
 749
 750    If SUBTYPE then convert the type based on the subtype.  */
 751
 752 static tree
 753 vect_convert_input (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
 754                     vect_unpromoted_value *unprom, tree vectype,
 755                     enum optab_subtype subtype = optab_default)
 756 {
 757
 758   /* Update the type if the signs differ.  */
 759   if (subtype == optab_vector_mixed_sign
 760       && TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (unprom->op)))
 761     type = build_nonstandard_integer_type (TYPE_PRECISION (type),
 762                                            TYPE_SIGN (unprom->type));
 763
 764   /* Check for a no-op conversion.  */
 765   if (types_compatible_p (type, TREE_TYPE (unprom->op)))
 766     return unprom->op;
 767
 768   /* Allow the caller to create constant vect_unpromoted_values.  */
 769   if (TREE_CODE (unprom->op) == INTEGER_CST)
 770     return wide_int_to_tree (type, wi::to_widest (unprom->op));
 771
 772   tree input = unprom->op;
 773   if (unprom->caster)
 774     {
 775       tree lhs = gimple_get_lhs (unprom->caster->stmt);
 776       tree lhs_type = TREE_TYPE (lhs);
 777
 778       /* If the result of the existing cast is the right width, use it
 779          instead of the source of the cast.  */
 780       if (TYPE_PRECISION (lhs_type) == TYPE_PRECISION (type))
 781         input = lhs;
 782       /* If the precision we want is between the source and result
 783          precisions of the existing cast, try splitting the cast into
 784          two and tapping into a mid-way point.  */
 785       else if (TYPE_PRECISION (lhs_type) > TYPE_PRECISION (type)
 786                && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type))
 787         {
 788           /* In order to preserve the semantics of the original cast,
 789              give the mid-way point the same signedness as the input value.
 790
 791              It would be possible to use a signed type here instead if
 792              TYPE is signed and UNPROM->TYPE is unsigned, but that would
 793              make the sign of the midtype sensitive to the order in
 794              which we process the statements, since the signedness of
 795              TYPE is the signedness required by just one of possibly
 796              many users.  Also, unsigned promotions are usually as cheap
 797              as or cheaper than signed ones, so it's better to keep an
 798              unsigned promotion.  */
 799           tree midtype = build_nonstandard_integer_type
 800             (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type));
 801           tree vec_midtype = get_vectype_for_scalar_type (vinfo, midtype);
 802           if (vec_midtype)
 803             {
 804               input = vect_recog_temp_ssa_var (midtype, NULL);
 805               gassign *new_stmt = gimple_build_assign (input, NOP_EXPR,
 806                                                        unprom->op);
 807               if (!vect_split_statement (vinfo, unprom->caster, input, new_stmt,
 808                                          vec_midtype))
 809                 append_pattern_def_seq (vinfo, stmt_info,
 810                                         new_stmt, vec_midtype);
 811             }
 812         }
 813
 814       /* See if we can reuse an existing result.  */
 815       if (types_compatible_p (type, TREE_TYPE (input)))
 816         return input;
 817     }
 818
 819   /* We need a new conversion statement.  */
 820   tree new_op = vect_recog_temp_ssa_var (type, NULL);
 821   gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, input);
 822
 823   /* If OP is an external value, see if we can insert the new statement
 824      on an incoming edge.  */
 825   if (input == unprom->op && unprom->dt == vect_external_def)
 826     if (edge e = vect_get_external_def_edge (vinfo, input))
 827       {
 828         basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
 829         gcc_assert (!new_bb);
 830         return new_op;
 831       }
 832
 833   /* As a (common) last resort, add the statement to the pattern itself.  */
 834   append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype);
 835   return new_op;
 836 }
 837
 838 /* Invoke vect_convert_input for N elements of UNPROM and store the
 839    result in the corresponding elements of RESULT.
 840
 841    If SUBTYPE then convert the type based on the subtype.  */
 842
 843 static void
 844 vect_convert_inputs (vec_info *vinfo, stmt_vec_info stmt_info, unsigned int n,
 845                      tree *result, tree type, vect_unpromoted_value *unprom,
 846                      tree vectype, enum optab_subtype subtype = optab_default)
 847 {
 848   for (unsigned int i = 0; i < n; ++i)
 849     {
 850       unsigned int j;
 851       for (j = 0; j < i; ++j)
 852         if (unprom[j].op == unprom[i].op)
 853           break;
 854
 855       if (j < i)
 856         result[i] = result[j];
 857       else
 858         result[i] = vect_convert_input (vinfo, stmt_info,
 859                                         type, &unprom[i], vectype, subtype);
 860     }
 861 }
 862
 863 /* The caller has created a (possibly empty) sequence of pattern definition
 864    statements followed by a single statement PATTERN_STMT.  Cast the result
 865    of this final statement to TYPE.  If a new statement is needed, add
 866    PATTERN_STMT to the end of STMT_INFO's pattern definition statements
 867    and return the new statement, otherwise return PATTERN_STMT as-is.
 868    VECITYPE is the vector form of PATTERN_STMT's result type.  */
 869
 870 static gimple *
 871 vect_convert_output (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
 872                      gimple *pattern_stmt, tree vecitype)
 873 {
 874   tree lhs = gimple_get_lhs (pattern_stmt);
 875   if (!types_compatible_p (type, TREE_TYPE (lhs)))
 876     {
 877       append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vecitype);
 878       tree cast_var = vect_recog_temp_ssa_var (type, NULL);
 879       pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
 880     }
 881   return pattern_stmt;
 882 }
 883
 884 /* Return true if STMT_VINFO describes a reduction for which reassociation
 885    is allowed.  If STMT_INFO is part of a group, assume that it's part of
 886    a reduction chain and optimistically assume that all statements
 887    except the last allow reassociation.
 888    Also require it to have code CODE and to be a reduction
 889    in the outermost loop.  When returning true, store the operands in
 890    *OP0_OUT and *OP1_OUT.  */
 891
 892 static bool
 893 vect_reassociating_reduction_p (vec_info *vinfo,
 894                                 stmt_vec_info stmt_info, tree_code code,
 895                                 tree *op0_out, tree *op1_out)
 896 {
 897   loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
 898   if (!loop_info)
 899     return false;
 900
 901   gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
 902   if (!assign || gimple_assign_rhs_code (assign) != code)
 903     return false;
 904
 905   /* We don't allow changing the order of the computation in the inner-loop
 906      when doing outer-loop vectorization.  */
 907   class loop *loop = LOOP_VINFO_LOOP (loop_info);
 908   if (loop && nested_in_vect_loop_p (loop, stmt_info))
 909     return false;
 910
 911   if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
 912     {
 913       if (needs_fold_left_reduction_p (TREE_TYPE (gimple_assign_lhs (assign)),
 914                                        code))
 915         return false;
 916     }
 917   else if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) == NULL)
 918     return false;
 919
 920   *op0_out = gimple_assign_rhs1 (assign);
 921   *op1_out = gimple_assign_rhs2 (assign);
 922   if (commutative_tree_code (code) && STMT_VINFO_REDUC_IDX (stmt_info) == 0)
 923     std::swap (*op0_out, *op1_out);
 924   return true;
 925 }
 926
 927 /* match.pd function to match
 928    (cond (cmp@3 a b) (convert@1 c) (convert@2 d))
 929    with conditions:
 930    1) @1, @2, c, d, a, b are all integral type.
 931    2) There's single_use for both @1 and @2.
 932    3) a, c have same precision.
 933    4) c and @1 have different precision.
 934    5) c, d are the same type or they can differ in sign when convert is
 935    truncation.
 936
 937    record a and c and d and @3.  */
 938
 939 extern bool gimple_cond_expr_convert_p (tree, tree*, tree (*)(tree));
 940
 941 /* Function vect_recog_cond_expr_convert
 942
 943    Try to find the following pattern:
 944
 945    TYPE_AB A,B;
 946    TYPE_CD C,D;
 947    TYPE_E E;
 948    TYPE_E op_true = (TYPE_E) A;
 949    TYPE_E op_false = (TYPE_E) B;
 950
 951    E = C cmp D ? op_true : op_false;
 952
 953    where
 954    TYPE_PRECISION (TYPE_E) != TYPE_PRECISION (TYPE_CD);
 955    TYPE_PRECISION (TYPE_AB) == TYPE_PRECISION (TYPE_CD);
 956    single_use of op_true and op_false.
 957    TYPE_AB could differ in sign when (TYPE_E) A is a truncation.
 958
 959    Input:
 960
 961    * STMT_VINFO: The stmt from which the pattern search begins.
 962    here it starts with E = c cmp D ? op_true : op_false;
 963
 964    Output:
 965
 966    TYPE1 E' = C cmp D ? A : B;
 967    TYPE3 E = (TYPE3) E';
 968
 969    There may extra nop_convert for A or B to handle different signness.
 970
 971    * TYPE_OUT: The vector type of the output of this pattern.
 972
 973    * Return value: A new stmt that will be used to replace the sequence of
 974    stmts that constitute the pattern. In this case it will be:
 975    E = (TYPE3)E';
 976    E' = C cmp D ? A : B; is recorded in pattern definition statements;  */
 977
 978 static gimple *
 979 vect_recog_cond_expr_convert_pattern (vec_info *vinfo,
 980                                       stmt_vec_info stmt_vinfo, tree *type_out)
 981 {
 982   gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
 983   tree lhs, match[4], temp, type, new_lhs, op2;
 984   gimple *cond_stmt;
 985   gimple *pattern_stmt;
 986
 987   if (!last_stmt)
 988     return NULL;
 989
 990   lhs = gimple_assign_lhs (last_stmt);
 991
 992   /* Find E = C cmp D ? (TYPE3) A ? (TYPE3) B;
 993      TYPE_PRECISION (A) == TYPE_PRECISION (C).  */
 994   if (!gimple_cond_expr_convert_p (lhs, &match[0], NULL))
 995     return NULL;
 996
 997   vect_pattern_detected ("vect_recog_cond_expr_convert_pattern", last_stmt);
 998
 999   op2 = match[2];
1000   type = TREE_TYPE (match[1]);
1001   if (TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (match[2])))
1002     {
1003       op2 = vect_recog_temp_ssa_var (type, NULL);
1004       gimple* nop_stmt = gimple_build_assign (op2, NOP_EXPR, match[2]);
1005       append_pattern_def_seq (vinfo, stmt_vinfo, nop_stmt,
1006                               get_vectype_for_scalar_type (vinfo, type));
1007     }
1008
1009   temp = vect_recog_temp_ssa_var (type, NULL);
1010   cond_stmt = gimple_build_assign (temp, build3 (COND_EXPR, type, match[3],
1011                                                  match[1], op2));
1012   append_pattern_def_seq (vinfo, stmt_vinfo, cond_stmt,
1013                           get_vectype_for_scalar_type (vinfo, type));
1014   new_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
1015   pattern_stmt = gimple_build_assign (new_lhs, NOP_EXPR, temp);
1016   *type_out = STMT_VINFO_VECTYPE (stmt_vinfo);
1017
1018   if (dump_enabled_p ())
1019     dump_printf_loc (MSG_NOTE, vect_location,
1020                      "created pattern stmt: %G", pattern_stmt);
1021   return pattern_stmt;
1022 }
1023
1024 /* Function vect_recog_dot_prod_pattern
1025
1026    Try to find the following pattern:
1027
1028      type1a x_t
1029      type1b y_t;
1030      TYPE1 prod;
1031      TYPE2 sum = init;
1032    loop:
1033      sum_0 = phi <init, sum_1>
1034      S1  x_t = ...
1035      S2  y_t = ...
1036      S3  x_T = (TYPE1) x_t;
1037      S4  y_T = (TYPE1) y_t;
1038      S5  prod = x_T * y_T;
1039      [S6  prod = (TYPE2) prod;  #optional]
1040      S7  sum_1 = prod + sum_0;
1041
1042    where 'TYPE1' is exactly double the size of type 'type1a' and 'type1b',
1043    the sign of 'TYPE1' must be one of 'type1a' or 'type1b' but the sign of
1044    'type1a' and 'type1b' can differ.
1045
1046    Input:
1047
1048    * STMT_VINFO: The stmt from which the pattern search begins.  In the
1049    example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
1050    will be detected.
1051
1052    Output:
1053
1054    * TYPE_OUT: The type of the output  of this pattern.
1055
1056    * Return value: A new stmt that will be used to replace the sequence of
1057    stmts that constitute the pattern. In this case it will be:
1058         WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
1059
1060    Note: The dot-prod idiom is a widening reduction pattern that is
1061          vectorized without preserving all the intermediate results. It
1062          produces only N/2 (widened) results (by summing up pairs of
1063          intermediate results) rather than all N results.  Therefore, we
1064          cannot allow this pattern when we want to get all the results and in
1065          the correct order (as is the case when this computation is in an
1066          inner-loop nested in an outer-loop that us being vectorized).  */
1067
1068 static gimple *
1069 vect_recog_dot_prod_pattern (vec_info *vinfo,
1070                              stmt_vec_info stmt_vinfo, tree *type_out)
1071 {
1072   tree oprnd0, oprnd1;
1073   gimple *last_stmt = stmt_vinfo->stmt;
1074   tree type, half_type;
1075   gimple *pattern_stmt;
1076   tree var;
1077
1078   /* Look for the following pattern
1079           DX = (TYPE1) X;
1080           DY = (TYPE1) Y;
1081           DPROD = DX * DY;
1082           DDPROD = (TYPE2) DPROD;
1083           sum_1 = DDPROD + sum_0;
1084      In which
1085      - DX is double the size of X
1086      - DY is double the size of Y
1087      - DX, DY, DPROD all have the same type but the sign
1088        between X, Y and DPROD can differ.
1089      - sum is the same size of DPROD or bigger
1090      - sum has been recognized as a reduction variable.
1091
1092      This is equivalent to:
1093        DPROD = X w* Y;          #widen mult
1094        sum_1 = DPROD w+ sum_0;  #widen summation
1095      or
1096        DPROD = X w* Y;          #widen mult
1097        sum_1 = DPROD + sum_0;   #summation
1098    */
1099
1100   /* Starting from LAST_STMT, follow the defs of its uses in search
1101      of the above pattern.  */
1102
1103   if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1104                                        &oprnd0, &oprnd1))
1105     return NULL;
1106
1107   type = TREE_TYPE (gimple_get_lhs (last_stmt));
1108
1109   vect_unpromoted_value unprom_mult;
1110   oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);
1111
1112   /* So far so good.  Since last_stmt was detected as a (summation) reduction,
1113      we know that oprnd1 is the reduction variable (defined by a loop-header
1114      phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1115      Left to check that oprnd0 is defined by a (widen_)mult_expr  */
1116   if (!oprnd0)
1117     return NULL;
1118
1119   stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
1120   if (!mult_vinfo)
1121     return NULL;
1122
1123   /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
1124      inside the loop (in case we are analyzing an outer-loop).  */
1125   vect_unpromoted_value unprom0[2];
1126   enum optab_subtype subtype = optab_vector;
1127   if (!vect_widened_op_tree (vinfo, mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
1128                              false, 2, unprom0, &half_type, &subtype))
1129     return NULL;
1130
1131   /* If there are two widening operations, make sure they agree on the sign
1132      of the extension.  The result of an optab_vector_mixed_sign operation
1133      is signed; otherwise, the result has the same sign as the operands.  */
1134   if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
1135       && (subtype == optab_vector_mixed_sign
1136         ? TYPE_UNSIGNED (unprom_mult.type)
1137         : TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type)))
1138     return NULL;
1139
1140   vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);
1141
1142   tree half_vectype;
1143   if (!vect_supportable_direct_optab_p (vinfo, type, DOT_PROD_EXPR, half_type,
1144                                         type_out, &half_vectype, subtype))
1145     return NULL;
1146
1147   /* Get the inputs in the appropriate types.  */
1148   tree mult_oprnd[2];
1149   vect_convert_inputs (vinfo, stmt_vinfo, 2, mult_oprnd, half_type,
1150                        unprom0, half_vectype, subtype);
1151
1152   var = vect_recog_temp_ssa_var (type, NULL);
1153   pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
1154                                       mult_oprnd[0], mult_oprnd[1], oprnd1);
1155
1156   return pattern_stmt;
1157 }
1158
1159
1160 /* Function vect_recog_sad_pattern
1161
1162    Try to find the following Sum of Absolute Difference (SAD) pattern:
1163
1164      type x_t, y_t;
1165      signed TYPE1 diff, abs_diff;
1166      TYPE2 sum = init;
1167    loop:
1168      sum_0 = phi <init, sum_1>
1169      S1  x_t = ...
1170      S2  y_t = ...
1171      S3  x_T = (TYPE1) x_t;
1172      S4  y_T = (TYPE1) y_t;
1173      S5  diff = x_T - y_T;
1174      S6  abs_diff = ABS_EXPR <diff>;
1175      [S7  abs_diff = (TYPE2) abs_diff;  #optional]
1176      S8  sum_1 = abs_diff + sum_0;
1177
1178    where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
1179    same size of 'TYPE1' or bigger. This is a special case of a reduction
1180    computation.
1181
1182    Input:
1183
1184    * STMT_VINFO: The stmt from which the pattern search begins.  In the
1185    example, when this function is called with S8, the pattern
1186    {S3,S4,S5,S6,S7,S8} will be detected.
1187
1188    Output:
1189
1190    * TYPE_OUT: The type of the output of this pattern.
1191
1192    * Return value: A new stmt that will be used to replace the sequence of
1193    stmts that constitute the pattern. In this case it will be:
1194         SAD_EXPR <x_t, y_t, sum_0>
1195   */
1196
1197 static gimple *
1198 vect_recog_sad_pattern (vec_info *vinfo,
1199                         stmt_vec_info stmt_vinfo, tree *type_out)
1200 {
1201   gimple *last_stmt = stmt_vinfo->stmt;
1202   tree half_type;
1203
1204   /* Look for the following pattern
1205           DX = (TYPE1) X;
1206           DY = (TYPE1) Y;
1207           DDIFF = DX - DY;
1208           DAD = ABS_EXPR <DDIFF>;
1209           DDPROD = (TYPE2) DPROD;
1210           sum_1 = DAD + sum_0;
1211      In which
1212      - DX is at least double the size of X
1213      - DY is at least double the size of Y
1214      - DX, DY, DDIFF, DAD all have the same type
1215      - sum is the same size of DAD or bigger
1216      - sum has been recognized as a reduction variable.
1217
1218      This is equivalent to:
1219        DDIFF = X w- Y;          #widen sub
1220        DAD = ABS_EXPR <DDIFF>;
1221        sum_1 = DAD w+ sum_0;    #widen summation
1222      or
1223        DDIFF = X w- Y;          #widen sub
1224        DAD = ABS_EXPR <DDIFF>;
1225        sum_1 = DAD + sum_0;     #summation
1226    */
1227
1228   /* Starting from LAST_STMT, follow the defs of its uses in search
1229      of the above pattern.  */
1230
1231   tree plus_oprnd0, plus_oprnd1;
1232   if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1233                                        &plus_oprnd0, &plus_oprnd1))
1234     return NULL;
1235
1236   tree sum_type = TREE_TYPE (gimple_get_lhs (last_stmt));
1237
1238   /* Any non-truncating sequence of conversions is OK here, since
1239      with a successful match, the result of the ABS(U) is known to fit
1240      within the nonnegative range of the result type.  (It cannot be the
1241      negative of the minimum signed value due to the range of the widening
1242      MINUS_EXPR.)  */
1243   vect_unpromoted_value unprom_abs;
1244   plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
1245                                                       &unprom_abs);
1246
1247   /* So far so good.  Since last_stmt was detected as a (summation) reduction,
1248      we know that plus_oprnd1 is the reduction variable (defined by a loop-header
1249      phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
1250      Then check that plus_oprnd0 is defined by an abs_expr.  */
1251
1252   if (!plus_oprnd0)
1253     return NULL;
1254
1255   stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
1256   if (!abs_stmt_vinfo)
1257     return NULL;
1258
1259   /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
1260      inside the loop (in case we are analyzing an outer-loop).  */
1261   gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
1262   if (!abs_stmt
1263       || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
1264           && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
1265     return NULL;
1266
1267   tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
1268   tree abs_type = TREE_TYPE (abs_oprnd);
1269   if (TYPE_UNSIGNED (abs_type))
1270     return NULL;
1271
1272   /* Peel off conversions from the ABS input.  This can involve sign
1273      changes (e.g. from an unsigned subtraction to a signed ABS input)
1274      or signed promotion, but it can't include unsigned promotion.
1275      (Note that ABS of an unsigned promotion should have been folded
1276      away before now anyway.)  */
1277   vect_unpromoted_value unprom_diff;
1278   abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
1279                                                     &unprom_diff);
1280   if (!abs_oprnd)
1281     return NULL;
1282   if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
1283       && TYPE_UNSIGNED (unprom_diff.type))
1284     return NULL;
1285
1286   /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
1287   stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
1288   if (!diff_stmt_vinfo)
1289     return NULL;
1290
1291   /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
1292      inside the loop (in case we are analyzing an outer-loop).  */
1293   vect_unpromoted_value unprom[2];
1294   if (!vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR, WIDEN_MINUS_EXPR,
1295                              false, 2, unprom, &half_type))
1296     return NULL;
1297
1298   vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
1299
1300   tree half_vectype;
1301   if (!vect_supportable_direct_optab_p (vinfo, sum_type, SAD_EXPR, half_type,
1302                                         type_out, &half_vectype))
1303     return NULL;
1304
1305   /* Get the inputs to the SAD_EXPR in the appropriate types.  */
1306   tree sad_oprnd[2];
1307   vect_convert_inputs (vinfo, stmt_vinfo, 2, sad_oprnd, half_type,
1308                        unprom, half_vectype);
1309
1310   tree var = vect_recog_temp_ssa_var (sum_type, NULL);
1311   gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
1312                                               sad_oprnd[1], plus_oprnd1);
1313
1314   return pattern_stmt;
1315 }
1316
1317 /* Recognize an operation that performs ORIG_CODE on widened inputs,
1318    so that it can be treated as though it had the form:
1319
1320       A_TYPE a;
1321       B_TYPE b;
1322       HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
1323       HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
1324     | RES_TYPE a_extend = (RES_TYPE) a_cast;  // promotion from HALF_TYPE
1325     | RES_TYPE b_extend = (RES_TYPE) b_cast;  // promotion from HALF_TYPE
1326     | RES_TYPE res = a_extend ORIG_CODE b_extend;
1327
1328    Try to replace the pattern with:
1329
1330       A_TYPE a;
1331       B_TYPE b;
1332       HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
1333       HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
1334     | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
1335     | RES_TYPE res = (EXT_TYPE) ext;  // possible no-op
1336
1337    where EXT_TYPE is wider than HALF_TYPE but has the same signedness.
1338
1339    SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts.  NAME is the
1340    name of the pattern being matched, for dump purposes.  */
1341
1342 static gimple *
1343 vect_recog_widen_op_pattern (vec_info *vinfo,
1344                              stmt_vec_info last_stmt_info, tree *type_out,
1345                              tree_code orig_code, tree_code wide_code,
1346                              bool shift_p, const char *name)
1347 {
1348   gimple *last_stmt = last_stmt_info->stmt;
1349
1350   vect_unpromoted_value unprom[2];
1351   tree half_type;
1352   if (!vect_widened_op_tree (vinfo, last_stmt_info, orig_code, orig_code,
1353                              shift_p, 2, unprom, &half_type))
1354     return NULL;
1355
1356   /* Pattern detected.  */
1357   vect_pattern_detected (name, last_stmt);
1358
1359   tree type = TREE_TYPE (gimple_get_lhs (last_stmt));
1360   tree itype = type;
1361   if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
1362       || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
1363     itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
1364                                             TYPE_UNSIGNED (half_type));
1365
1366   /* Check target support  */
1367   tree vectype = get_vectype_for_scalar_type (vinfo, half_type);
1368   tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
1369   tree ctype = itype;
1370   tree vecctype = vecitype;
1371   if (orig_code == MINUS_EXPR
1372       && TYPE_UNSIGNED (itype)
1373       && TYPE_PRECISION (type) > TYPE_PRECISION (itype))
1374     {
1375       /* Subtraction is special, even if half_type is unsigned and no matter
1376          whether type is signed or unsigned, if type is wider than itype,
1377          we need to sign-extend from the widening operation result to the
1378          result type.
1379          Consider half_type unsigned char, operand 1 0xfe, operand 2 0xff,
1380          itype unsigned short and type either int or unsigned int.
1381          Widened (unsigned short) 0xfe - (unsigned short) 0xff is
1382          (unsigned short) 0xffff, but for type int we want the result -1
1383          and for type unsigned int 0xffffffff rather than 0xffff.  */
1384       ctype = build_nonstandard_integer_type (TYPE_PRECISION (itype), 0);
1385       vecctype = get_vectype_for_scalar_type (vinfo, ctype);
1386     }
1387
1388   enum tree_code dummy_code;
1389   int dummy_int;
1390   auto_vec<tree> dummy_vec;
1391   if (!vectype
1392       || !vecitype
1393       || !vecctype
1394       || !supportable_widening_operation (vinfo, wide_code, last_stmt_info,
1395                                           vecitype, vectype,
1396                                           &dummy_code, &dummy_code,
1397                                           &dummy_int, &dummy_vec))
1398     return NULL;
1399
1400   *type_out = get_vectype_for_scalar_type (vinfo, type);
1401   if (!*type_out)
1402     return NULL;
1403
1404   tree oprnd[2];
1405   vect_convert_inputs (vinfo, last_stmt_info,
1406                        2, oprnd, half_type, unprom, vectype);
1407
1408   tree var = vect_recog_temp_ssa_var (itype, NULL);
1409   gimple *pattern_stmt = gimple_build_assign (var, wide_code,
1410                                               oprnd[0], oprnd[1]);
1411
1412   if (vecctype != vecitype)
1413     pattern_stmt = vect_convert_output (vinfo, last_stmt_info, ctype,
1414                                         pattern_stmt, vecitype);
1415
1416   return vect_convert_output (vinfo, last_stmt_info,
1417                               type, pattern_stmt, vecctype);
1418 }
1419
1420 /* Try to detect multiplication on widened inputs, converting MULT_EXPR
1421    to WIDEN_MULT_EXPR.  See vect_recog_widen_op_pattern for details.  */
1422
1423 static gimple *
1424 vect_recog_widen_mult_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1425                                tree *type_out)
1426 {
1427   return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1428                                       MULT_EXPR, WIDEN_MULT_EXPR, false,
1429                                       "vect_recog_widen_mult_pattern");
1430 }
1431
1432 /* Try to detect addition on widened inputs, converting PLUS_EXPR
1433    to WIDEN_PLUS_EXPR.  See vect_recog_widen_op_pattern for details.  */
1434
1435 static gimple *
1436 vect_recog_widen_plus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1437                                tree *type_out)
1438 {
1439   return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1440                                       PLUS_EXPR, WIDEN_PLUS_EXPR, false,
1441                                       "vect_recog_widen_plus_pattern");
1442 }
1443
1444 /* Try to detect subtraction on widened inputs, converting MINUS_EXPR
1445    to WIDEN_MINUS_EXPR.  See vect_recog_widen_op_pattern for details.  */
1446 static gimple *
1447 vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1448                                tree *type_out)
1449 {
1450   return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1451                                       MINUS_EXPR, WIDEN_MINUS_EXPR, false,
1452                                       "vect_recog_widen_minus_pattern");
1453 }
1454
1455 /* Function vect_recog_popcount_pattern
1456
1457    Try to find the following pattern:
1458
1459    UTYPE1 A;
1460    TYPE1 B;
1461    UTYPE2 temp_in;
1462    TYPE3 temp_out;
1463    temp_in = (UTYPE2)A;
1464
1465    temp_out = __builtin_popcount{,l,ll} (temp_in);
1466    B = (TYPE1) temp_out;
1467
1468    TYPE2 may or may not be equal to TYPE3.
1469    i.e. TYPE2 is equal to TYPE3 for __builtin_popcount
1470    i.e. TYPE2 is not equal to TYPE3 for __builtin_popcountll
1471
1472    Input:
1473
1474    * STMT_VINFO: The stmt from which the pattern search begins.
1475    here it starts with B = (TYPE1) temp_out;
1476
1477    Output:
1478
1479    * TYPE_OUT: The vector type of the output of this pattern.
1480
1481    * Return value: A new stmt that will be used to replace the sequence of
1482    stmts that constitute the pattern. In this case it will be:
1483    B = .POPCOUNT (A);
1484 */
1485
1486 static gimple *
1487 vect_recog_popcount_pattern (vec_info *vinfo,
1488                              stmt_vec_info stmt_vinfo, tree *type_out)
1489 {
1490   gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
1491   gimple *popcount_stmt, *pattern_stmt;
1492   tree rhs_oprnd, rhs_origin, lhs_oprnd, lhs_type, vec_type, new_var;
1493   auto_vec<tree> vargs;
1494
1495   /* Find B = (TYPE1) temp_out. */
1496   if (!last_stmt)
1497     return NULL;
1498   tree_code code = gimple_assign_rhs_code (last_stmt);
1499   if (!CONVERT_EXPR_CODE_P (code))
1500     return NULL;
1501
1502   lhs_oprnd = gimple_assign_lhs (last_stmt);
1503   lhs_type = TREE_TYPE (lhs_oprnd);
1504   if (!INTEGRAL_TYPE_P (lhs_type))
1505     return NULL;
1506
1507   rhs_oprnd = gimple_assign_rhs1 (last_stmt);
1508   if (TREE_CODE (rhs_oprnd) != SSA_NAME
1509       || !has_single_use (rhs_oprnd))
1510     return NULL;
1511   popcount_stmt = SSA_NAME_DEF_STMT (rhs_oprnd);
1512
1513   /* Find temp_out = __builtin_popcount{,l,ll} (temp_in);  */
1514   if (!is_gimple_call (popcount_stmt))
1515     return NULL;
1516   switch (gimple_call_combined_fn (popcount_stmt))
1517     {
1518     CASE_CFN_POPCOUNT:
1519       break;
1520     default:
1521       return NULL;
1522     }
1523
1524   if (gimple_call_num_args (popcount_stmt) != 1)
1525     return NULL;
1526
1527   rhs_oprnd = gimple_call_arg (popcount_stmt, 0);
1528   vect_unpromoted_value unprom_diff;
1529   rhs_origin = vect_look_through_possible_promotion (vinfo, rhs_oprnd,
1530                                                     &unprom_diff);
1531
1532   if (!rhs_origin)
1533     return NULL;
1534
1535   /* Input and output of .POPCOUNT should be same-precision integer.
1536      Also A should be unsigned or same precision as temp_in,
1537      otherwise there would be sign_extend from A to temp_in.  */
1538   if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (lhs_type)
1539       || (!TYPE_UNSIGNED (unprom_diff.type)
1540           && (TYPE_PRECISION (unprom_diff.type)
1541               != TYPE_PRECISION (TREE_TYPE (rhs_oprnd)))))
1542     return NULL;
1543   vargs.safe_push (unprom_diff.op);
1544
1545   vect_pattern_detected ("vec_regcog_popcount_pattern", popcount_stmt);
1546   vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);
1547   /* Do it only if the backend has popcount<vector_mode>2 pattern.  */
1548   if (!vec_type
1549       || !direct_internal_fn_supported_p (IFN_POPCOUNT, vec_type,
1550                                           OPTIMIZE_FOR_SPEED))
1551     return NULL;
1552
1553   /* Create B = .POPCOUNT (A).  */
1554   new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
1555   pattern_stmt = gimple_build_call_internal_vec (IFN_POPCOUNT, vargs);
1556   gimple_call_set_lhs (pattern_stmt, new_var);
1557   gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1558   *type_out = vec_type;
1559
1560   if (dump_enabled_p ())
1561     dump_printf_loc (MSG_NOTE, vect_location,
1562                      "created pattern stmt: %G", pattern_stmt);
1563   return pattern_stmt;
1564 }
1565
1566 /* Function vect_recog_pow_pattern
1567
1568    Try to find the following pattern:
1569
1570      x = POW (y, N);
1571
1572    with POW being one of pow, powf, powi, powif and N being
1573    either 2 or 0.5.
1574
1575    Input:
1576
1577    * STMT_VINFO: The stmt from which the pattern search begins.
1578
1579    Output:
1580
1581    * TYPE_OUT: The type of the output of this pattern.
1582
1583    * Return value: A new stmt that will be used to replace the sequence of
1584    stmts that constitute the pattern. In this case it will be:
1585         x = x * x
1586    or
1587         x = sqrt (x)
1588 */
1589
1590 static gimple *
1591 vect_recog_pow_pattern (vec_info *vinfo,
1592                         stmt_vec_info stmt_vinfo, tree *type_out)
1593 {
1594   gimple *last_stmt = stmt_vinfo->stmt;
1595   tree base, exp;
1596   gimple *stmt;
1597   tree var;
1598
1599   if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
1600     return NULL;
1601
1602   switch (gimple_call_combined_fn (last_stmt))
1603     {
1604     CASE_CFN_POW:
1605     CASE_CFN_POWI:
1606       break;
1607
1608     default:
1609       return NULL;
1610     }
1611
1612   base = gimple_call_arg (last_stmt, 0);
1613   exp = gimple_call_arg (last_stmt, 1);
1614   if (TREE_CODE (exp) != REAL_CST
1615       && TREE_CODE (exp) != INTEGER_CST)
1616     {
1617       if (flag_unsafe_math_optimizations
1618           && TREE_CODE (base) == REAL_CST
1619           && gimple_call_builtin_p (last_stmt, BUILT_IN_NORMAL))
1620         {
1621           combined_fn log_cfn;
1622           built_in_function exp_bfn;
1623           switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
1624             {
1625             case BUILT_IN_POW:
1626               log_cfn = CFN_BUILT_IN_LOG;
1627               exp_bfn = BUILT_IN_EXP;
1628               break;
1629             case BUILT_IN_POWF:
1630               log_cfn = CFN_BUILT_IN_LOGF;
1631               exp_bfn = BUILT_IN_EXPF;
1632               break;
1633             case BUILT_IN_POWL:
1634               log_cfn = CFN_BUILT_IN_LOGL;
1635               exp_bfn = BUILT_IN_EXPL;
1636               break;
1637             default:
1638               return NULL;
1639             }
1640           tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
1641           tree exp_decl = builtin_decl_implicit (exp_bfn);
1642           /* Optimize pow (C, x) as exp (log (C) * x).  Normally match.pd
1643              does that, but if C is a power of 2, we want to use
1644              exp2 (log2 (C) * x) in the non-vectorized version, but for
1645              vectorization we don't have vectorized exp2.  */
1646           if (logc
1647               && TREE_CODE (logc) == REAL_CST
1648               && exp_decl
1649               && lookup_attribute ("omp declare simd",
1650                                    DECL_ATTRIBUTES (exp_decl)))
1651             {
1652               cgraph_node *node = cgraph_node::get_create (exp_decl);
1653               if (node->simd_clones == NULL)
1654                 {
1655                   if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
1656                       || node->definition)
1657                     return NULL;
1658                   expand_simd_clones (node);
1659                   if (node->simd_clones == NULL)
1660                     return NULL;
1661                 }
1662               *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
1663               if (!*type_out)
1664                 return NULL;
1665               tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1666               gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
1667               append_pattern_def_seq (vinfo, stmt_vinfo, g);
1668               tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1669               g = gimple_build_call (exp_decl, 1, def);
1670               gimple_call_set_lhs (g, res);
1671               return g;
1672             }
1673         }
1674
1675       return NULL;
1676     }
1677
1678   /* We now have a pow or powi builtin function call with a constant
1679      exponent.  */
1680
1681   /* Catch squaring.  */
1682   if ((tree_fits_shwi_p (exp)
1683        && tree_to_shwi (exp) == 2)
1684       || (TREE_CODE (exp) == REAL_CST
1685           && real_equal (&TREE_REAL_CST (exp), &dconst2)))
1686     {
1687       if (!vect_supportable_direct_optab_p (vinfo, TREE_TYPE (base), MULT_EXPR,
1688                                             TREE_TYPE (base), type_out))
1689         return NULL;
1690
1691       var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1692       stmt = gimple_build_assign (var, MULT_EXPR, base, base);
1693       return stmt;
1694     }
1695
1696   /* Catch square root.  */
1697   if (TREE_CODE (exp) == REAL_CST
1698       && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
1699     {
1700       *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
1701       if (*type_out
1702           && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
1703                                              OPTIMIZE_FOR_SPEED))
1704         {
1705           gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
1706           var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
1707           gimple_call_set_lhs (stmt, var);
1708           gimple_call_set_nothrow (stmt, true);
1709           return stmt;
1710         }
1711     }
1712
1713   return NULL;
1714 }
1715
1716
1717 /* Function vect_recog_widen_sum_pattern
1718
1719    Try to find the following pattern:
1720
1721      type x_t;
1722      TYPE x_T, sum = init;
1723    loop:
1724      sum_0 = phi <init, sum_1>
1725      S1  x_t = *p;
1726      S2  x_T = (TYPE) x_t;
1727      S3  sum_1 = x_T + sum_0;
1728
1729    where type 'TYPE' is at least double the size of type 'type', i.e - we're
1730    summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1731    a special case of a reduction computation.
1732
1733    Input:
1734
1735    * STMT_VINFO: The stmt from which the pattern search begins. In the example,
1736    when this function is called with S3, the pattern {S2,S3} will be detected.
1737
1738    Output:
1739
1740    * TYPE_OUT: The type of the output of this pattern.
1741
1742    * Return value: A new stmt that will be used to replace the sequence of
1743    stmts that constitute the pattern. In this case it will be:
1744         WIDEN_SUM <x_t, sum_0>
1745
1746    Note: The widening-sum idiom is a widening reduction pattern that is
1747          vectorized without preserving all the intermediate results. It
1748          produces only N/2 (widened) results (by summing up pairs of
1749          intermediate results) rather than all N results.  Therefore, we
1750          cannot allow this pattern when we want to get all the results and in
1751          the correct order (as is the case when this computation is in an
1752          inner-loop nested in an outer-loop that us being vectorized).  */
1753
1754 static gimple *
1755 vect_recog_widen_sum_pattern (vec_info *vinfo,
1756                               stmt_vec_info stmt_vinfo, tree *type_out)
1757 {
1758   gimple *last_stmt = stmt_vinfo->stmt;
1759   tree oprnd0, oprnd1;
1760   tree type;
1761   gimple *pattern_stmt;
1762   tree var;
1763
1764   /* Look for the following pattern
1765           DX = (TYPE) X;
1766           sum_1 = DX + sum_0;
1767      In which DX is at least double the size of X, and sum_1 has been
1768      recognized as a reduction variable.
1769    */
1770
1771   /* Starting from LAST_STMT, follow the defs of its uses in search
1772      of the above pattern.  */
1773
1774   if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1775                                        &oprnd0, &oprnd1))
1776     return NULL;
1777
1778   type = TREE_TYPE (gimple_get_lhs (last_stmt));
1779
1780   /* So far so good.  Since last_stmt was detected as a (summation) reduction,
1781      we know that oprnd1 is the reduction variable (defined by a loop-header
1782      phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1783      Left to check that oprnd0 is defined by a cast from type 'type' to type
1784      'TYPE'.  */
1785
1786   vect_unpromoted_value unprom0;
1787   if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
1788       || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
1789     return NULL;
1790
1791   vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
1792
1793   if (!vect_supportable_direct_optab_p (vinfo, type, WIDEN_SUM_EXPR,
1794                                         unprom0.type, type_out))
1795     return NULL;
1796
1797   var = vect_recog_temp_ssa_var (type, NULL);
1798   pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);
1799
1800   return pattern_stmt;
1801 }
1802
1803 /* Recognize cases in which an operation is performed in one type WTYPE
1804    but could be done more efficiently in a narrower type NTYPE.  For example,
1805    if we have:
1806
1807      ATYPE a;  // narrower than NTYPE
1808      BTYPE b;  // narrower than NTYPE
1809      WTYPE aw = (WTYPE) a;
1810      WTYPE bw = (WTYPE) b;
1811      WTYPE res = aw + bw;  // only uses of aw and bw
1812
1813    then it would be more efficient to do:
1814
1815      NTYPE an = (NTYPE) a;
1816      NTYPE bn = (NTYPE) b;
1817      NTYPE resn = an + bn;
1818      WTYPE res = (WTYPE) resn;
1819
1820    Other situations include things like:
1821
1822      ATYPE a;  // NTYPE or narrower
1823      WTYPE aw = (WTYPE) a;
1824      WTYPE res = aw + b;
1825
1826    when only "(NTYPE) res" is significant.  In that case it's more efficient
1827    to truncate "b" and do the operation on NTYPE instead:
1828
1829      NTYPE an = (NTYPE) a;
1830      NTYPE bn = (NTYPE) b;  // truncation
1831      NTYPE resn = an + bn;
1832      WTYPE res = (WTYPE) resn;
1833
1834    All users of "res" should then use "resn" instead, making the final
1835    statement dead (not marked as relevant).  The final statement is still
1836    needed to maintain the type correctness of the IR.
1837
1838    vect_determine_precisions has already determined the minimum
1839    precison of the operation and the minimum precision required
1840    by users of the result.  */
1841
1842 static gimple *
1843 vect_recog_over_widening_pattern (vec_info *vinfo,
1844                                   stmt_vec_info last_stmt_info, tree *type_out)
1845 {
1846   gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1847   if (!last_stmt)
1848     return NULL;
1849
1850   /* See whether we have found that this operation can be done on a
1851      narrower type without changing its semantics.  */
1852   unsigned int new_precision = last_stmt_info->operation_precision;
1853   if (!new_precision)
1854     return NULL;
1855
1856   tree lhs = gimple_assign_lhs (last_stmt);
1857   tree type = TREE_TYPE (lhs);
1858   tree_code code = gimple_assign_rhs_code (last_stmt);
1859
1860   /* Punt for reductions where we don't handle the type conversions.  */
1861   if (STMT_VINFO_DEF_TYPE (last_stmt_info) == vect_reduction_def)
1862     return NULL;
1863
1864   /* Keep the first operand of a COND_EXPR as-is: only the other two
1865      operands are interesting.  */
1866   unsigned int first_op = (code == COND_EXPR ? 2 : 1);
1867
1868   /* Check the operands.  */
1869   unsigned int nops = gimple_num_ops (last_stmt) - first_op;
1870   auto_vec <vect_unpromoted_value, 3> unprom (nops);
1871   unprom.quick_grow (nops);
1872   unsigned int min_precision = 0;
1873   bool single_use_p = false;
1874   for (unsigned int i = 0; i < nops; ++i)
1875     {
1876       tree op = gimple_op (last_stmt, first_op + i);
1877       if (TREE_CODE (op) == INTEGER_CST)
1878         unprom[i].set_op (op, vect_constant_def);
1879       else if (TREE_CODE (op) == SSA_NAME)
1880         {
1881           bool op_single_use_p = true;
1882           if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
1883                                                      &op_single_use_p))
1884             return NULL;
1885           /* If:
1886
1887              (1) N bits of the result are needed;
1888              (2) all inputs are widened from M<N bits; and
1889              (3) one operand OP is a single-use SSA name
1890
1891              we can shift the M->N widening from OP to the output
1892              without changing the number or type of extensions involved.
1893              This then reduces the number of copies of STMT_INFO.
1894
1895              If instead of (3) more than one operand is a single-use SSA name,
1896              shifting the extension to the output is even more of a win.
1897
1898              If instead:
1899
1900              (1) N bits of the result are needed;
1901              (2) one operand OP2 is widened from M2<N bits;
1902              (3) another operand OP1 is widened from M1<M2 bits; and
1903              (4) both OP1 and OP2 are single-use
1904
1905              the choice is between:
1906
1907              (a) truncating OP2 to M1, doing the operation on M1,
1908                  and then widening the result to N
1909
1910              (b) widening OP1 to M2, doing the operation on M2, and then
1911                  widening the result to N
1912
1913              Both shift the M2->N widening of the inputs to the output.
1914              (a) additionally shifts the M1->M2 widening to the output;
1915              it requires fewer copies of STMT_INFO but requires an extra
1916              M2->M1 truncation.
1917
1918              Which is better will depend on the complexity and cost of
1919              STMT_INFO, which is hard to predict at this stage.  However,
1920              a clear tie-breaker in favor of (b) is the fact that the
1921              truncation in (a) increases the length of the operation chain.
1922
1923              If instead of (4) only one of OP1 or OP2 is single-use,
1924              (b) is still a win over doing the operation in N bits:
1925              it still shifts the M2->N widening on the single-use operand
1926              to the output and reduces the number of STMT_INFO copies.
1927
1928              If neither operand is single-use then operating on fewer than
1929              N bits might lead to more extensions overall.  Whether it does
1930              or not depends on global information about the vectorization
1931              region, and whether that's a good trade-off would again
1932              depend on the complexity and cost of the statements involved,
1933              as well as things like register pressure that are not normally
1934              modelled at this stage.  We therefore ignore these cases
1935              and just optimize the clear single-use wins above.
1936
1937              Thus we take the maximum precision of the unpromoted operands
1938              and record whether any operand is single-use.  */
1939           if (unprom[i].dt == vect_internal_def)
1940             {
1941               min_precision = MAX (min_precision,
1942                                    TYPE_PRECISION (unprom[i].type));
1943               single_use_p |= op_single_use_p;
1944             }
1945         }
1946       else
1947         return NULL;
1948     }
1949
1950   /* Although the operation could be done in operation_precision, we have
1951      to balance that against introducing extra truncations or extensions.
1952      Calculate the minimum precision that can be handled efficiently.
1953
1954      The loop above determined that the operation could be handled
1955      efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
1956      extension from the inputs to the output without introducing more
1957      instructions, and would reduce the number of instructions required
1958      for STMT_INFO itself.
1959
1960      vect_determine_precisions has also determined that the result only
1961      needs min_output_precision bits.  Truncating by a factor of N times
1962      requires a tree of N - 1 instructions, so if TYPE is N times wider
1963      than min_output_precision, doing the operation in TYPE and truncating
1964      the result requires N + (N - 1) = 2N - 1 instructions per output vector.
1965      In contrast:
1966
1967      - truncating the input to a unary operation and doing the operation
1968        in the new type requires at most N - 1 + 1 = N instructions per
1969        output vector
1970
1971      - doing the same for a binary operation requires at most
1972        (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
1973
1974      Both unary and binary operations require fewer instructions than
1975      this if the operands were extended from a suitable truncated form.
1976      Thus there is usually nothing to lose by doing operations in
1977      min_output_precision bits, but there can be something to gain.  */
1978   if (!single_use_p)
1979     min_precision = last_stmt_info->min_output_precision;
1980   else
1981     min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
1982
1983   /* Apply the minimum efficient precision we just calculated.  */
1984   if (new_precision < min_precision)
1985     new_precision = min_precision;
1986   new_precision = vect_element_precision (new_precision);
1987   if (new_precision >= TYPE_PRECISION (type))
1988     return NULL;
1989
1990   vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);
1991
1992   *type_out = get_vectype_for_scalar_type (vinfo, type);
1993   if (!*type_out)
1994     return NULL;
1995
1996   /* We've found a viable pattern.  Get the new type of the operation.  */
1997   bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
1998   tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
1999
2000   /* If we're truncating an operation, we need to make sure that we
2001      don't introduce new undefined overflow.  The codes tested here are
2002      a subset of those accepted by vect_truncatable_operation_p.  */
2003   tree op_type = new_type;
2004   if (TYPE_OVERFLOW_UNDEFINED (new_type)
2005       && (code == PLUS_EXPR || code == MINUS_EXPR || code == MULT_EXPR))
2006     op_type = build_nonstandard_integer_type (new_precision, true);
2007
2008   /* We specifically don't check here whether the target supports the
2009      new operation, since it might be something that a later pattern
2010      wants to rewrite anyway.  If targets have a minimum element size
2011      for some optabs, we should pattern-match smaller ops to larger ops
2012      where beneficial.  */
2013   tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
2014   tree op_vectype = get_vectype_for_scalar_type (vinfo, op_type);
2015   if (!new_vectype || !op_vectype)
2016     return NULL;
2017
2018   if (dump_enabled_p ())
2019     dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
2020                      type, new_type);
2021
2022   /* Calculate the rhs operands for an operation on OP_TYPE.  */
2023   tree ops[3] = {};
2024   for (unsigned int i = 1; i < first_op; ++i)
2025     ops[i - 1] = gimple_op (last_stmt, i);
2026   vect_convert_inputs (vinfo, last_stmt_info, nops, &ops[first_op - 1],
2027                        op_type, &unprom[0], op_vectype);
2028
2029   /* Use the operation to produce a result of type OP_TYPE.  */
2030   tree new_var = vect_recog_temp_ssa_var (op_type, NULL);
2031   gimple *pattern_stmt = gimple_build_assign (new_var, code,
2032                                               ops[0], ops[1], ops[2]);
2033   gimple_set_location (pattern_stmt, gimple_location (last_stmt));
2034
2035   if (dump_enabled_p ())
2036     dump_printf_loc (MSG_NOTE, vect_location,
2037                      "created pattern stmt: %G", pattern_stmt);
2038
2039   /* Convert back to the original signedness, if OP_TYPE is different
2040      from NEW_TYPE.  */
2041   if (op_type != new_type)
2042     pattern_stmt = vect_convert_output (vinfo, last_stmt_info, new_type,
2043                                         pattern_stmt, op_vectype);
2044
2045   /* Promote the result to the original type.  */
2046   pattern_stmt = vect_convert_output (vinfo, last_stmt_info, type,
2047                                       pattern_stmt, new_vectype);
2048
2049   return pattern_stmt;
2050 }
2051
2052 /* Recognize the following patterns:
2053
2054      ATYPE a;  // narrower than TYPE
2055      BTYPE b;  // narrower than TYPE
2056
2057    1) Multiply high with scaling
2058      TYPE res = ((TYPE) a * (TYPE) b) >> c;
2059      Here, c is bitsize (TYPE) / 2 - 1.
2060
2061    2) ... or also with rounding
2062      TYPE res = (((TYPE) a * (TYPE) b) >> d + 1) >> 1;
2063      Here, d is bitsize (TYPE) / 2 - 2.
2064
2065    3) Normal multiply high
2066      TYPE res = ((TYPE) a * (TYPE) b) >> e;
2067      Here, e is bitsize (TYPE) / 2.
2068
2069    where only the bottom half of res is used.  */
2070
2071 static gimple *
2072 vect_recog_mulhs_pattern (vec_info *vinfo,
2073                           stmt_vec_info last_stmt_info, tree *type_out)
2074 {
2075   /* Check for a right shift.  */
2076   gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
2077   if (!last_stmt
2078       || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR)
2079     return NULL;
2080
2081   /* Check that the shift result is wider than the users of the
2082      result need (i.e. that narrowing would be a natural choice).  */
2083   tree lhs_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
2084   unsigned int target_precision
2085     = vect_element_precision (last_stmt_info->min_output_precision);
2086   if (!INTEGRAL_TYPE_P (lhs_type)
2087       || target_precision >= TYPE_PRECISION (lhs_type))
2088     return NULL;
2089
2090   /* Look through any change in sign on the outer shift input.  */
2091   vect_unpromoted_value unprom_rshift_input;
2092   tree rshift_input = vect_look_through_possible_promotion
2093     (vinfo, gimple_assign_rhs1 (last_stmt), &unprom_rshift_input);
2094   if (!rshift_input
2095       || TYPE_PRECISION (TREE_TYPE (rshift_input))
2096            != TYPE_PRECISION (lhs_type))
2097     return NULL;
2098
2099   /* Get the definition of the shift input.  */
2100   stmt_vec_info rshift_input_stmt_info
2101     = vect_get_internal_def (vinfo, rshift_input);
2102   if (!rshift_input_stmt_info)
2103     return NULL;
2104   gassign *rshift_input_stmt
2105     = dyn_cast <gassign *> (rshift_input_stmt_info->stmt);
2106   if (!rshift_input_stmt)
2107     return NULL;
2108
2109   stmt_vec_info mulh_stmt_info;
2110   tree scale_term;
2111   bool rounding_p = false;
2112
2113   /* Check for the presence of the rounding term.  */
2114   if (gimple_assign_rhs_code (rshift_input_stmt) == PLUS_EXPR)
2115     {
2116       /* Check that the outer shift was by 1.  */
2117       if (!integer_onep (gimple_assign_rhs2 (last_stmt)))
2118         return NULL;
2119
2120       /* Check that the second operand of the PLUS_EXPR is 1.  */
2121       if (!integer_onep (gimple_assign_rhs2 (rshift_input_stmt)))
2122         return NULL;
2123
2124       /* Look through any change in sign on the addition input.  */
2125       vect_unpromoted_value unprom_plus_input;
2126       tree plus_input = vect_look_through_possible_promotion
2127         (vinfo, gimple_assign_rhs1 (rshift_input_stmt), &unprom_plus_input);
2128       if (!plus_input
2129            || TYPE_PRECISION (TREE_TYPE (plus_input))
2130                 != TYPE_PRECISION (TREE_TYPE (rshift_input)))
2131         return NULL;
2132
2133       /* Get the definition of the multiply-high-scale part.  */
2134       stmt_vec_info plus_input_stmt_info
2135         = vect_get_internal_def (vinfo, plus_input);
2136       if (!plus_input_stmt_info)
2137         return NULL;
2138       gassign *plus_input_stmt
2139         = dyn_cast <gassign *> (plus_input_stmt_info->stmt);
2140       if (!plus_input_stmt
2141           || gimple_assign_rhs_code (plus_input_stmt) != RSHIFT_EXPR)
2142         return NULL;
2143
2144       /* Look through any change in sign on the scaling input.  */
2145       vect_unpromoted_value unprom_scale_input;
2146       tree scale_input = vect_look_through_possible_promotion
2147         (vinfo, gimple_assign_rhs1 (plus_input_stmt), &unprom_scale_input);
2148       if (!scale_input
2149           || TYPE_PRECISION (TREE_TYPE (scale_input))
2150                != TYPE_PRECISION (TREE_TYPE (plus_input)))
2151         return NULL;
2152
2153       /* Get the definition of the multiply-high part.  */
2154       mulh_stmt_info = vect_get_internal_def (vinfo, scale_input);
2155       if (!mulh_stmt_info)
2156         return NULL;
2157
2158       /* Get the scaling term.  */
2159       scale_term = gimple_assign_rhs2 (plus_input_stmt);
2160       rounding_p = true;
2161     }
2162   else
2163     {
2164       mulh_stmt_info = rshift_input_stmt_info;
2165       scale_term = gimple_assign_rhs2 (last_stmt);
2166     }
2167
2168   /* Check that the scaling factor is constant.  */
2169   if (TREE_CODE (scale_term) != INTEGER_CST)
2170     return NULL;
2171
2172   /* Check whether the scaling input term can be seen as two widened
2173      inputs multiplied together.  */
2174   vect_unpromoted_value unprom_mult[2];
2175   tree new_type;
2176   unsigned int nops
2177     = vect_widened_op_tree (vinfo, mulh_stmt_info, MULT_EXPR, WIDEN_MULT_EXPR,
2178                             false, 2, unprom_mult, &new_type);
2179   if (nops != 2)
2180     return NULL;
2181
2182   /* Adjust output precision.  */
2183   if (TYPE_PRECISION (new_type) < target_precision)
2184     new_type = build_nonstandard_integer_type
2185       (target_precision, TYPE_UNSIGNED (new_type));
2186
2187   unsigned mult_precision = TYPE_PRECISION (new_type);
2188   internal_fn ifn;
2189   /* Check that the scaling factor is expected.  Instead of
2190      target_precision, we should use the one that we actually
2191      use for internal function.  */
2192   if (rounding_p)
2193     {
2194       /* Check pattern 2).  */
2195       if (wi::to_widest (scale_term) + mult_precision + 2
2196           != TYPE_PRECISION (lhs_type))
2197         return NULL;
2198
2199       ifn = IFN_MULHRS;
2200     }
2201   else
2202     {
2203       /* Check for pattern 1).  */
2204       if (wi::to_widest (scale_term) + mult_precision + 1
2205           == TYPE_PRECISION (lhs_type))
2206         ifn = IFN_MULHS;
2207       /* Check for pattern 3).  */
2208       else if (wi::to_widest (scale_term) + mult_precision
2209                == TYPE_PRECISION (lhs_type))
2210         ifn = IFN_MULH;
2211       else
2212         return NULL;
2213     }
2214
2215   vect_pattern_detected ("vect_recog_mulhs_pattern", last_stmt);
2216
2217   /* Check for target support.  */
2218   tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
2219   if (!new_vectype
2220       || !direct_internal_fn_supported_p
2221             (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
2222     return NULL;
2223
2224   /* The IR requires a valid vector type for the cast result, even though
2225      it's likely to be discarded.  */
2226   *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
2227   if (!*type_out)
2228     return NULL;
2229
2230   /* Generate the IFN_MULHRS call.  */
2231   tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
2232   tree new_ops[2];
2233   vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
2234                        unprom_mult, new_vectype);
2235   gcall *mulhrs_stmt
2236     = gimple_build_call_internal (ifn, 2, new_ops[0], new_ops[1]);
2237   gimple_call_set_lhs (mulhrs_stmt, new_var);
2238   gimple_set_location (mulhrs_stmt, gimple_location (last_stmt));
2239
2240   if (dump_enabled_p ())
2241     dump_printf_loc (MSG_NOTE, vect_location,
2242                      "created pattern stmt: %G", mulhrs_stmt);
2243
2244   return vect_convert_output (vinfo, last_stmt_info, lhs_type,
2245                               mulhrs_stmt, new_vectype);
2246 }
2247
2248 /* Recognize the patterns:
2249
2250             ATYPE a;  // narrower than TYPE
2251             BTYPE b;  // narrower than TYPE
2252         (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
2253      or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
2254
2255    where only the bottom half of avg is used.  Try to transform them into:
2256
2257         (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
2258      or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
2259
2260   followed by:
2261
2262             TYPE avg = (TYPE) avg';
2263
2264   where NTYPE is no wider than half of TYPE.  Since only the bottom half
2265   of avg is used, all or part of the cast of avg' should become redundant.
2266
2267   If there is no target support available, generate code to distribute rshift
2268   over plus and add a carry.  */
2269
2270 static gimple *
2271 vect_recog_average_pattern (vec_info *vinfo,
2272                             stmt_vec_info last_stmt_info, tree *type_out)
2273 {
2274   /* Check for a shift right by one bit.  */
2275   gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
2276   if (!last_stmt
2277       || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
2278       || !integer_onep (gimple_assign_rhs2 (last_stmt)))
2279     return NULL;
2280
2281   /* Check that the shift result is wider than the users of the
2282      result need (i.e. that narrowing would be a natural choice).  */
2283   tree lhs = gimple_assign_lhs (last_stmt);
2284   tree type = TREE_TYPE (lhs);
2285   unsigned int target_precision
2286     = vect_element_precision (last_stmt_info->min_output_precision);
2287   if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
2288     return NULL;
2289
2290   /* Look through any change in sign on the shift input.  */
2291   tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
2292   vect_unpromoted_value unprom_plus;
2293   rshift_rhs = vect_look_through_possible_promotion (vinfo, rshift_rhs,
2294                                                      &unprom_plus);
2295   if (!rshift_rhs
2296       || TYPE_PRECISION (TREE_TYPE (rshift_rhs)) != TYPE_PRECISION (type))
2297     return NULL;
2298
2299   /* Get the definition of the shift input.  */
2300   stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
2301   if (!plus_stmt_info)
2302     return NULL;
2303
2304   /* Check whether the shift input can be seen as a tree of additions on
2305      2 or 3 widened inputs.
2306
2307      Note that the pattern should be a win even if the result of one or
2308      more additions is reused elsewhere: if the pattern matches, we'd be
2309      replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s.  */
2310   internal_fn ifn = IFN_AVG_FLOOR;
2311   vect_unpromoted_value unprom[3];
2312   tree new_type;
2313   unsigned int nops = vect_widened_op_tree (vinfo, plus_stmt_info, PLUS_EXPR,
2314                                             WIDEN_PLUS_EXPR, false, 3,
2315                                             unprom, &new_type);
2316   if (nops == 0)
2317     return NULL;
2318   if (nops == 3)
2319     {
2320       /* Check that one operand is 1.  */
2321       unsigned int i;
2322       for (i = 0; i < 3; ++i)
2323         if (integer_onep (unprom[i].op))
2324           break;
2325       if (i == 3)
2326         return NULL;
2327       /* Throw away the 1 operand and keep the other two.  */
2328       if (i < 2)
2329         unprom[i] = unprom[2];
2330       ifn = IFN_AVG_CEIL;
2331     }
2332
2333   vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
2334
2335   /* We know that:
2336
2337      (a) the operation can be viewed as:
2338
2339            TYPE widened0 = (TYPE) UNPROM[0];
2340            TYPE widened1 = (TYPE) UNPROM[1];
2341            TYPE tmp1 = widened0 + widened1 {+ 1};
2342            TYPE tmp2 = tmp1 >> 1;   // LAST_STMT_INFO
2343
2344      (b) the first two statements are equivalent to:
2345
2346            TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
2347            TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
2348
2349      (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
2350          where sensible;
2351
2352      (d) all the operations can be performed correctly at twice the width of
2353          NEW_TYPE, due to the nature of the average operation; and
2354
2355      (e) users of the result of the right shift need only TARGET_PRECISION
2356          bits, where TARGET_PRECISION is no more than half of TYPE's
2357          precision.
2358
2359      Under these circumstances, the only situation in which NEW_TYPE
2360      could be narrower than TARGET_PRECISION is if widened0, widened1
2361      and an addition result are all used more than once.  Thus we can
2362      treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
2363      as "free", whereas widening the result of the average instruction
2364      from NEW_TYPE to TARGET_PRECISION would be a new operation.  It's
2365      therefore better not to go narrower than TARGET_PRECISION.  */
2366   if (TYPE_PRECISION (new_type) < target_precision)
2367     new_type = build_nonstandard_integer_type (target_precision,
2368                                                TYPE_UNSIGNED (new_type));
2369
2370   /* Check for target support.  */
2371   tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
2372   if (!new_vectype)
2373     return NULL;
2374
2375   bool fallback_p = false;
2376
2377   if (direct_internal_fn_supported_p (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
2378     ;
2379   else if (TYPE_UNSIGNED (new_type)
2380            && optab_for_tree_code (RSHIFT_EXPR, new_vectype, optab_scalar)
2381            && optab_for_tree_code (PLUS_EXPR, new_vectype, optab_default)
2382            && optab_for_tree_code (BIT_IOR_EXPR, new_vectype, optab_default)
2383            && optab_for_tree_code (BIT_AND_EXPR, new_vectype, optab_default))
2384     fallback_p = true;
2385   else
2386     return NULL;
2387
2388   /* The IR requires a valid vector type for the cast result, even though
2389      it's likely to be discarded.  */
2390   *type_out = get_vectype_for_scalar_type (vinfo, type);
2391   if (!*type_out)
2392     return NULL;
2393
2394   tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
2395   tree new_ops[2];
2396   vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
2397                        unprom, new_vectype);
2398
2399   if (fallback_p)
2400     {
2401       /* As a fallback, generate code for following sequence:
2402
2403          shifted_op0 = new_ops[0] >> 1;
2404          shifted_op1 = new_ops[1] >> 1;
2405          sum_of_shifted = shifted_op0 + shifted_op1;
2406          unmasked_carry = new_ops[0] and/or new_ops[1];
2407          carry = unmasked_carry & 1;
2408          new_var = sum_of_shifted + carry;
2409       */
2410
2411       tree one_cst = build_one_cst (new_type);
2412       gassign *g;
2413
2414       tree shifted_op0 = vect_recog_temp_ssa_var (new_type, NULL);
2415       g = gimple_build_assign (shifted_op0, RSHIFT_EXPR, new_ops[0], one_cst);
2416       append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2417
2418       tree shifted_op1 = vect_recog_temp_ssa_var (new_type, NULL);
2419       g = gimple_build_assign (shifted_op1, RSHIFT_EXPR, new_ops[1], one_cst);
2420       append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2421
2422       tree sum_of_shifted = vect_recog_temp_ssa_var (new_type, NULL);
2423       g = gimple_build_assign (sum_of_shifted, PLUS_EXPR,
2424                                shifted_op0, shifted_op1);
2425       append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2426
2427       tree unmasked_carry = vect_recog_temp_ssa_var (new_type, NULL);
2428       tree_code c = (ifn == IFN_AVG_CEIL) ? BIT_IOR_EXPR : BIT_AND_EXPR;
2429       g = gimple_build_assign (unmasked_carry, c, new_ops[0], new_ops[1]);
2430       append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2431
2432       tree carry = vect_recog_temp_ssa_var (new_type, NULL);
2433       g = gimple_build_assign (carry, BIT_AND_EXPR, unmasked_carry, one_cst);
2434       append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2435
2436       g = gimple_build_assign (new_var, PLUS_EXPR, sum_of_shifted, carry);
2437       return vect_convert_output (vinfo, last_stmt_info, type, g, new_vectype);
2438     }
2439
2440   /* Generate the IFN_AVG* call.  */
2441   gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
2442                                                     new_ops[1]);
2443   gimple_call_set_lhs (average_stmt, new_var);
2444   gimple_set_location (average_stmt, gimple_location (last_stmt));
2445
2446   if (dump_enabled_p ())
2447     dump_printf_loc (MSG_NOTE, vect_location,
2448                      "created pattern stmt: %G", average_stmt);
2449
2450   return vect_convert_output (vinfo, last_stmt_info,
2451                               type, average_stmt, new_vectype);
2452 }
2453
2454 /* Recognize cases in which the input to a cast is wider than its
2455    output, and the input is fed by a widening operation.  Fold this
2456    by removing the unnecessary intermediate widening.  E.g.:
2457
2458      unsigned char a;
2459      unsigned int b = (unsigned int) a;
2460      unsigned short c = (unsigned short) b;
2461
2462    -->
2463
2464      unsigned short c = (unsigned short) a;
2465
2466    Although this is rare in input IR, it is an expected side-effect
2467    of the over-widening pattern above.
2468
2469    This is beneficial also for integer-to-float conversions, if the
2470    widened integer has more bits than the float, and if the unwidened
2471    input doesn't.  */
2472
2473 static gimple *
2474 vect_recog_cast_forwprop_pattern (vec_info *vinfo,
2475                                   stmt_vec_info last_stmt_info, tree *type_out)
2476 {
2477   /* Check for a cast, including an integer-to-float conversion.  */
2478   gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
2479   if (!last_stmt)
2480     return NULL;
2481   tree_code code = gimple_assign_rhs_code (last_stmt);
2482   if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
2483     return NULL;
2484
2485   /* Make sure that the rhs is a scalar with a natural bitsize.  */
2486   tree lhs = gimple_assign_lhs (last_stmt);
2487   if (!lhs)
2488     return NULL;
2489   tree lhs_type = TREE_TYPE (lhs);
2490   scalar_mode lhs_mode;
2491   if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
2492       || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
2493     return NULL;
2494
2495   /* Check for a narrowing operation (from a vector point of view).  */
2496   tree rhs = gimple_assign_rhs1 (last_stmt);
2497   tree rhs_type = TREE_TYPE (rhs);
2498   if (!INTEGRAL_TYPE_P (rhs_type)
2499       || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
2500       || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
2501     return NULL;
2502
2503   /* Try to find an unpromoted input.  */
2504   vect_unpromoted_value unprom;
2505   if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
2506       || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
2507     return NULL;
2508
2509   /* If the bits above RHS_TYPE matter, make sure that they're the
2510      same when extending from UNPROM as they are when extending from RHS.  */
2511   if (!INTEGRAL_TYPE_P (lhs_type)
2512       && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
2513     return NULL;
2514
2515   /* We can get the same result by casting UNPROM directly, to avoid
2516      the unnecessary widening and narrowing.  */
2517   vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
2518
2519   *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
2520   if (!*type_out)
2521     return NULL;
2522
2523   tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2524   gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
2525   gimple_set_location (pattern_stmt, gimple_location (last_stmt));
2526
2527   return pattern_stmt;
2528 }
2529
2530 /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
2531    to WIDEN_LSHIFT_EXPR.  See vect_recog_widen_op_pattern for details.  */
2532
2533 static gimple *
2534 vect_recog_widen_shift_pattern (vec_info *vinfo,
2535                                 stmt_vec_info last_stmt_info, tree *type_out)
2536 {
2537   return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
2538                                       LSHIFT_EXPR, WIDEN_LSHIFT_EXPR, true,
2539                                       "vect_recog_widen_shift_pattern");
2540 }
2541
2542 /* Detect a rotate pattern wouldn't be otherwise vectorized:
2543
2544    type a_t, b_t, c_t;
2545
2546    S0 a_t = b_t r<< c_t;
2547
2548   Input/Output:
2549
2550   * STMT_VINFO: The stmt from which the pattern search begins,
2551     i.e. the shift/rotate stmt.  The original stmt (S0) is replaced
2552     with a sequence:
2553
2554    S1 d_t = -c_t;
2555    S2 e_t = d_t & (B - 1);
2556    S3 f_t = b_t << c_t;
2557    S4 g_t = b_t >> e_t;
2558    S0 a_t = f_t | g_t;
2559
2560     where B is element bitsize of type.
2561
2562   Output:
2563
2564   * TYPE_OUT: The type of the output of this pattern.
2565
2566   * Return value: A new stmt that will be used to replace the rotate
2567     S0 stmt.  */
2568
2569 static gimple *
2570 vect_recog_rotate_pattern (vec_info *vinfo,
2571                            stmt_vec_info stmt_vinfo, tree *type_out)
2572 {
2573   gimple *last_stmt = stmt_vinfo->stmt;
2574   tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
2575   gimple *pattern_stmt, *def_stmt;
2576   enum tree_code rhs_code;
2577   enum vect_def_type dt;
2578   optab optab1, optab2;
2579   edge ext_def = NULL;
2580   bool bswap16_p = false;
2581
2582   if (is_gimple_assign (last_stmt))
2583     {
2584       rhs_code = gimple_assign_rhs_code (last_stmt);
2585       switch (rhs_code)
2586         {
2587         case LROTATE_EXPR:
2588         case RROTATE_EXPR:
2589           break;
2590         default:
2591           return NULL;
2592         }
2593
2594       lhs = gimple_assign_lhs (last_stmt);
2595       oprnd0 = gimple_assign_rhs1 (last_stmt);
2596       type = TREE_TYPE (oprnd0);
2597       oprnd1 = gimple_assign_rhs2 (last_stmt);
2598     }
2599   else if (gimple_call_builtin_p (last_stmt, BUILT_IN_BSWAP16))
2600     {
2601       /* __builtin_bswap16 (x) is another form of x r>> 8.
2602          The vectorizer has bswap support, but only if the argument isn't
2603          promoted.  */
2604       lhs = gimple_call_lhs (last_stmt);
2605       oprnd0 = gimple_call_arg (last_stmt, 0);
2606       type = TREE_TYPE (oprnd0);
2607       if (!lhs
2608           || TYPE_PRECISION (TREE_TYPE (lhs)) != 16
2609           || TYPE_PRECISION (type) <= 16
2610           || TREE_CODE (oprnd0) != SSA_NAME
2611           || BITS_PER_UNIT != 8
2612           || !TYPE_UNSIGNED (TREE_TYPE (lhs)))
2613         return NULL;
2614
2615       stmt_vec_info def_stmt_info;
2616       if (!vect_is_simple_use (oprnd0, vinfo, &dt, &def_stmt_info, &def_stmt))
2617         return NULL;
2618
2619       if (dt != vect_internal_def)
2620         return NULL;
2621
2622       if (gimple_assign_cast_p (def_stmt))
2623         {
2624           def = gimple_assign_rhs1 (def_stmt);
2625           if (INTEGRAL_TYPE_P (TREE_TYPE (def))
2626               && TYPE_PRECISION (TREE_TYPE (def)) == 16)
2627             oprnd0 = def;
2628         }
2629
2630       type = TREE_TYPE (lhs);
2631       vectype = get_vectype_for_scalar_type (vinfo, type);
2632       if (vectype == NULL_TREE)
2633         return NULL;
2634
2635       if (tree char_vectype = get_same_sized_vectype (char_type_node, vectype))
2636         {
2637           /* The encoding uses one stepped pattern for each byte in the
2638              16-bit word.  */
2639           vec_perm_builder elts (TYPE_VECTOR_SUBPARTS (char_vectype), 2, 3);
2640           for (unsigned i = 0; i < 3; ++i)
2641             for (unsigned j = 0; j < 2; ++j)
2642               elts.quick_push ((i + 1) * 2 - j - 1);
2643
2644           vec_perm_indices indices (elts, 1,
2645                                     TYPE_VECTOR_SUBPARTS (char_vectype));
2646           if (can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
2647             {
2648               /* vectorizable_bswap can handle the __builtin_bswap16 if we
2649                  undo the argument promotion.  */
2650               if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2651                 {
2652                   def = vect_recog_temp_ssa_var (type, NULL);
2653                   def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2654                   append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2655                   oprnd0 = def;
2656                 }
2657
2658               /* Pattern detected.  */
2659               vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2660
2661               *type_out = vectype;
2662
2663               /* Pattern supported.  Create a stmt to be used to replace the
2664                  pattern, with the unpromoted argument.  */
2665               var = vect_recog_temp_ssa_var (type, NULL);
2666               pattern_stmt = gimple_build_call (gimple_call_fndecl (last_stmt),
2667                                                 1, oprnd0);
2668               gimple_call_set_lhs (pattern_stmt, var);
2669               gimple_call_set_fntype (as_a <gcall *> (pattern_stmt),
2670                                       gimple_call_fntype (last_stmt));
2671               return pattern_stmt;
2672             }
2673         }
2674
2675       oprnd1 = build_int_cst (integer_type_node, 8);
2676       rhs_code = LROTATE_EXPR;
2677       bswap16_p = true;
2678     }
2679   else
2680     return NULL;
2681
2682   if (TREE_CODE (oprnd0) != SSA_NAME
2683       || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
2684       || !INTEGRAL_TYPE_P (type)
2685       || !TYPE_UNSIGNED (type))
2686     return NULL;
2687
2688   stmt_vec_info def_stmt_info;
2689   if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt))
2690     return NULL;
2691
2692   if (dt != vect_internal_def
2693       && dt != vect_constant_def
2694       && dt != vect_external_def)
2695     return NULL;
2696
2697   vectype = get_vectype_for_scalar_type (vinfo, type);
2698   if (vectype == NULL_TREE)
2699     return NULL;
2700
2701   /* If vector/vector or vector/scalar rotate is supported by the target,
2702      don't do anything here.  */
2703   optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
2704   if (optab1
2705       && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2706     {
2707      use_rotate:
2708       if (bswap16_p)
2709         {
2710           if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2711             {
2712               def = vect_recog_temp_ssa_var (type, NULL);
2713               def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2714               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2715               oprnd0 = def;
2716             }
2717
2718           /* Pattern detected.  */
2719           vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2720
2721           *type_out = vectype;
2722
2723           /* Pattern supported.  Create a stmt to be used to replace the
2724              pattern.  */
2725           var = vect_recog_temp_ssa_var (type, NULL);
2726           pattern_stmt = gimple_build_assign (var, LROTATE_EXPR, oprnd0,
2727                                               oprnd1);
2728           return pattern_stmt;
2729         }
2730       return NULL;
2731     }
2732
2733   if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
2734     {
2735       optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
2736       if (optab2
2737           && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2738         goto use_rotate;
2739     }
2740
2741   /* If vector/vector or vector/scalar shifts aren't supported by the target,
2742      don't do anything here either.  */
2743   optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
2744   optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
2745   if (!optab1
2746       || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2747       || !optab2
2748       || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2749     {
2750       if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
2751         return NULL;
2752       optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
2753       optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
2754       if (!optab1
2755           || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2756           || !optab2
2757           || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2758         return NULL;
2759     }
2760
2761   *type_out = vectype;
2762
2763   if (bswap16_p && !useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2764     {
2765       def = vect_recog_temp_ssa_var (type, NULL);
2766       def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2767       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2768       oprnd0 = def;
2769     }
2770
2771   if (dt == vect_external_def && TREE_CODE (oprnd1) == SSA_NAME)
2772     ext_def = vect_get_external_def_edge (vinfo, oprnd1);
2773
2774   def = NULL_TREE;
2775   scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
2776   if (dt != vect_internal_def || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
2777     def = oprnd1;
2778   else if (def_stmt && gimple_assign_cast_p (def_stmt))
2779     {
2780       tree rhs1 = gimple_assign_rhs1 (def_stmt);
2781       if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
2782           && TYPE_PRECISION (TREE_TYPE (rhs1))
2783              == TYPE_PRECISION (type))
2784         def = rhs1;
2785     }
2786
2787   if (def == NULL_TREE)
2788     {
2789       def = vect_recog_temp_ssa_var (type, NULL);
2790       def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2791       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2792     }
2793   stype = TREE_TYPE (def);
2794
2795   if (TREE_CODE (def) == INTEGER_CST)
2796     {
2797       if (!tree_fits_uhwi_p (def)
2798           || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
2799           || integer_zerop (def))
2800         return NULL;
2801       def2 = build_int_cst (stype,
2802                             GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
2803     }
2804   else
2805     {
2806       tree vecstype = get_vectype_for_scalar_type (vinfo, stype);
2807
2808       if (vecstype == NULL_TREE)
2809         return NULL;
2810       def2 = vect_recog_temp_ssa_var (stype, NULL);
2811       def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
2812       if (ext_def)
2813         {
2814           basic_block new_bb
2815             = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2816           gcc_assert (!new_bb);
2817         }
2818       else
2819         append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
2820
2821       def2 = vect_recog_temp_ssa_var (stype, NULL);
2822       tree mask = build_int_cst (stype, GET_MODE_PRECISION (mode) - 1);
2823       def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
2824                                       gimple_assign_lhs (def_stmt), mask);
2825       if (ext_def)
2826         {
2827           basic_block new_bb
2828             = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2829           gcc_assert (!new_bb);
2830         }
2831       else
2832         append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
2833     }
2834
2835   var1 = vect_recog_temp_ssa_var (type, NULL);
2836   def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
2837                                         ? LSHIFT_EXPR : RSHIFT_EXPR,
2838                                   oprnd0, def);
2839   append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2840
2841   var2 = vect_recog_temp_ssa_var (type, NULL);
2842   def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
2843                                         ? RSHIFT_EXPR : LSHIFT_EXPR,
2844                                   oprnd0, def2);
2845   append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2846
2847   /* Pattern detected.  */
2848   vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2849
2850   /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
2851   var = vect_recog_temp_ssa_var (type, NULL);
2852   pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
2853
2854   return pattern_stmt;
2855 }
2856
2857 /* Detect a vector by vector shift pattern that wouldn't be otherwise
2858    vectorized:
2859
2860    type a_t;
2861    TYPE b_T, res_T;
2862
2863    S1 a_t = ;
2864    S2 b_T = ;
2865    S3 res_T = b_T op a_t;
2866
2867   where type 'TYPE' is a type with different size than 'type',
2868   and op is <<, >> or rotate.
2869
2870   Also detect cases:
2871
2872    type a_t;
2873    TYPE b_T, c_T, res_T;
2874
2875    S0 c_T = ;
2876    S1 a_t = (type) c_T;
2877    S2 b_T = ;
2878    S3 res_T = b_T op a_t;
2879
2880   Input/Output:
2881
2882   * STMT_VINFO: The stmt from which the pattern search begins,
2883     i.e. the shift/rotate stmt.  The original stmt (S3) is replaced
2884     with a shift/rotate which has same type on both operands, in the
2885     second case just b_T op c_T, in the first case with added cast
2886     from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2887
2888   Output:
2889
2890   * TYPE_OUT: The type of the output of this pattern.
2891
2892   * Return value: A new stmt that will be used to replace the shift/rotate
2893     S3 stmt.  */
2894
2895 static gimple *
2896 vect_recog_vector_vector_shift_pattern (vec_info *vinfo,
2897                                         stmt_vec_info stmt_vinfo,
2898                                         tree *type_out)
2899 {
2900   gimple *last_stmt = stmt_vinfo->stmt;
2901   tree oprnd0, oprnd1, lhs, var;
2902   gimple *pattern_stmt;
2903   enum tree_code rhs_code;
2904
2905   if (!is_gimple_assign (last_stmt))
2906     return NULL;
2907
2908   rhs_code = gimple_assign_rhs_code (last_stmt);
2909   switch (rhs_code)
2910     {
2911     case LSHIFT_EXPR:
2912     case RSHIFT_EXPR:
2913     case LROTATE_EXPR:
2914     case RROTATE_EXPR:
2915       break;
2916     default:
2917       return NULL;
2918     }
2919
2920   lhs = gimple_assign_lhs (last_stmt);
2921   oprnd0 = gimple_assign_rhs1 (last_stmt);
2922   oprnd1 = gimple_assign_rhs2 (last_stmt);
2923   if (TREE_CODE (oprnd0) != SSA_NAME
2924       || TREE_CODE (oprnd1) != SSA_NAME
2925       || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2926       || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
2927       || TYPE_PRECISION (TREE_TYPE (lhs))
2928          != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2929     return NULL;
2930
2931   stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
2932   if (!def_vinfo)
2933     return NULL;
2934
2935   *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (oprnd0));
2936   if (*type_out == NULL_TREE)
2937     return NULL;
2938
2939   tree def = NULL_TREE;
2940   gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
2941   if (def_stmt && gimple_assign_cast_p (def_stmt))
2942     {
2943       tree rhs1 = gimple_assign_rhs1 (def_stmt);
2944       if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2945           && TYPE_PRECISION (TREE_TYPE (rhs1))
2946              == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2947         {
2948           if (TYPE_PRECISION (TREE_TYPE (oprnd1))
2949               >= TYPE_PRECISION (TREE_TYPE (rhs1)))
2950             def = rhs1;
2951           else
2952             {
2953               tree mask
2954                 = build_low_bits_mask (TREE_TYPE (rhs1),
2955                                        TYPE_PRECISION (TREE_TYPE (oprnd1)));
2956               def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
2957               def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
2958               tree vecstype = get_vectype_for_scalar_type (vinfo,
2959                                                            TREE_TYPE (rhs1));
2960               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
2961             }
2962         }
2963     }
2964
2965   if (def == NULL_TREE)
2966     {
2967       def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2968       def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2969       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2970     }
2971
2972   /* Pattern detected.  */
2973   vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
2974
2975   /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
2976   var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2977   pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
2978
2979   return pattern_stmt;
2980 }
2981
2982 /* Return true iff the target has a vector optab implementing the operation
2983    CODE on type VECTYPE.  */
2984
2985 static bool
2986 target_has_vecop_for_code (tree_code code, tree vectype)
2987 {
2988   optab voptab = optab_for_tree_code (code, vectype, optab_vector);
2989   return voptab
2990          && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
2991 }
2992
2993 /* Verify that the target has optabs of VECTYPE to perform all the steps
2994    needed by the multiplication-by-immediate synthesis algorithm described by
2995    ALG and VAR.  If SYNTH_SHIFT_P is true ensure that vector addition is
2996    present.  Return true iff the target supports all the steps.  */
2997
2998 static bool
2999 target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
3000                                  tree vectype, bool synth_shift_p)
3001 {
3002   if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
3003     return false;
3004
3005   bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
3006   bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
3007
3008   if (var == negate_variant
3009       && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
3010     return false;
3011
3012   /* If we must synthesize shifts with additions make sure that vector
3013      addition is available.  */
3014   if ((var == add_variant || synth_shift_p) && !supports_vplus)
3015     return false;
3016
3017   for (int i = 1; i < alg->ops; i++)
3018     {
3019       switch (alg->op[i])
3020         {
3021         case alg_shift:
3022           break;
3023         case alg_add_t_m2:
3024         case alg_add_t2_m:
3025         case alg_add_factor:
3026           if (!supports_vplus)
3027             return false;
3028           break;
3029         case alg_sub_t_m2:
3030         case alg_sub_t2_m:
3031         case alg_sub_factor:
3032           if (!supports_vminus)
3033             return false;
3034           break;
3035         case alg_unknown:
3036         case alg_m:
3037         case alg_zero:
3038         case alg_impossible:
3039           return false;
3040         default:
3041           gcc_unreachable ();
3042         }
3043     }
3044
3045   return true;
3046 }
3047
3048 /* Synthesize a left shift of OP by AMNT bits using a series of additions and
3049    putting the final result in DEST.  Append all statements but the last into
3050    VINFO.  Return the last statement.  */
3051
3052 static gimple *
3053 synth_lshift_by_additions (vec_info *vinfo,
3054                            tree dest, tree op, HOST_WIDE_INT amnt,
3055                            stmt_vec_info stmt_info)
3056 {
3057   HOST_WIDE_INT i;
3058   tree itype = TREE_TYPE (op);
3059   tree prev_res = op;
3060   gcc_assert (amnt >= 0);
3061   for (i = 0; i < amnt; i++)
3062     {
3063       tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
3064                       : dest;
3065       gimple *stmt
3066         = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
3067       prev_res = tmp_var;
3068       if (i < amnt - 1)
3069         append_pattern_def_seq (vinfo, stmt_info, stmt);
3070       else
3071         return stmt;
3072     }
3073   gcc_unreachable ();
3074   return NULL;
3075 }
3076
3077 /* Helper for vect_synth_mult_by_constant.  Apply a binary operation
3078    CODE to operands OP1 and OP2, creating a new temporary SSA var in
3079    the process if necessary.  Append the resulting assignment statements
3080    to the sequence in STMT_VINFO.  Return the SSA variable that holds the
3081    result of the binary operation.  If SYNTH_SHIFT_P is true synthesize
3082    left shifts using additions.  */
3083
3084 static tree
3085 apply_binop_and_append_stmt (vec_info *vinfo,
3086                              tree_code code, tree op1, tree op2,
3087                              stmt_vec_info stmt_vinfo, bool synth_shift_p)
3088 {
3089   if (integer_zerop (op2)
3090       && (code == LSHIFT_EXPR
3091           || code == PLUS_EXPR))
3092     {
3093       gcc_assert (TREE_CODE (op1) == SSA_NAME);
3094       return op1;
3095     }
3096
3097   gimple *stmt;
3098   tree itype = TREE_TYPE (op1);
3099   tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
3100
3101   if (code == LSHIFT_EXPR
3102       && synth_shift_p)
3103     {
3104       stmt = synth_lshift_by_additions (vinfo, tmp_var, op1,
3105                                         TREE_INT_CST_LOW (op2), stmt_vinfo);
3106       append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3107       return tmp_var;
3108     }
3109
3110   stmt = gimple_build_assign (tmp_var, code, op1, op2);
3111   append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3112   return tmp_var;
3113 }
3114
3115 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
3116    and simple arithmetic operations to be vectorized.  Record the statements
3117    produced in STMT_VINFO and return the last statement in the sequence or
3118    NULL if it's not possible to synthesize such a multiplication.
3119    This function mirrors the behavior of expand_mult_const in expmed.cc but
3120    works on tree-ssa form.  */
3121
3122 static gimple *
3123 vect_synth_mult_by_constant (vec_info *vinfo, tree op, tree val,
3124                              stmt_vec_info stmt_vinfo)
3125 {
3126   tree itype = TREE_TYPE (op);
3127   machine_mode mode = TYPE_MODE (itype);
3128   struct algorithm alg;
3129   mult_variant variant;
3130   if (!tree_fits_shwi_p (val))
3131     return NULL;
3132
3133   /* Multiplication synthesis by shifts, adds and subs can introduce
3134      signed overflow where the original operation didn't.  Perform the
3135      operations on an unsigned type and cast back to avoid this.
3136      In the future we may want to relax this for synthesis algorithms
3137      that we can prove do not cause unexpected overflow.  */
3138   bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
3139
3140   tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
3141   tree vectype = get_vectype_for_scalar_type (vinfo, multtype);
3142   if (!vectype)
3143     return NULL;
3144
3145   /* Targets that don't support vector shifts but support vector additions
3146      can synthesize shifts that way.  */
3147   bool synth_shift_p = !vect_supportable_shift (vinfo, LSHIFT_EXPR, multtype);
3148
3149   HOST_WIDE_INT hwval = tree_to_shwi (val);
3150   /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
3151      The vectorizer's benefit analysis will decide whether it's beneficial
3152      to do this.  */
3153   bool possible = choose_mult_variant (VECTOR_MODE_P (TYPE_MODE (vectype))
3154                                        ? TYPE_MODE (vectype) : mode,
3155                                        hwval, &alg, &variant, MAX_COST);
3156   if (!possible)
3157     return NULL;
3158
3159   if (!target_supports_mult_synth_alg (&alg, variant, vectype, synth_shift_p))
3160     return NULL;
3161
3162   tree accumulator;
3163
3164   /* Clear out the sequence of statements so we can populate it below.  */
3165   gimple *stmt = NULL;
3166
3167   if (cast_to_unsigned_p)
3168     {
3169       tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
3170       stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
3171       append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3172       op = tmp_op;
3173     }
3174
3175   if (alg.op[0] == alg_zero)
3176     accumulator = build_int_cst (multtype, 0);
3177   else
3178     accumulator = op;
3179
3180   bool needs_fixup = (variant == negate_variant)
3181                       || (variant == add_variant);
3182
3183   for (int i = 1; i < alg.ops; i++)
3184     {
3185       tree shft_log = build_int_cst (multtype, alg.log[i]);
3186       tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
3187       tree tmp_var = NULL_TREE;
3188
3189       switch (alg.op[i])
3190         {
3191         case alg_shift:
3192           if (synth_shift_p)
3193             stmt
3194               = synth_lshift_by_additions (vinfo, accum_tmp, accumulator,
3195                                            alg.log[i], stmt_vinfo);
3196           else
3197             stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
3198                                          shft_log);
3199           break;
3200         case alg_add_t_m2:
3201           tmp_var
3202             = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op, shft_log,
3203                                            stmt_vinfo, synth_shift_p);
3204           stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
3205                                        tmp_var);
3206           break;
3207         case alg_sub_t_m2:
3208           tmp_var = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op,
3209                                                  shft_log, stmt_vinfo,
3210                                                  synth_shift_p);
3211           /* In some algorithms the first step involves zeroing the
3212              accumulator.  If subtracting from such an accumulator
3213              just emit the negation directly.  */
3214           if (integer_zerop (accumulator))
3215             stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
3216           else
3217             stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
3218                                         tmp_var);
3219           break;
3220         case alg_add_t2_m:
3221           tmp_var
3222             = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
3223                                            shft_log, stmt_vinfo, synth_shift_p);
3224           stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
3225           break;
3226         case alg_sub_t2_m:
3227           tmp_var
3228             = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
3229                                            shft_log, stmt_vinfo, synth_shift_p);
3230           stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
3231           break;
3232         case alg_add_factor:
3233           tmp_var
3234             = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
3235                                            shft_log, stmt_vinfo, synth_shift_p);
3236           stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
3237                                        tmp_var);
3238           break;
3239         case alg_sub_factor:
3240           tmp_var
3241             = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
3242                                            shft_log, stmt_vinfo, synth_shift_p);
3243           stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
3244                                       accumulator);
3245           break;
3246         default:
3247           gcc_unreachable ();
3248         }
3249       /* We don't want to append the last stmt in the sequence to stmt_vinfo
3250          but rather return it directly.  */
3251
3252       if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
3253         append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3254       accumulator = accum_tmp;
3255     }
3256   if (variant == negate_variant)
3257     {
3258       tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
3259       stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
3260       accumulator = accum_tmp;
3261       if (cast_to_unsigned_p)
3262         append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3263     }
3264   else if (variant == add_variant)
3265     {
3266       tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
3267       stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
3268       accumulator = accum_tmp;
3269       if (cast_to_unsigned_p)
3270         append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3271     }
3272   /* Move back to a signed if needed.  */
3273   if (cast_to_unsigned_p)
3274     {
3275       tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
3276       stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
3277     }
3278
3279   return stmt;
3280 }
3281
3282 /* Detect multiplication by constant and convert it into a sequence of
3283    shifts and additions, subtractions, negations.  We reuse the
3284    choose_mult_variant algorithms from expmed.cc
3285
3286    Input/Output:
3287
3288    STMT_VINFO: The stmt from which the pattern search begins,
3289    i.e. the mult stmt.
3290
3291  Output:
3292
3293   * TYPE_OUT: The type of the output of this pattern.
3294
3295   * Return value: A new stmt that will be used to replace
3296     the multiplication.  */
3297
3298 static gimple *
3299 vect_recog_mult_pattern (vec_info *vinfo,
3300                          stmt_vec_info stmt_vinfo, tree *type_out)
3301 {
3302   gimple *last_stmt = stmt_vinfo->stmt;
3303   tree oprnd0, oprnd1, vectype, itype;
3304   gimple *pattern_stmt;
3305
3306   if (!is_gimple_assign (last_stmt))
3307     return NULL;
3308
3309   if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
3310     return NULL;
3311
3312   oprnd0 = gimple_assign_rhs1 (last_stmt);
3313   oprnd1 = gimple_assign_rhs2 (last_stmt);
3314   itype = TREE_TYPE (oprnd0);
3315
3316   if (TREE_CODE (oprnd0) != SSA_NAME
3317       || TREE_CODE (oprnd1) != INTEGER_CST
3318       || !INTEGRAL_TYPE_P (itype)
3319       || !type_has_mode_precision_p (itype))
3320     return NULL;
3321
3322   vectype = get_vectype_for_scalar_type (vinfo, itype);
3323   if (vectype == NULL_TREE)
3324     return NULL;
3325
3326   /* If the target can handle vectorized multiplication natively,
3327      don't attempt to optimize this.  */
3328   optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
3329   if (mul_optab != unknown_optab)
3330     {
3331       machine_mode vec_mode = TYPE_MODE (vectype);
3332       int icode = (int) optab_handler (mul_optab, vec_mode);
3333       if (icode != CODE_FOR_nothing)
3334        return NULL;
3335     }
3336
3337   pattern_stmt = vect_synth_mult_by_constant (vinfo,
3338                                               oprnd0, oprnd1, stmt_vinfo);
3339   if (!pattern_stmt)
3340     return NULL;
3341
3342   /* Pattern detected.  */
3343   vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
3344
3345   *type_out = vectype;
3346
3347   return pattern_stmt;
3348 }
3349
3350 /* Detect a signed division by a constant that wouldn't be
3351    otherwise vectorized:
3352
3353    type a_t, b_t;
3354
3355    S1 a_t = b_t / N;
3356
3357   where type 'type' is an integral type and N is a constant.
3358
3359   Similarly handle modulo by a constant:
3360
3361    S4 a_t = b_t % N;
3362
3363   Input/Output:
3364
3365   * STMT_VINFO: The stmt from which the pattern search begins,
3366     i.e. the division stmt.  S1 is replaced by if N is a power
3367     of two constant and type is signed:
3368   S3  y_t = b_t < 0 ? N - 1 : 0;
3369   S2  x_t = b_t + y_t;
3370   S1' a_t = x_t >> log2 (N);
3371
3372     S4 is replaced if N is a power of two constant and
3373     type is signed by (where *_T temporaries have unsigned type):
3374   S9  y_T = b_t < 0 ? -1U : 0U;
3375   S8  z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
3376   S7  z_t = (type) z_T;
3377   S6  w_t = b_t + z_t;
3378   S5  x_t = w_t & (N - 1);
3379   S4' a_t = x_t - z_t;
3380
3381   Output:
3382
3383   * TYPE_OUT: The type of the output of this pattern.
3384
3385   * Return value: A new stmt that will be used to replace the division
3386     S1 or modulo S4 stmt.  */
3387
3388 static gimple *
3389 vect_recog_divmod_pattern (vec_info *vinfo,
3390                            stmt_vec_info stmt_vinfo, tree *type_out)
3391 {
3392   gimple *last_stmt = stmt_vinfo->stmt;
3393   tree oprnd0, oprnd1, vectype, itype, cond;
3394   gimple *pattern_stmt, *def_stmt;
3395   enum tree_code rhs_code;
3396   optab optab;
3397   tree q;
3398   int dummy_int, prec;
3399
3400   if (!is_gimple_assign (last_stmt))
3401     return NULL;
3402
3403   rhs_code = gimple_assign_rhs_code (last_stmt);
3404   switch (rhs_code)
3405     {
3406     case TRUNC_DIV_EXPR:
3407     case EXACT_DIV_EXPR:
3408     case TRUNC_MOD_EXPR:
3409       break;
3410     default:
3411       return NULL;
3412     }
3413
3414   oprnd0 = gimple_assign_rhs1 (last_stmt);
3415   oprnd1 = gimple_assign_rhs2 (last_stmt);
3416   itype = TREE_TYPE (oprnd0);
3417   if (TREE_CODE (oprnd0) != SSA_NAME
3418       || TREE_CODE (oprnd1) != INTEGER_CST
3419       || TREE_CODE (itype) != INTEGER_TYPE
3420       || !type_has_mode_precision_p (itype))
3421     return NULL;
3422
3423   scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
3424   vectype = get_vectype_for_scalar_type (vinfo, itype);
3425   if (vectype == NULL_TREE)
3426     return NULL;
3427
3428   if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
3429     {
3430       /* If the target can handle vectorized division or modulo natively,
3431          don't attempt to optimize this, since native division is likely
3432          to give smaller code.  */
3433       optab = optab_for_tree_code (rhs_code, vectype, optab_default);
3434       if (optab != unknown_optab)
3435         {
3436           machine_mode vec_mode = TYPE_MODE (vectype);
3437           int icode = (int) optab_handler (optab, vec_mode);
3438           if (icode != CODE_FOR_nothing)
3439             return NULL;
3440         }
3441     }
3442
3443   prec = TYPE_PRECISION (itype);
3444   if (integer_pow2p (oprnd1))
3445     {
3446       if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
3447         return NULL;
3448
3449       /* Pattern detected.  */
3450       vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
3451
3452       *type_out = vectype;
3453
3454       /* Check if the target supports this internal function.  */
3455       internal_fn ifn = IFN_DIV_POW2;
3456       if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
3457         {
3458           tree shift = build_int_cst (itype, tree_log2 (oprnd1));
3459
3460           tree var_div = vect_recog_temp_ssa_var (itype, NULL);
3461           gimple *div_stmt = gimple_build_call_internal (ifn, 2, oprnd0, shift);
3462           gimple_call_set_lhs (div_stmt, var_div);
3463
3464           if (rhs_code == TRUNC_MOD_EXPR)
3465             {
3466               append_pattern_def_seq (vinfo, stmt_vinfo, div_stmt);
3467               def_stmt
3468                 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3469                                        LSHIFT_EXPR, var_div, shift);
3470               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3471               pattern_stmt
3472                 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3473                                        MINUS_EXPR, oprnd0,
3474                                        gimple_assign_lhs (def_stmt));
3475             }
3476           else
3477             pattern_stmt = div_stmt;
3478           gimple_set_location (pattern_stmt, gimple_location (last_stmt));
3479
3480           return pattern_stmt;
3481         }
3482
3483       cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
3484                      build_int_cst (itype, 0));
3485       if (rhs_code == TRUNC_DIV_EXPR
3486           || rhs_code == EXACT_DIV_EXPR)
3487         {
3488           tree var = vect_recog_temp_ssa_var (itype, NULL);
3489           tree shift;
3490           def_stmt
3491             = gimple_build_assign (var, COND_EXPR, cond,
3492                                    fold_build2 (MINUS_EXPR, itype, oprnd1,
3493                                                 build_int_cst (itype, 1)),
3494                                    build_int_cst (itype, 0));
3495           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3496           var = vect_recog_temp_ssa_var (itype, NULL);
3497           def_stmt
3498             = gimple_build_assign (var, PLUS_EXPR, oprnd0,
3499                                    gimple_assign_lhs (def_stmt));
3500           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3501
3502           shift = build_int_cst (itype, tree_log2 (oprnd1));
3503           pattern_stmt
3504             = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3505                                    RSHIFT_EXPR, var, shift);
3506         }
3507       else
3508         {
3509           tree signmask;
3510           if (compare_tree_int (oprnd1, 2) == 0)
3511             {
3512               signmask = vect_recog_temp_ssa_var (itype, NULL);
3513               def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
3514                                               build_int_cst (itype, 1),
3515                                               build_int_cst (itype, 0));
3516               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3517             }
3518           else
3519             {
3520               tree utype
3521                 = build_nonstandard_integer_type (prec, 1);
3522               tree vecutype = get_vectype_for_scalar_type (vinfo, utype);
3523               tree shift
3524                 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
3525                                         - tree_log2 (oprnd1));
3526               tree var = vect_recog_temp_ssa_var (utype, NULL);
3527
3528               def_stmt = gimple_build_assign (var, COND_EXPR, cond,
3529                                               build_int_cst (utype, -1),
3530                                               build_int_cst (utype, 0));
3531               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
3532               var = vect_recog_temp_ssa_var (utype, NULL);
3533               def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
3534                                               gimple_assign_lhs (def_stmt),
3535                                               shift);
3536               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
3537               signmask = vect_recog_temp_ssa_var (itype, NULL);
3538               def_stmt
3539                 = gimple_build_assign (signmask, NOP_EXPR, var);
3540               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3541             }
3542           def_stmt
3543             = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3544                                    PLUS_EXPR, oprnd0, signmask);
3545           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3546           def_stmt
3547             = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3548                                    BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
3549                                    fold_build2 (MINUS_EXPR, itype, oprnd1,
3550                                                 build_int_cst (itype, 1)));
3551           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3552
3553           pattern_stmt
3554             = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3555                                    MINUS_EXPR, gimple_assign_lhs (def_stmt),
3556                                    signmask);
3557         }
3558
3559       return pattern_stmt;
3560     }
3561
3562   if (prec > HOST_BITS_PER_WIDE_INT
3563       || integer_zerop (oprnd1))
3564     return NULL;
3565
3566   if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
3567     return NULL;
3568
3569   if (TYPE_UNSIGNED (itype))
3570     {
3571       unsigned HOST_WIDE_INT mh, ml;
3572       int pre_shift, post_shift;
3573       unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
3574                                   & GET_MODE_MASK (itype_mode));
3575       tree t1, t2, t3, t4;
3576
3577       if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
3578         /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0.  */
3579         return NULL;
3580
3581       /* Find a suitable multiplier and right shift count
3582          instead of multiplying with D.  */
3583       mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
3584
3585       /* If the suggested multiplier is more than SIZE bits, we can do better
3586          for even divisors, using an initial right shift.  */
3587       if (mh != 0 && (d & 1) == 0)
3588         {
3589           pre_shift = ctz_or_zero (d);
3590           mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
3591                                   &ml, &post_shift, &dummy_int);
3592           gcc_assert (!mh);
3593         }
3594       else
3595         pre_shift = 0;
3596
3597       if (mh != 0)
3598         {
3599           if (post_shift - 1 >= prec)
3600             return NULL;
3601
3602           /* t1 = oprnd0 h* ml;
3603              t2 = oprnd0 - t1;
3604              t3 = t2 >> 1;
3605              t4 = t1 + t3;
3606              q = t4 >> (post_shift - 1);  */
3607           t1 = vect_recog_temp_ssa_var (itype, NULL);
3608           def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
3609                                           build_int_cst (itype, ml));
3610           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3611
3612           t2 = vect_recog_temp_ssa_var (itype, NULL);
3613           def_stmt
3614             = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
3615           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3616
3617           t3 = vect_recog_temp_ssa_var (itype, NULL);
3618           def_stmt
3619             = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
3620           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3621
3622           t4 = vect_recog_temp_ssa_var (itype, NULL);
3623           def_stmt
3624             = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
3625
3626           if (post_shift != 1)
3627             {
3628               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3629
3630               q = vect_recog_temp_ssa_var (itype, NULL);
3631               pattern_stmt
3632                 = gimple_build_assign (q, RSHIFT_EXPR, t4,
3633                                        build_int_cst (itype, post_shift - 1));
3634             }
3635           else
3636             {
3637               q = t4;
3638               pattern_stmt = def_stmt;
3639             }
3640         }
3641       else
3642         {
3643           if (pre_shift >= prec || post_shift >= prec)
3644             return NULL;
3645
3646           /* t1 = oprnd0 >> pre_shift;
3647              t2 = t1 h* ml;
3648              q = t2 >> post_shift;  */
3649           if (pre_shift)
3650             {
3651               t1 = vect_recog_temp_ssa_var (itype, NULL);
3652               def_stmt
3653                 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
3654                                        build_int_cst (NULL, pre_shift));
3655               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3656             }
3657           else
3658             t1 = oprnd0;
3659
3660           t2 = vect_recog_temp_ssa_var (itype, NULL);
3661           def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
3662                                           build_int_cst (itype, ml));
3663
3664           if (post_shift)
3665             {
3666               append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3667
3668               q = vect_recog_temp_ssa_var (itype, NULL);
3669               def_stmt
3670                 = gimple_build_assign (q, RSHIFT_EXPR, t2,
3671                                        build_int_cst (itype, post_shift));
3672             }
3673           else
3674             q = t2;
3675
3676           pattern_stmt = def_stmt;
3677         }
3678     }
3679   else
3680     {
3681       unsigned HOST_WIDE_INT ml;
3682       int post_shift;
3683       HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
3684       unsigned HOST_WIDE_INT abs_d;
3685       bool add = false;
3686       tree t1, t2, t3, t4;
3687
3688       /* Give up for -1.  */
3689       if (d == -1)
3690         return NULL;
3691
3692       /* Since d might be INT_MIN, we have to cast to
3693          unsigned HOST_WIDE_INT before negating to avoid
3694          undefined signed overflow.  */
3695       abs_d = (d >= 0
3696                ? (unsigned HOST_WIDE_INT) d
3697                : - (unsigned HOST_WIDE_INT) d);
3698
3699       /* n rem d = n rem -d */
3700       if (rhs_code == TRUNC_MOD_EXPR && d < 0)
3701         {
3702           d = abs_d;
3703           oprnd1 = build_int_cst (itype, abs_d);
3704         }
3705       if (HOST_BITS_PER_WIDE_INT >= prec
3706           && abs_d == HOST_WIDE_INT_1U << (prec - 1))
3707         /* This case is not handled correctly below.  */
3708         return NULL;
3709
3710       choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
3711       if (ml >= HOST_WIDE_INT_1U << (prec - 1))
3712         {
3713           add = true;
3714           ml |= HOST_WIDE_INT_M1U << (prec - 1);
3715         }
3716       if (post_shift >= prec)
3717         return NULL;
3718
3719       /* t1 = oprnd0 h* ml;  */
3720       t1 = vect_recog_temp_ssa_var (itype, NULL);
3721       def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
3722                                       build_int_cst (itype, ml));
3723
3724       if (add)
3725         {
3726           /* t2 = t1 + oprnd0;  */
3727           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3728           t2 = vect_recog_temp_ssa_var (itype, NULL);
3729           def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
3730         }
3731       else
3732         t2 = t1;
3733
3734       if (post_shift)
3735         {
3736           /* t3 = t2 >> post_shift;  */
3737           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3738           t3 = vect_recog_temp_ssa_var (itype, NULL);
3739           def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
3740                                           build_int_cst (itype, post_shift));
3741         }
3742       else
3743         t3 = t2;
3744
3745       int msb = 1;
3746       value_range r;
3747       get_range_query (cfun)->range_of_expr (r, oprnd0);
3748       if (r.kind () == VR_RANGE)
3749         {
3750           if (!wi::neg_p (r.lower_bound (), TYPE_SIGN (itype)))
3751             msb = 0;
3752           else if (wi::neg_p (r.upper_bound (), TYPE_SIGN (itype)))
3753             msb = -1;
3754         }
3755
3756       if (msb == 0 && d >= 0)
3757         {
3758           /* q = t3;  */
3759           q = t3;
3760           pattern_stmt = def_stmt;
3761         }
3762       else
3763         {
3764           /* t4 = oprnd0 >> (prec - 1);
3765              or if we know from VRP that oprnd0 >= 0
3766              t4 = 0;
3767              or if we know from VRP that oprnd0 < 0
3768              t4 = -1;  */
3769           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3770           t4 = vect_recog_temp_ssa_var (itype, NULL);
3771           if (msb != 1)
3772             def_stmt = gimple_build_assign (t4, INTEGER_CST,
3773                                             build_int_cst (itype, msb));
3774           else
3775             def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
3776                                             build_int_cst (itype, prec - 1));
3777           append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3778
3779           /* q = t3 - t4;  or q = t4 - t3;  */
3780           q = vect_recog_temp_ssa_var (itype, NULL);
3781           pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
3782                                               d < 0 ? t3 : t4);
3783         }
3784     }
3785
3786   if (rhs_code == TRUNC_MOD_EXPR)
3787     {
3788       tree r, t1;
3789
3790       /* We divided.  Now finish by:
3791          t1 = q * oprnd1;
3792          r = oprnd0 - t1;  */
3793       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
3794
3795       t1 = vect_recog_temp_ssa_var (itype, NULL);
3796       def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
3797       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3798
3799       r = vect_recog_temp_ssa_var (itype, NULL);
3800       pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
3801     }
3802
3803   /* Pattern detected.  */
3804   vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
3805
3806   *type_out = vectype;
3807   return pattern_stmt;
3808 }
3809
3810 /* Function vect_recog_mixed_size_cond_pattern
3811
3812    Try to find the following pattern:
3813
3814      type x_t, y_t;
3815      TYPE a_T, b_T, c_T;
3816    loop:
3817      S1  a_T = x_t CMP y_t ? b_T : c_T;
3818
3819    where type 'TYPE' is an integral type which has different size
3820    from 'type'.  b_T and c_T are either constants (and if 'TYPE' is wider
3821    than 'type', the constants need to fit into an integer type
3822    with the same width as 'type') or results of conversion from 'type'.
3823
3824    Input:
3825
3826    * STMT_VINFO: The stmt from which the pattern search begins.
3827
3828    Output:
3829
3830    * TYPE_OUT: The type of the output of this pattern.
3831
3832    * Return value: A new stmt that will be used to replace the pattern.
3833         Additionally a def_stmt is added.
3834
3835         a_it = x_t CMP y_t ? b_it : c_it;
3836         a_T = (TYPE) a_it;  */
3837
3838 static gimple *
3839 vect_recog_mixed_size_cond_pattern (vec_info *vinfo,
3840                                     stmt_vec_info stmt_vinfo, tree *type_out)
3841 {
3842   gimple *last_stmt = stmt_vinfo->stmt;
3843   tree cond_expr, then_clause, else_clause;
3844   tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
3845   gimple *pattern_stmt, *def_stmt;
3846   tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
3847   gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
3848   bool promotion;
3849   tree comp_scalar_type;
3850
3851   if (!is_gimple_assign (last_stmt)
3852       || gimple_assign_rhs_code (last_stmt) != COND_EXPR
3853       || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
3854     return NULL;
3855
3856   cond_expr = gimple_assign_rhs1 (last_stmt);
3857   then_clause = gimple_assign_rhs2 (last_stmt);
3858   else_clause = gimple_assign_rhs3 (last_stmt);
3859
3860   if (!COMPARISON_CLASS_P (cond_expr))
3861     return NULL;
3862
3863   comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
3864   comp_vectype = get_vectype_for_scalar_type (vinfo, comp_scalar_type);
3865   if (comp_vectype == NULL_TREE)
3866     return NULL;
3867
3868   type = TREE_TYPE (gimple_assign_lhs (last_stmt));
3869   if (types_compatible_p (type, comp_scalar_type)
3870       || ((TREE_CODE (then_clause) != INTEGER_CST
3871            || TREE_CODE (else_clause) != INTEGER_CST)
3872           && !INTEGRAL_TYPE_P (comp_scalar_type))
3873       || !INTEGRAL_TYPE_P (type))
3874     return NULL;
3875
3876   if ((TREE_CODE (then_clause) != INTEGER_CST
3877        && !type_conversion_p (vinfo, then_clause, false,
3878                               &orig_type0, &def_stmt0, &promotion))
3879       || (TREE_CODE (else_clause) != INTEGER_CST
3880           && !type_conversion_p (vinfo, else_clause, false,
3881                                  &orig_type1, &def_stmt1, &promotion)))
3882     return NULL;
3883
3884   if (orig_type0 && orig_type1
3885       && !types_compatible_p (orig_type0, orig_type1))
3886     return NULL;
3887
3888   if (orig_type0)
3889     {
3890       if (!types_compatible_p (orig_type0, comp_scalar_type))
3891         return NULL;
3892       then_clause = gimple_assign_rhs1 (def_stmt0);
3893       itype = orig_type0;
3894     }
3895
3896   if (orig_type1)
3897     {
3898       if (!types_compatible_p (orig_type1, comp_scalar_type))
3899         return NULL;
3900       else_clause = gimple_assign_rhs1 (def_stmt1);
3901       itype = orig_type1;
3902     }
3903
3904
3905   HOST_WIDE_INT cmp_mode_size
3906     = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
3907
3908   scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
3909   if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
3910     return NULL;
3911
3912   vectype = get_vectype_for_scalar_type (vinfo, type);
3913   if (vectype == NULL_TREE)
3914     return NULL;
3915
3916   if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
3917     return NULL;
3918
3919   if (itype == NULL_TREE)
3920     itype = build_nonstandard_integer_type (cmp_mode_size,
3921                                             TYPE_UNSIGNED (type));
3922
3923   if (itype == NULL_TREE
3924       || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
3925     return NULL;
3926
3927   vecitype = get_vectype_for_scalar_type (vinfo, itype);
3928   if (vecitype == NULL_TREE)
3929     return NULL;
3930
3931   if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
3932     return NULL;
3933
3934   if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
3935     {
3936       if ((TREE_CODE (then_clause) == INTEGER_CST
3937            && !int_fits_type_p (then_clause, itype))
3938           || (TREE_CODE (else_clause) == INTEGER_CST
3939               && !int_fits_type_p (else_clause, itype)))
3940         return NULL;
3941     }
3942
3943   def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3944                                   COND_EXPR, unshare_expr (cond_expr),
3945                                   fold_convert (itype, then_clause),
3946                                   fold_convert (itype, else_clause));
3947   pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3948                                       NOP_EXPR, gimple_assign_lhs (def_stmt));
3949
3950   append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecitype);
3951   *type_out = vectype;
3952
3953   vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt);
3954
3955   return pattern_stmt;
3956 }
3957
3958
3959 /* Helper function of vect_recog_bool_pattern.  Called recursively, return
3960    true if bool VAR can and should be optimized that way.  Assume it shouldn't
3961    in case it's a result of a comparison which can be directly vectorized into
3962    a vector comparison.  Fills in STMTS with all stmts visited during the
3963    walk.  */
3964
3965 static bool
3966 check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
3967 {
3968   tree rhs1;
3969   enum tree_code rhs_code;
3970
3971   stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3972   if (!def_stmt_info)
3973     return false;
3974
3975   gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
3976   if (!def_stmt)
3977     return false;
3978
3979   if (stmts.contains (def_stmt))
3980     return true;
3981
3982   rhs1 = gimple_assign_rhs1 (def_stmt);
3983   rhs_code = gimple_assign_rhs_code (def_stmt);
3984   switch (rhs_code)
3985     {
3986     case SSA_NAME:
3987       if (! check_bool_pattern (rhs1, vinfo, stmts))
3988         return false;
3989       break;
3990
3991     CASE_CONVERT:
3992       if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3993         return false;
3994       if (! check_bool_pattern (rhs1, vinfo, stmts))
3995         return false;
3996       break;
3997
3998     case BIT_NOT_EXPR:
3999       if (! check_bool_pattern (rhs1, vinfo, stmts))
4000         return false;
4001       break;
4002
4003     case BIT_AND_EXPR:
4004     case BIT_IOR_EXPR:
4005     case BIT_XOR_EXPR:
4006       if (! check_bool_pattern (rhs1, vinfo, stmts)
4007           || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
4008         return false;
4009       break;
4010
4011     default:
4012       if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
4013         {
4014           tree vecitype, comp_vectype;
4015
4016           /* If the comparison can throw, then is_gimple_condexpr will be
4017              false and we can't make a COND_EXPR/VEC_COND_EXPR out of it.  */
4018           if (stmt_could_throw_p (cfun, def_stmt))
4019             return false;
4020
4021           comp_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1));
4022           if (comp_vectype == NULL_TREE)
4023             return false;
4024
4025           tree mask_type = get_mask_type_for_scalar_type (vinfo,
4026                                                           TREE_TYPE (rhs1));
4027           if (mask_type
4028               && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
4029             return false;
4030
4031           if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
4032             {
4033               scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
4034               tree itype
4035                 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
4036               vecitype = get_vectype_for_scalar_type (vinfo, itype);
4037               if (vecitype == NULL_TREE)
4038                 return false;
4039             }
4040           else
4041             vecitype = comp_vectype;
4042           if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
4043             return false;
4044         }
4045       else
4046         return false;
4047       break;
4048     }
4049
4050   bool res = stmts.add (def_stmt);
4051   /* We can't end up recursing when just visiting SSA defs but not PHIs.  */
4052   gcc_assert (!res);
4053
4054   return true;
4055 }
4056
4057
4058 /* Helper function of adjust_bool_pattern.  Add a cast to TYPE to a previous
4059    stmt (SSA_NAME_DEF_STMT of VAR) adding a cast to STMT_INFOs
4060    pattern sequence.  */
4061
4062 static tree
4063 adjust_bool_pattern_cast (vec_info *vinfo,
4064                           tree type, tree var, stmt_vec_info stmt_info)
4065 {
4066   gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
4067                                            NOP_EXPR, var);
4068   append_pattern_def_seq (vinfo, stmt_info, cast_stmt,
4069                           get_vectype_for_scalar_type (vinfo, type));
4070   return gimple_assign_lhs (cast_stmt);
4071 }
4072
4073 /* Helper function of vect_recog_bool_pattern.  Do the actual transformations.
4074    VAR is an SSA_NAME that should be transformed from bool to a wider integer
4075    type, OUT_TYPE is the desired final integer type of the whole pattern.
4076    STMT_INFO is the info of the pattern root and is where pattern stmts should
4077    be associated with.  DEFS is a map of pattern defs.  */
4078
4079 static void
4080 adjust_bool_pattern (vec_info *vinfo, tree var, tree out_type,
4081                      stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
4082 {
4083   gimple *stmt = SSA_NAME_DEF_STMT (var);
4084   enum tree_code rhs_code, def_rhs_code;
4085   tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
4086   location_t loc;
4087   gimple *pattern_stmt, *def_stmt;
4088   tree trueval = NULL_TREE;
4089
4090   rhs1 = gimple_assign_rhs1 (stmt);
4091   rhs2 = gimple_assign_rhs2 (stmt);
4092   rhs_code = gimple_assign_rhs_code (stmt);
4093   loc = gimple_location (stmt);
4094   switch (rhs_code)
4095     {
4096     case SSA_NAME:
4097     CASE_CONVERT:
4098       irhs1 = *defs.get (rhs1);
4099       itype = TREE_TYPE (irhs1);
4100       pattern_stmt
4101         = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4102                                SSA_NAME, irhs1);
4103       break;
4104
4105     case BIT_NOT_EXPR:
4106       irhs1 = *defs.get (rhs1);
4107       itype = TREE_TYPE (irhs1);
4108       pattern_stmt
4109         = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4110                                BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
4111       break;
4112
4113     case BIT_AND_EXPR:
4114       /* Try to optimize x = y & (a < b ? 1 : 0); into
4115          x = (a < b ? y : 0);
4116
4117          E.g. for:
4118            bool a_b, b_b, c_b;
4119            TYPE d_T;
4120
4121            S1  a_b = x1 CMP1 y1;
4122            S2  b_b = x2 CMP2 y2;
4123            S3  c_b = a_b & b_b;
4124            S4  d_T = (TYPE) c_b;
4125
4126          we would normally emit:
4127
4128            S1'  a_T = x1 CMP1 y1 ? 1 : 0;
4129            S2'  b_T = x2 CMP2 y2 ? 1 : 0;
4130            S3'  c_T = a_T & b_T;
4131            S4'  d_T = c_T;
4132
4133          but we can save one stmt by using the
4134          result of one of the COND_EXPRs in the other COND_EXPR and leave
4135          BIT_AND_EXPR stmt out:
4136
4137            S1'  a_T = x1 CMP1 y1 ? 1 : 0;
4138            S3'  c_T = x2 CMP2 y2 ? a_T : 0;
4139            S4'  f_T = c_T;
4140
4141          At least when VEC_COND_EXPR is implemented using masks
4142          cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
4143          computes the comparison masks and ands it, in one case with
4144          all ones vector, in the other case with a vector register.
4145          Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
4146          often more expensive.  */
4147       def_stmt = SSA_NAME_DEF_STMT (rhs2);
4148       def_rhs_code = gimple_assign_rhs_code (def_stmt);
4149       if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
4150         {
4151           irhs1 = *defs.get (rhs1);
4152           tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
4153           if (TYPE_PRECISION (TREE_TYPE (irhs1))
4154               == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
4155             {
4156               rhs_code = def_rhs_code;
4157               rhs1 = def_rhs1;
4158               rhs2 = gimple_assign_rhs2 (def_stmt);
4159               trueval = irhs1;
4160               goto do_compare;
4161             }
4162           else
4163             irhs2 = *defs.get (rhs2);
4164           goto and_ior_xor;
4165         }
4166       def_stmt = SSA_NAME_DEF_STMT (rhs1);
4167       def_rhs_code = gimple_assign_rhs_code (def_stmt);
4168       if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
4169         {
4170           irhs2 = *defs.get (rhs2);
4171           tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
4172           if (TYPE_PRECISION (TREE_TYPE (irhs2))
4173               == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
4174             {
4175               rhs_code = def_rhs_code;
4176               rhs1 = def_rhs1;
4177               rhs2 = gimple_assign_rhs2 (def_stmt);
4178               trueval = irhs2;
4179               goto do_compare;
4180             }
4181           else
4182             irhs1 = *defs.get (rhs1);
4183           goto and_ior_xor;
4184         }
4185       /* FALLTHRU */
4186     case BIT_IOR_EXPR:
4187     case BIT_XOR_EXPR:
4188       irhs1 = *defs.get (rhs1);
4189       irhs2 = *defs.get (rhs2);
4190     and_ior_xor:
4191       if (TYPE_PRECISION (TREE_TYPE (irhs1))
4192           != TYPE_PRECISION (TREE_TYPE (irhs2)))
4193         {
4194           int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
4195           int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
4196           int out_prec = TYPE_PRECISION (out_type);
4197           if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
4198             irhs2 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs1), irhs2,
4199                                               stmt_info);
4200           else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
4201             irhs1 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs2), irhs1,
4202                                               stmt_info);
4203           else
4204             {
4205               irhs1 = adjust_bool_pattern_cast (vinfo,
4206                                                 out_type, irhs1, stmt_info);
4207               irhs2 = adjust_bool_pattern_cast (vinfo,
4208                                                 out_type, irhs2, stmt_info);
4209             }
4210         }
4211       itype = TREE_TYPE (irhs1);
4212       pattern_stmt
4213         = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4214                                rhs_code, irhs1, irhs2);
4215       break;
4216
4217     default:
4218     do_compare:
4219       gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
4220       if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
4221           || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
4222           || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
4223                        GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
4224         {
4225           scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
4226           itype
4227             = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
4228         }
4229       else
4230         itype = TREE_TYPE (rhs1);
4231       cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
4232       if (trueval == NULL_TREE)
4233         trueval = build_int_cst (itype, 1);
4234       else
4235         gcc_checking_assert (useless_type_conversion_p (itype,
4236                                                         TREE_TYPE (trueval)));
4237       pattern_stmt
4238         = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4239                                COND_EXPR, cond_expr, trueval,
4240                                build_int_cst (itype, 0));
4241       break;
4242     }
4243
4244   gimple_set_location (pattern_stmt, loc);
4245   append_pattern_def_seq (vinfo, stmt_info, pattern_stmt,
4246                           get_vectype_for_scalar_type (vinfo, itype));
4247   defs.put (var, gimple_assign_lhs (pattern_stmt));
4248 }
4249
4250 /* Comparison function to qsort a vector of gimple stmts after UID.  */
4251
4252 static int
4253 sort_after_uid (const void *p1, const void *p2)
4254 {
4255   const gimple *stmt1 = *(const gimple * const *)p1;
4256   const gimple *stmt2 = *(const gimple * const *)p2;
4257   return gimple_uid (stmt1) - gimple_uid (stmt2);
4258 }
4259
4260 /* Create pattern stmts for all stmts participating in the bool pattern
4261    specified by BOOL_STMT_SET and its root STMT_INFO with the desired type
4262    OUT_TYPE.  Return the def of the pattern root.  */
4263
4264 static tree
4265 adjust_bool_stmts (vec_info *vinfo, hash_set <gimple *> &bool_stmt_set,
4266                    tree out_type, stmt_vec_info stmt_info)
4267 {
4268   /* Gather original stmts in the bool pattern in their order of appearance
4269      in the IL.  */
4270   auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
4271   for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
4272        i != bool_stmt_set.end (); ++i)
4273     bool_stmts.quick_push (*i);
4274   bool_stmts.qsort (sort_after_uid);
4275
4276   /* Now process them in that order, producing pattern stmts.  */
4277   hash_map <tree, tree> defs;
4278   for (unsigned i = 0; i < bool_stmts.length (); ++i)
4279     adjust_bool_pattern (vinfo, gimple_assign_lhs (bool_stmts[i]),
4280                          out_type, stmt_info, defs);
4281
4282   /* Pop the last pattern seq stmt and install it as pattern root for STMT.  */
4283   gimple *pattern_stmt
4284     = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
4285   return gimple_assign_lhs (pattern_stmt);
4286 }
4287
4288 /* Return the proper type for converting bool VAR into
4289    an integer value or NULL_TREE if no such type exists.
4290    The type is chosen so that the converted value has the
4291    same number of elements as VAR's vector type.  */
4292
4293 static tree
4294 integer_type_for_mask (tree var, vec_info *vinfo)
4295 {
4296   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
4297     return NULL_TREE;
4298
4299   stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
4300   if (!def_stmt_info || !vect_use_mask_type_p (def_stmt_info))
4301     return NULL_TREE;
4302
4303   return build_nonstandard_integer_type (def_stmt_info->mask_precision, 1);
4304 }
4305
4306 /* Function vect_recog_bool_pattern
4307
4308    Try to find pattern like following:
4309
4310      bool a_b, b_b, c_b, d_b, e_b;
4311      TYPE f_T;
4312    loop:
4313      S1  a_b = x1 CMP1 y1;
4314      S2  b_b = x2 CMP2 y2;
4315      S3  c_b = a_b & b_b;
4316      S4  d_b = x3 CMP3 y3;
4317      S5  e_b = c_b | d_b;
4318      S6  f_T = (TYPE) e_b;
4319
4320    where type 'TYPE' is an integral type.  Or a similar pattern
4321    ending in
4322
4323      S6  f_Y = e_b ? r_Y : s_Y;
4324
4325    as results from if-conversion of a complex condition.
4326
4327    Input:
4328
4329    * STMT_VINFO: The stmt at the end from which the pattern
4330                  search begins, i.e. cast of a bool to
4331                  an integer type.
4332
4333    Output:
4334
4335    * TYPE_OUT: The type of the output of this pattern.
4336
4337    * Return value: A new stmt that will be used to replace the pattern.
4338
4339         Assuming size of TYPE is the same as size of all comparisons
4340         (otherwise some casts would be added where needed), the above
4341         sequence we create related pattern stmts:
4342         S1'  a_T = x1 CMP1 y1 ? 1 : 0;
4343         S3'  c_T = x2 CMP2 y2 ? a_T : 0;
4344         S4'  d_T = x3 CMP3 y3 ? 1 : 0;
4345         S5'  e_T = c_T | d_T;
4346         S6'  f_T = e_T;
4347
4348         Instead of the above S3' we could emit:
4349         S2'  b_T = x2 CMP2 y2 ? 1 : 0;
4350         S3'  c_T = a_T | b_T;
4351         but the above is more efficient.  */
4352
4353 static gimple *
4354 vect_recog_bool_pattern (vec_info *vinfo,
4355                          stmt_vec_info stmt_vinfo, tree *type_out)
4356 {
4357   gimple *last_stmt = stmt_vinfo->stmt;
4358   enum tree_code rhs_code;
4359   tree var, lhs, rhs, vectype;
4360   gimple *pattern_stmt;
4361
4362   if (!is_gimple_assign (last_stmt))
4363     return NULL;
4364
4365   var = gimple_assign_rhs1 (last_stmt);
4366   lhs = gimple_assign_lhs (last_stmt);
4367   rhs_code = gimple_assign_rhs_code (last_stmt);
4368
4369   if (rhs_code == VIEW_CONVERT_EXPR)
4370     var = TREE_OPERAND (var, 0);
4371
4372   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
4373     return NULL;
4374
4375   hash_set<gimple *> bool_stmts;
4376
4377   if (CONVERT_EXPR_CODE_P (rhs_code)
4378       || rhs_code == VIEW_CONVERT_EXPR)
4379     {
4380       if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4381           || VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4382         return NULL;
4383       vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4384
4385       if (check_bool_pattern (var, vinfo, bool_stmts))
4386         {
4387           rhs = adjust_bool_stmts (vinfo, bool_stmts,
4388                                    TREE_TYPE (lhs), stmt_vinfo);
4389           lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4390           if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
4391             pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
4392           else
4393             pattern_stmt
4394               = gimple_build_assign (lhs, NOP_EXPR, rhs);
4395         }
4396       else
4397         {
4398           tree type = integer_type_for_mask (var, vinfo);
4399           tree cst0, cst1, tmp;
4400
4401           if (!type)
4402             return NULL;
4403
4404           /* We may directly use cond with narrowed type to avoid
4405              multiple cond exprs with following result packing and
4406              perform single cond with packed mask instead.  In case
4407              of widening we better make cond first and then extract
4408              results.  */
4409           if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
4410             type = TREE_TYPE (lhs);
4411
4412           cst0 = build_int_cst (type, 0);
4413           cst1 = build_int_cst (type, 1);
4414           tmp = vect_recog_temp_ssa_var (type, NULL);
4415           pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
4416
4417           if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
4418             {
4419               tree new_vectype = get_vectype_for_scalar_type (vinfo, type);
4420               append_pattern_def_seq (vinfo, stmt_vinfo,
4421                                       pattern_stmt, new_vectype);
4422
4423               lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4424               pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
4425             }
4426         }
4427
4428       *type_out = vectype;
4429       vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4430
4431       return pattern_stmt;
4432     }
4433   else if (rhs_code == COND_EXPR
4434            && TREE_CODE (var) == SSA_NAME)
4435     {
4436       vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4437       if (vectype == NULL_TREE)
4438         return NULL;
4439
4440       /* Build a scalar type for the boolean result that when
4441          vectorized matches the vector type of the result in
4442          size and number of elements.  */
4443       unsigned prec
4444         = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
4445                                TYPE_VECTOR_SUBPARTS (vectype));
4446
4447       tree type
4448         = build_nonstandard_integer_type (prec,
4449                                           TYPE_UNSIGNED (TREE_TYPE (var)));
4450       if (get_vectype_for_scalar_type (vinfo, type) == NULL_TREE)
4451         return NULL;
4452
4453       if (!check_bool_pattern (var, vinfo, bool_stmts))
4454         return NULL;
4455
4456       rhs = adjust_bool_stmts (vinfo, bool_stmts, type, stmt_vinfo);
4457
4458       lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4459       pattern_stmt
4460           = gimple_build_assign (lhs, COND_EXPR,
4461                                  build2 (NE_EXPR, boolean_type_node,
4462                                          rhs, build_int_cst (type, 0)),
4463                                  gimple_assign_rhs2 (last_stmt),
4464                                  gimple_assign_rhs3 (last_stmt));
4465       *type_out = vectype;
4466       vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4467
4468       return pattern_stmt;
4469     }
4470   else if (rhs_code == SSA_NAME
4471            && STMT_VINFO_DATA_REF (stmt_vinfo))
4472     {
4473       stmt_vec_info pattern_stmt_info;
4474       tree nunits_vectype;
4475       if (!vect_get_vector_types_for_stmt (vinfo, stmt_vinfo, &vectype,
4476                                            &nunits_vectype)
4477           || !VECTOR_MODE_P (TYPE_MODE (vectype)))
4478         return NULL;
4479
4480       if (check_bool_pattern (var, vinfo, bool_stmts))
4481         rhs = adjust_bool_stmts (vinfo, bool_stmts,
4482                                  TREE_TYPE (vectype), stmt_vinfo);
4483       else
4484         {
4485           tree type = integer_type_for_mask (var, vinfo);
4486           tree cst0, cst1, new_vectype;
4487
4488           if (!type)
4489             return NULL;
4490
4491           if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
4492             type = TREE_TYPE (vectype);
4493
4494           cst0 = build_int_cst (type, 0);
4495           cst1 = build_int_cst (type, 1);
4496           new_vectype = get_vectype_for_scalar_type (vinfo, type);
4497
4498           rhs = vect_recog_temp_ssa_var (type, NULL);
4499           pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
4500           append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, new_vectype);
4501         }
4502
4503       lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
4504       if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
4505         {
4506           tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4507           gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
4508           append_pattern_def_seq (vinfo, stmt_vinfo, cast_stmt);
4509           rhs = rhs2;
4510         }
4511       pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
4512       pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
4513       vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
4514       *type_out = vectype;
4515       vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4516
4517       return pattern_stmt;
4518     }
4519   else
4520     return NULL;
4521 }
4522
4523
4524 /* A helper for vect_recog_mask_conversion_pattern.  Build
4525    conversion of MASK to a type suitable for masking VECTYPE.
4526    Built statement gets required vectype and is appended to
4527    a pattern sequence of STMT_VINFO.
4528
4529    Return converted mask.  */
4530
4531 static tree
4532 build_mask_conversion (vec_info *vinfo,
4533                        tree mask, tree vectype, stmt_vec_info stmt_vinfo)
4534 {
4535   gimple *stmt;
4536   tree masktype, tmp;
4537
4538   masktype = truth_type_for (vectype);
4539   tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
4540   stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
4541   append_pattern_def_seq (vinfo, stmt_vinfo,
4542                           stmt, masktype, TREE_TYPE (vectype));
4543
4544   return tmp;
4545 }
4546
4547
4548 /* Function vect_recog_mask_conversion_pattern
4549
4550    Try to find statements which require boolean type
4551    converison.  Additional conversion statements are
4552    added to handle such cases.  For example:
4553
4554    bool m_1, m_2, m_3;
4555    int i_4, i_5;
4556    double d_6, d_7;
4557    char c_1, c_2, c_3;
4558
4559    S1   m_1 = i_4 > i_5;
4560    S2   m_2 = d_6 < d_7;
4561    S3   m_3 = m_1 & m_2;
4562    S4   c_1 = m_3 ? c_2 : c_3;
4563
4564    Will be transformed into:
4565
4566    S1   m_1 = i_4 > i_5;
4567    S2   m_2 = d_6 < d_7;
4568    S3'' m_2' = (_Bool[bitsize=32])m_2
4569    S3'  m_3' = m_1 & m_2';
4570    S4'' m_3'' = (_Bool[bitsize=8])m_3'
4571    S4'  c_1' = m_3'' ? c_2 : c_3;  */
4572
4573 static gimple *
4574 vect_recog_mask_conversion_pattern (vec_info *vinfo,
4575                                     stmt_vec_info stmt_vinfo, tree *type_out)
4576 {
4577   gimple *last_stmt = stmt_vinfo->stmt;
4578   enum tree_code rhs_code;
4579   tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
4580   tree vectype1, vectype2;
4581   stmt_vec_info pattern_stmt_info;
4582   tree rhs1_op0 = NULL_TREE, rhs1_op1 = NULL_TREE;
4583   tree rhs1_op0_type = NULL_TREE, rhs1_op1_type = NULL_TREE;
4584
4585   /* Check for MASK_LOAD ans MASK_STORE calls requiring mask conversion.  */
4586   if (is_gimple_call (last_stmt)
4587       && gimple_call_internal_p (last_stmt))
4588     {
4589       gcall *pattern_stmt;
4590
4591       internal_fn ifn = gimple_call_internal_fn (last_stmt);
4592       int mask_argno = internal_fn_mask_index (ifn);
4593       if (mask_argno < 0)
4594         return NULL;
4595
4596       bool store_p = internal_store_fn_p (ifn);
4597       if (store_p)
4598         {
4599           int rhs_index = internal_fn_stored_value_index (ifn);
4600           tree rhs = gimple_call_arg (last_stmt, rhs_index);
4601           vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs));
4602         }
4603       else
4604         {
4605           lhs = gimple_call_lhs (last_stmt);
4606           vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4607         }
4608
4609       tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
4610       tree mask_arg_type = integer_type_for_mask (mask_arg, vinfo);
4611       if (!mask_arg_type)
4612         return NULL;
4613       vectype2 = get_mask_type_for_scalar_type (vinfo, mask_arg_type);
4614
4615       if (!vectype1 || !vectype2
4616           || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4617                        TYPE_VECTOR_SUBPARTS (vectype2)))
4618         return NULL;
4619
4620       tmp = build_mask_conversion (vinfo, mask_arg, vectype1, stmt_vinfo);
4621
4622       auto_vec<tree, 8> args;
4623       unsigned int nargs = gimple_call_num_args (last_stmt);
4624       args.safe_grow (nargs, true);
4625       for (unsigned int i = 0; i < nargs; ++i)
4626         args[i] = ((int) i == mask_argno
4627                    ? tmp
4628                    : gimple_call_arg (last_stmt, i));
4629       pattern_stmt = gimple_build_call_internal_vec (ifn, args);
4630
4631       if (!store_p)
4632         {
4633           lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4634           gimple_call_set_lhs (pattern_stmt, lhs);
4635         }
4636       gimple_call_set_nothrow (pattern_stmt, true);
4637
4638       pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
4639       if (STMT_VINFO_DATA_REF (stmt_vinfo))
4640         vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
4641
4642       *type_out = vectype1;
4643       vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4644
4645       return pattern_stmt;
4646     }
4647
4648   if (!is_gimple_assign (last_stmt))
4649     return NULL;
4650
4651   gimple *pattern_stmt;
4652   lhs = gimple_assign_lhs (last_stmt);
4653   rhs1 = gimple_assign_rhs1 (last_stmt);
4654   rhs_code = gimple_assign_rhs_code (last_stmt);
4655
4656   /* Check for cond expression requiring mask conversion.  */
4657   if (rhs_code == COND_EXPR)
4658     {
4659       vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4660
4661       if (TREE_CODE (rhs1) == SSA_NAME)
4662         {
4663           rhs1_type = integer_type_for_mask (rhs1, vinfo);
4664           if (!rhs1_type)
4665             return NULL;
4666         }
4667       else if (COMPARISON_CLASS_P (rhs1))
4668         {
4669           /* Check whether we're comparing scalar booleans and (if so)
4670              whether a better mask type exists than the mask associated
4671              with boolean-sized elements.  This avoids unnecessary packs
4672              and unpacks if the booleans are set from comparisons of
4673              wider types.  E.g. in:
4674
4675                int x1, x2, x3, x4, y1, y1;
4676                ...
4677                bool b1 = (x1 == x2);
4678                bool b2 = (x3 == x4);
4679                ... = b1 == b2 ? y1 : y2;
4680
4681              it is better for b1 and b2 to use the mask type associated
4682              with int elements rather bool (byte) elements.  */
4683           rhs1_op0 = TREE_OPERAND (rhs1, 0);
4684           rhs1_op1 = TREE_OPERAND (rhs1, 1);
4685           if (!rhs1_op0 || !rhs1_op1)
4686             return NULL;
4687           rhs1_op0_type = integer_type_for_mask (rhs1_op0, vinfo);
4688           rhs1_op1_type = integer_type_for_mask (rhs1_op1, vinfo);
4689
4690           if (!rhs1_op0_type)
4691             rhs1_type = TREE_TYPE (rhs1_op0);
4692           else if (!rhs1_op1_type)
4693             rhs1_type = TREE_TYPE (rhs1_op1);
4694           else if (TYPE_PRECISION (rhs1_op0_type)
4695                    != TYPE_PRECISION (rhs1_op1_type))
4696             {
4697               int tmp0 = (int) TYPE_PRECISION (rhs1_op0_type)
4698                          - (int) TYPE_PRECISION (TREE_TYPE (lhs));
4699               int tmp1 = (int) TYPE_PRECISION (rhs1_op1_type)
4700                          - (int) TYPE_PRECISION (TREE_TYPE (lhs));
4701               if ((tmp0 > 0 && tmp1 > 0) || (tmp0 < 0 && tmp1 < 0))
4702                 {
4703                   if (abs (tmp0) > abs (tmp1))
4704                     rhs1_type = rhs1_op1_type;
4705                   else
4706                     rhs1_type = rhs1_op0_type;
4707                 }
4708               else
4709                 rhs1_type = build_nonstandard_integer_type
4710                   (TYPE_PRECISION (TREE_TYPE (lhs)), 1);
4711             }
4712           else
4713             rhs1_type = rhs1_op0_type;
4714         }
4715       else
4716         return NULL;
4717
4718       vectype2 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
4719
4720       if (!vectype1 || !vectype2)
4721         return NULL;
4722
4723       /* Continue if a conversion is needed.  Also continue if we have
4724          a comparison whose vector type would normally be different from
4725          VECTYPE2 when considered in isolation.  In that case we'll
4726          replace the comparison with an SSA name (so that we can record
4727          its vector type) and behave as though the comparison was an SSA
4728          name from the outset.  */
4729       if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4730                     TYPE_VECTOR_SUBPARTS (vectype2))
4731           && !rhs1_op0_type
4732           && !rhs1_op1_type)
4733         return NULL;
4734
4735       /* If rhs1 is invariant and we can promote it leave the COND_EXPR
4736          in place, we can handle it in vectorizable_condition.  This avoids
4737          unnecessary promotion stmts and increased vectorization factor.  */
4738       if (COMPARISON_CLASS_P (rhs1)
4739           && INTEGRAL_TYPE_P (rhs1_type)
4740           && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
4741                        TYPE_VECTOR_SUBPARTS (vectype2)))
4742         {
4743           enum vect_def_type dt;
4744           if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
4745               && dt == vect_external_def
4746               && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
4747               && (dt == vect_external_def
4748                   || dt == vect_constant_def))
4749             {
4750               tree wide_scalar_type = build_nonstandard_integer_type
4751                 (vector_element_bits (vectype1), TYPE_UNSIGNED (rhs1_type));
4752               tree vectype3 = get_vectype_for_scalar_type (vinfo,
4753                                                            wide_scalar_type);
4754               if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
4755                 return NULL;
4756             }
4757         }
4758
4759       /* If rhs1 is a comparison we need to move it into a
4760          separate statement.  */
4761       if (TREE_CODE (rhs1) != SSA_NAME)
4762         {
4763           tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
4764           if (rhs1_op0_type
4765               && TYPE_PRECISION (rhs1_op0_type) != TYPE_PRECISION (rhs1_type))
4766             rhs1_op0 = build_mask_conversion (vinfo, rhs1_op0,
4767                                               vectype2, stmt_vinfo);
4768           if (rhs1_op1_type
4769               && TYPE_PRECISION (rhs1_op1_type) != TYPE_PRECISION (rhs1_type))
4770             rhs1_op1 = build_mask_conversion (vinfo, rhs1_op1,
4771                                       vectype2, stmt_vinfo);
4772           pattern_stmt = gimple_build_assign (tmp, TREE_CODE (rhs1),
4773                                               rhs1_op0, rhs1_op1);
4774           rhs1 = tmp;
4775           append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vectype2,
4776                                   rhs1_type);
4777         }
4778
4779       if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
4780                     TYPE_VECTOR_SUBPARTS (vectype2)))
4781         tmp = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
4782       else
4783         tmp = rhs1;
4784
4785       lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4786       pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
4787                                           gimple_assign_rhs2 (last_stmt),
4788                                           gimple_assign_rhs3 (last_stmt));
4789
4790       *type_out = vectype1;
4791       vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4792
4793       return pattern_stmt;
4794     }
4795
4796   /* Now check for binary boolean operations requiring conversion for
4797      one of operands.  */
4798   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4799     return NULL;
4800
4801   if (rhs_code != BIT_IOR_EXPR
4802       && rhs_code != BIT_XOR_EXPR
4803       && rhs_code != BIT_AND_EXPR
4804       && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
4805     return NULL;
4806
4807   rhs2 = gimple_assign_rhs2 (last_stmt);
4808
4809   rhs1_type = integer_type_for_mask (rhs1, vinfo);
4810   rhs2_type = integer_type_for_mask (rhs2, vinfo);
4811
4812   if (!rhs1_type || !rhs2_type
4813       || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
4814     return NULL;
4815
4816   if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
4817     {
4818       vectype1 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
4819       if (!vectype1)
4820         return NULL;
4821       rhs2 = build_mask_conversion (vinfo, rhs2, vectype1, stmt_vinfo);
4822     }
4823   else
4824     {
4825       vectype1 = get_mask_type_for_scalar_type (vinfo, rhs2_type);
4826       if (!vectype1)
4827         return NULL;
4828       rhs1 = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
4829     }
4830
4831   lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4832   pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
4833
4834   *type_out = vectype1;
4835   vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4836
4837   return pattern_stmt;
4838 }
4839
4840 /* STMT_INFO is a load or store.  If the load or store is conditional, return
4841    the boolean condition under which it occurs, otherwise return null.  */
4842
4843 static tree
4844 vect_get_load_store_mask (stmt_vec_info stmt_info)
4845 {
4846   if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt))
4847     {
4848       gcc_assert (gimple_assign_single_p (def_assign));
4849       return NULL_TREE;
4850     }
4851
4852   if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt))
4853     {
4854       internal_fn ifn = gimple_call_internal_fn (def_call);
4855       int mask_index = internal_fn_mask_index (ifn);
4856       return gimple_call_arg (def_call, mask_index);
4857     }
4858
4859   gcc_unreachable ();
4860 }
4861
4862 /* Return MASK if MASK is suitable for masking an operation on vectors
4863    of type VECTYPE, otherwise convert it into such a form and return
4864    the result.  Associate any conversion statements with STMT_INFO's
4865    pattern.  */
4866
4867 static tree
4868 vect_convert_mask_for_vectype (tree mask, tree vectype,
4869                                stmt_vec_info stmt_info, vec_info *vinfo)
4870 {
4871   tree mask_type = integer_type_for_mask (mask, vinfo);
4872   if (mask_type)
4873     {
4874       tree mask_vectype = get_mask_type_for_scalar_type (vinfo, mask_type);
4875       if (mask_vectype
4876           && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
4877                        TYPE_VECTOR_SUBPARTS (mask_vectype)))
4878         mask = build_mask_conversion (vinfo, mask, vectype, stmt_info);
4879     }
4880   return mask;
4881 }
4882
4883 /* Return the equivalent of:
4884
4885      fold_convert (TYPE, VALUE)
4886
4887    with the expectation that the operation will be vectorized.
4888    If new statements are needed, add them as pattern statements
4889    to STMT_INFO.  */
4890
4891 static tree
4892 vect_add_conversion_to_pattern (vec_info *vinfo,
4893                                 tree type, tree value, stmt_vec_info stmt_info)
4894 {
4895   if (useless_type_conversion_p (type, TREE_TYPE (value)))
4896     return value;
4897
4898   tree new_value = vect_recog_temp_ssa_var (type, NULL);
4899   gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
4900   append_pattern_def_seq (vinfo, stmt_info, conversion,
4901                           get_vectype_for_scalar_type (vinfo, type));
4902   return new_value;
4903 }
4904
4905 /* Try to convert STMT_INFO into a call to a gather load or scatter store
4906    internal function.  Return the final statement on success and set
4907    *TYPE_OUT to the vector type being loaded or stored.
4908
4909    This function only handles gathers and scatters that were recognized
4910    as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P).  */
4911
4912 static gimple *
4913 vect_recog_gather_scatter_pattern (vec_info *vinfo,
4914                                    stmt_vec_info stmt_info, tree *type_out)
4915 {
4916   /* Currently we only support this for loop vectorization.  */
4917   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4918   if (!loop_vinfo)
4919     return NULL;
4920
4921   /* Make sure that we're looking at a gather load or scatter store.  */
4922   data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4923   if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
4924     return NULL;
4925
4926   /* Get the boolean that controls whether the load or store happens.
4927      This is null if the operation is unconditional.  */
4928   tree mask = vect_get_load_store_mask (stmt_info);
4929
4930   /* Make sure that the target supports an appropriate internal
4931      function for the gather/scatter operation.  */
4932   gather_scatter_info gs_info;
4933   if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info)
4934       || gs_info.ifn == IFN_LAST)
4935     return NULL;
4936
4937   /* Convert the mask to the right form.  */
4938   tree gs_vectype = get_vectype_for_scalar_type (loop_vinfo,
4939                                                  gs_info.element_type);
4940   if (mask)
4941     mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
4942                                           loop_vinfo);
4943   else if (gs_info.ifn == IFN_MASK_SCATTER_STORE
4944            || gs_info.ifn == IFN_MASK_GATHER_LOAD)
4945     mask = build_int_cst (TREE_TYPE (truth_type_for (gs_vectype)), -1);
4946
4947   /* Get the invariant base and non-invariant offset, converting the
4948      latter to the same width as the vector elements.  */
4949   tree base = gs_info.base;
4950   tree offset_type = TREE_TYPE (gs_info.offset_vectype);
4951   tree offset = vect_add_conversion_to_pattern (vinfo, offset_type,
4952                                                 gs_info.offset, stmt_info);
4953
4954   /* Build the new pattern statement.  */
4955   tree scale = size_int (gs_info.scale);
4956   gcall *pattern_stmt;
4957   if (DR_IS_READ (dr))
4958     {
4959       tree zero = build_zero_cst (gs_info.element_type);
4960       if (mask != NULL)
4961         pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base,
4962                                                    offset, scale, zero, mask);
4963       else
4964         pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
4965                                                    offset, scale, zero);
4966       tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
4967       gimple_call_set_lhs (pattern_stmt, load_lhs);
4968     }
4969   else
4970     {
4971       tree rhs = vect_get_store_rhs (stmt_info);
4972       if (mask != NULL)
4973         pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5,
4974                                                    base, offset, scale, rhs,
4975                                                    mask);
4976       else
4977         pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4,
4978                                                    base, offset, scale, rhs);
4979     }
4980   gimple_call_set_nothrow (pattern_stmt, true);
4981
4982   /* Copy across relevant vectorization info and associate DR with the
4983      new pattern statement instead of the original statement.  */
4984   stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
4985   loop_vinfo->move_dr (pattern_stmt_info, stmt_info);
4986
4987   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4988   *type_out = vectype;
4989   vect_pattern_detected ("gather/scatter pattern", stmt_info->stmt);
4990
4991   return pattern_stmt;
4992 }
4993
4994 /* Return true if TYPE is a non-boolean integer type.  These are the types
4995    that we want to consider for narrowing.  */
4996
4997 static bool
4998 vect_narrowable_type_p (tree type)
4999 {
5000   return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
5001 }
5002
5003 /* Return true if the operation given by CODE can be truncated to N bits
5004    when only N bits of the output are needed.  This is only true if bit N+1
5005    of the inputs has no effect on the low N bits of the result.  */
5006
5007 static bool
5008 vect_truncatable_operation_p (tree_code code)
5009 {
5010   switch (code)
5011     {
5012     case PLUS_EXPR:
5013     case MINUS_EXPR:
5014     case MULT_EXPR:
5015     case BIT_AND_EXPR:
5016     case BIT_IOR_EXPR:
5017     case BIT_XOR_EXPR:
5018     case COND_EXPR:
5019       return true;
5020
5021     default:
5022       return false;
5023     }
5024 }
5025
5026 /* Record that STMT_INFO could be changed from operating on TYPE to
5027    operating on a type with the precision and sign given by PRECISION
5028    and SIGN respectively.  PRECISION is an arbitrary bit precision;
5029    it might not be a whole number of bytes.  */
5030
5031 static void
5032 vect_set_operation_type (stmt_vec_info stmt_info, tree type,
5033                          unsigned int precision, signop sign)
5034 {
5035   /* Round the precision up to a whole number of bytes.  */
5036   precision = vect_element_precision (precision);
5037   if (precision < TYPE_PRECISION (type)
5038       && (!stmt_info->operation_precision
5039           || stmt_info->operation_precision > precision))
5040     {
5041       stmt_info->operation_precision = precision;
5042       stmt_info->operation_sign = sign;
5043     }
5044 }
5045
5046 /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
5047    non-boolean inputs, all of which have type TYPE.  MIN_INPUT_PRECISION
5048    is an arbitrary bit precision; it might not be a whole number of bytes.  */
5049
5050 static void
5051 vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
5052                               unsigned int min_input_precision)
5053 {
5054   /* This operation in isolation only requires the inputs to have
5055      MIN_INPUT_PRECISION of precision,  However, that doesn't mean
5056      that MIN_INPUT_PRECISION is a natural precision for the chain
5057      as a whole.  E.g. consider something like:
5058
5059          unsigned short *x, *y;
5060          *y = ((*x & 0xf0) >> 4) | (*y << 4);
5061
5062      The right shift can be done on unsigned chars, and only requires the
5063      result of "*x & 0xf0" to be done on unsigned chars.  But taking that
5064      approach would mean turning a natural chain of single-vector unsigned
5065      short operations into one that truncates "*x" and then extends
5066      "(*x & 0xf0) >> 4", with two vectors for each unsigned short
5067      operation and one vector for each unsigned char operation.
5068      This would be a significant pessimization.
5069
5070      Instead only propagate the maximum of this precision and the precision
5071      required by the users of the result.  This means that we don't pessimize
5072      the case above but continue to optimize things like:
5073
5074          unsigned char *y;
5075          unsigned short *x;
5076          *y = ((*x & 0xf0) >> 4) | (*y << 4);
5077
5078      Here we would truncate two vectors of *x to a single vector of
5079      unsigned chars and use single-vector unsigned char operations for
5080      everything else, rather than doing two unsigned short copies of
5081      "(*x & 0xf0) >> 4" and then truncating the result.  */
5082   min_input_precision = MAX (min_input_precision,
5083                              stmt_info->min_output_precision);
5084
5085   if (min_input_precision < TYPE_PRECISION (type)
5086       && (!stmt_info->min_input_precision
5087           || stmt_info->min_input_precision > min_input_precision))
5088     stmt_info->min_input_precision = min_input_precision;
5089 }
5090
5091 /* Subroutine of vect_determine_min_output_precision.  Return true if
5092    we can calculate a reduced number of output bits for STMT_INFO,
5093    whose result is LHS.  */
5094
5095 static bool
5096 vect_determine_min_output_precision_1 (vec_info *vinfo,
5097                                        stmt_vec_info stmt_info, tree lhs)
5098 {
5099   /* Take the maximum precision required by users of the result.  */
5100   unsigned int precision = 0;
5101   imm_use_iterator iter;
5102   use_operand_p use;
5103   FOR_EACH_IMM_USE_FAST (use, iter, lhs)
5104     {
5105       gimple *use_stmt = USE_STMT (use);
5106       if (is_gimple_debug (use_stmt))
5107         continue;
5108       stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
5109       if (!use_stmt_info || !use_stmt_info->min_input_precision)
5110         return false;
5111       /* The input precision recorded for COND_EXPRs applies only to the
5112          "then" and "else" values.  */
5113       gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
5114       if (assign
5115           && gimple_assign_rhs_code (assign) == COND_EXPR
5116           && use->use != gimple_assign_rhs2_ptr (assign)
5117           && use->use != gimple_assign_rhs3_ptr (assign))
5118         return false;
5119       precision = MAX (precision, use_stmt_info->min_input_precision);
5120     }
5121
5122   if (dump_enabled_p ())
5123     dump_printf_loc (MSG_NOTE, vect_location,
5124                      "only the low %d bits of %T are significant\n",
5125                      precision, lhs);
5126   stmt_info->min_output_precision = precision;
5127   return true;
5128 }
5129
5130 /* Calculate min_output_precision for STMT_INFO.  */
5131
5132 static void
5133 vect_determine_min_output_precision (vec_info *vinfo, stmt_vec_info stmt_info)
5134 {
5135   /* We're only interested in statements with a narrowable result.  */
5136   tree lhs = gimple_get_lhs (stmt_info->stmt);
5137   if (!lhs
5138       || TREE_CODE (lhs) != SSA_NAME
5139       || !vect_narrowable_type_p (TREE_TYPE (lhs)))
5140     return;
5141
5142   if (!vect_determine_min_output_precision_1 (vinfo, stmt_info, lhs))
5143     stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
5144 }
5145
5146 /* Use range information to decide whether STMT (described by STMT_INFO)
5147    could be done in a narrower type.  This is effectively a forward
5148    propagation, since it uses context-independent information that applies
5149    to all users of an SSA name.  */
5150
5151 static void
5152 vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
5153 {
5154   tree lhs = gimple_assign_lhs (stmt);
5155   if (!lhs || TREE_CODE (lhs) != SSA_NAME)
5156     return;
5157
5158   tree type = TREE_TYPE (lhs);
5159   if (!vect_narrowable_type_p (type))
5160     return;
5161
5162   /* First see whether we have any useful range information for the result.  */
5163   unsigned int precision = TYPE_PRECISION (type);
5164   signop sign = TYPE_SIGN (type);
5165   wide_int min_value, max_value;
5166   if (!vect_get_range_info (lhs, &min_value, &max_value))
5167     return;
5168
5169   tree_code code = gimple_assign_rhs_code (stmt);
5170   unsigned int nops = gimple_num_ops (stmt);
5171
5172   if (!vect_truncatable_operation_p (code))
5173     /* Check that all relevant input operands are compatible, and update
5174        [MIN_VALUE, MAX_VALUE] to include their ranges.  */
5175     for (unsigned int i = 1; i < nops; ++i)
5176       {
5177         tree op = gimple_op (stmt, i);
5178         if (TREE_CODE (op) == INTEGER_CST)
5179           {
5180             /* Don't require the integer to have RHS_TYPE (which it might
5181                not for things like shift amounts, etc.), but do require it
5182                to fit the type.  */
5183             if (!int_fits_type_p (op, type))
5184               return;
5185
5186             min_value = wi::min (min_value, wi::to_wide (op, precision), sign);
5187             max_value = wi::max (max_value, wi::to_wide (op, precision), sign);
5188           }
5189         else if (TREE_CODE (op) == SSA_NAME)
5190           {
5191             /* Ignore codes that don't take uniform arguments.  */
5192             if (!types_compatible_p (TREE_TYPE (op), type))
5193               return;
5194
5195             wide_int op_min_value, op_max_value;
5196             if (!vect_get_range_info (op, &op_min_value, &op_max_value))
5197               return;
5198
5199             min_value = wi::min (min_value, op_min_value, sign);
5200             max_value = wi::max (max_value, op_max_value, sign);
5201           }
5202         else
5203           return;
5204       }
5205
5206   /* Try to switch signed types for unsigned types if we can.
5207      This is better for two reasons.  First, unsigned ops tend
5208      to be cheaper than signed ops.  Second, it means that we can
5209      handle things like:
5210
5211         signed char c;
5212         int res = (int) c & 0xff00; // range [0x0000, 0xff00]
5213
5214      as:
5215
5216         signed char c;
5217         unsigned short res_1 = (unsigned short) c & 0xff00;
5218         int res = (int) res_1;
5219
5220      where the intermediate result res_1 has unsigned rather than
5221      signed type.  */
5222   if (sign == SIGNED && !wi::neg_p (min_value))
5223     sign = UNSIGNED;
5224
5225   /* See what precision is required for MIN_VALUE and MAX_VALUE.  */
5226   unsigned int precision1 = wi::min_precision (min_value, sign);
5227   unsigned int precision2 = wi::min_precision (max_value, sign);
5228   unsigned int value_precision = MAX (precision1, precision2);
5229   if (value_precision >= precision)
5230     return;
5231
5232   if (dump_enabled_p ())
5233     dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
5234                      " without loss of precision: %G",
5235                      sign == SIGNED ? "signed" : "unsigned",
5236                      value_precision, stmt);
5237
5238   vect_set_operation_type (stmt_info, type, value_precision, sign);
5239   vect_set_min_input_precision (stmt_info, type, value_precision);
5240 }
5241
5242 /* Use information about the users of STMT's result to decide whether
5243    STMT (described by STMT_INFO) could be done in a narrower type.
5244    This is effectively a backward propagation.  */
5245
5246 static void
5247 vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
5248 {
5249   tree_code code = gimple_assign_rhs_code (stmt);
5250   unsigned int opno = (code == COND_EXPR ? 2 : 1);
5251   tree type = TREE_TYPE (gimple_op (stmt, opno));
5252   if (!vect_narrowable_type_p (type))
5253     return;
5254
5255   unsigned int precision = TYPE_PRECISION (type);
5256   unsigned int operation_precision, min_input_precision;
5257   switch (code)
5258     {
5259     CASE_CONVERT:
5260       /* Only the bits that contribute to the output matter.  Don't change
5261          the precision of the operation itself.  */
5262       operation_precision = precision;
5263       min_input_precision = stmt_info->min_output_precision;
5264       break;
5265
5266     case LSHIFT_EXPR:
5267     case RSHIFT_EXPR:
5268       {
5269         tree shift = gimple_assign_rhs2 (stmt);
5270         if (TREE_CODE (shift) != INTEGER_CST
5271             || !wi::ltu_p (wi::to_widest (shift), precision))
5272           return;
5273         unsigned int const_shift = TREE_INT_CST_LOW (shift);
5274         if (code == LSHIFT_EXPR)
5275           {
5276             /* Avoid creating an undefined shift.
5277
5278                ??? We could instead use min_output_precision as-is and
5279                optimize out-of-range shifts to zero.  However, only
5280                degenerate testcases shift away all their useful input data,
5281                and it isn't natural to drop input operations in the middle
5282                of vectorization.  This sort of thing should really be
5283                handled before vectorization.  */
5284             operation_precision = MAX (stmt_info->min_output_precision,
5285                                        const_shift + 1);
5286             /* We need CONST_SHIFT fewer bits of the input.  */
5287             min_input_precision = (MAX (operation_precision, const_shift)
5288                                    - const_shift);
5289           }
5290         else
5291           {
5292             /* We need CONST_SHIFT extra bits to do the operation.  */
5293             operation_precision = (stmt_info->min_output_precision
5294                                    + const_shift);
5295             min_input_precision = operation_precision;
5296           }
5297         break;
5298       }
5299
5300     default:
5301       if (vect_truncatable_operation_p (code))
5302         {
5303           /* Input bit N has no effect on output bits N-1 and lower.  */
5304           operation_precision = stmt_info->min_output_precision;
5305           min_input_precision = operation_precision;
5306           break;
5307         }
5308       return;
5309     }
5310
5311   if (operation_precision < precision)
5312     {
5313       if (dump_enabled_p ())
5314         dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
5315                          " without affecting users: %G",
5316                          TYPE_UNSIGNED (type) ? "unsigned" : "signed",
5317                          operation_precision, stmt);
5318       vect_set_operation_type (stmt_info, type, operation_precision,
5319                                TYPE_SIGN (type));
5320     }
5321   vect_set_min_input_precision (stmt_info, type, min_input_precision);
5322 }
5323
5324 /* Return true if the statement described by STMT_INFO sets a boolean
5325    SSA_NAME and if we know how to vectorize this kind of statement using
5326    vector mask types.  */
5327
5328 static bool
5329 possible_vector_mask_operation_p (stmt_vec_info stmt_info)
5330 {
5331   tree lhs = gimple_get_lhs (stmt_info->stmt);
5332   if (!lhs
5333       || TREE_CODE (lhs) != SSA_NAME
5334       || !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
5335     return false;
5336
5337   if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
5338     {
5339       tree_code rhs_code = gimple_assign_rhs_code (assign);
5340       switch (rhs_code)
5341         {
5342         CASE_CONVERT:
5343         case SSA_NAME:
5344         case BIT_NOT_EXPR:
5345         case BIT_IOR_EXPR:
5346         case BIT_XOR_EXPR:
5347         case BIT_AND_EXPR:
5348           return true;
5349
5350         default:
5351           return TREE_CODE_CLASS (rhs_code) == tcc_comparison;
5352         }
5353     }
5354   else if (is_a <gphi *> (stmt_info->stmt))
5355     return true;
5356   return false;
5357 }
5358
5359 /* If STMT_INFO sets a boolean SSA_NAME, see whether we should use
5360    a vector mask type instead of a normal vector type.  Record the
5361    result in STMT_INFO->mask_precision.  */
5362
5363 static void
5364 vect_determine_mask_precision (vec_info *vinfo, stmt_vec_info stmt_info)
5365 {
5366   if (!possible_vector_mask_operation_p (stmt_info))
5367     return;
5368
5369   /* If at least one boolean input uses a vector mask type,
5370      pick the mask type with the narrowest elements.
5371
5372      ??? This is the traditional behavior.  It should always produce
5373      the smallest number of operations, but isn't necessarily the
5374      optimal choice.  For example, if we have:
5375
5376        a = b & c
5377
5378      where:
5379
5380        - the user of a wants it to have a mask type for 16-bit elements (M16)
5381        - b also uses M16
5382        - c uses a mask type for 8-bit elements (M8)
5383
5384      then picking M8 gives:
5385
5386        - 1 M16->M8 pack for b
5387        - 1 M8 AND for a
5388        - 2 M8->M16 unpacks for the user of a
5389
5390      whereas picking M16 would have given:
5391
5392        - 2 M8->M16 unpacks for c
5393        - 2 M16 ANDs for a
5394
5395      The number of operations are equal, but M16 would have given
5396      a shorter dependency chain and allowed more ILP.  */
5397   unsigned int precision = ~0U;
5398   if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
5399     {
5400       unsigned int nops = gimple_num_ops (assign);
5401       for (unsigned int i = 1; i < nops; ++i)
5402         {
5403           tree rhs = gimple_op (assign, i);
5404           if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs)))
5405             continue;
5406
5407           stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
5408           if (!def_stmt_info)
5409             /* Don't let external or constant operands influence the choice.
5410                We can convert them to whichever vector type we pick.  */
5411             continue;
5412
5413           if (def_stmt_info->mask_precision)
5414             {
5415               if (precision > def_stmt_info->mask_precision)
5416                 precision = def_stmt_info->mask_precision;
5417             }
5418         }
5419
5420       /* If the statement compares two values that shouldn't use vector masks,
5421          try comparing the values as normal scalars instead.  */
5422       tree_code rhs_code = gimple_assign_rhs_code (assign);
5423       if (precision == ~0U
5424           && TREE_CODE_CLASS (rhs_code) == tcc_comparison)
5425         {
5426           tree rhs1_type = TREE_TYPE (gimple_assign_rhs1 (assign));
5427           scalar_mode mode;
5428           tree vectype, mask_type;
5429           if (is_a <scalar_mode> (TYPE_MODE (rhs1_type), &mode)
5430               && (vectype = get_vectype_for_scalar_type (vinfo, rhs1_type))
5431               && (mask_type = get_mask_type_for_scalar_type (vinfo, rhs1_type))
5432               && expand_vec_cmp_expr_p (vectype, mask_type, rhs_code))
5433             precision = GET_MODE_BITSIZE (mode);
5434         }
5435     }
5436   else
5437     {
5438       gphi *phi = as_a <gphi *> (stmt_info->stmt);
5439       for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
5440         {
5441           tree rhs = gimple_phi_arg_def (phi, i);
5442
5443           stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
5444           if (!def_stmt_info)
5445             /* Don't let external or constant operands influence the choice.
5446                We can convert them to whichever vector type we pick.  */
5447             continue;
5448
5449           if (def_stmt_info->mask_precision)
5450             {
5451               if (precision > def_stmt_info->mask_precision)
5452                 precision = def_stmt_info->mask_precision;
5453             }
5454         }
5455     }
5456
5457   if (dump_enabled_p ())
5458     {
5459       if (precision == ~0U)
5460         dump_printf_loc (MSG_NOTE, vect_location,
5461                          "using normal nonmask vectors for %G",
5462                          stmt_info->stmt);
5463       else
5464         dump_printf_loc (MSG_NOTE, vect_location,
5465                          "using boolean precision %d for %G",
5466                          precision, stmt_info->stmt);
5467     }
5468
5469   stmt_info->mask_precision = precision;
5470 }
5471
5472 /* Handle vect_determine_precisions for STMT_INFO, given that we
5473    have already done so for the users of its result.  */
5474
5475 void
5476 vect_determine_stmt_precisions (vec_info *vinfo, stmt_vec_info stmt_info)
5477 {
5478   vect_determine_min_output_precision (vinfo, stmt_info);
5479   if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
5480     {
5481       vect_determine_precisions_from_range (stmt_info, stmt);
5482       vect_determine_precisions_from_users (stmt_info, stmt);
5483     }
5484 }
5485
5486 /* Walk backwards through the vectorizable region to determine the
5487    values of these fields:
5488
5489    - min_output_precision
5490    - min_input_precision
5491    - operation_precision
5492    - operation_sign.  */
5493
5494 void
5495 vect_determine_precisions (vec_info *vinfo)
5496 {
5497   DUMP_VECT_SCOPE ("vect_determine_precisions");
5498
5499   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
5500     {
5501       class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5502       basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5503       unsigned int nbbs = loop->num_nodes;
5504
5505       for (unsigned int i = 0; i < nbbs; i++)
5506         {
5507           basic_block bb = bbs[i];
5508           for (auto gsi = gsi_start_phis (bb);
5509                !gsi_end_p (gsi); gsi_next (&gsi))
5510             {
5511               stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
5512               if (stmt_info)
5513                 vect_determine_mask_precision (vinfo, stmt_info);
5514             }
5515           for (auto si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5516             if (!is_gimple_debug (gsi_stmt (si)))
5517               vect_determine_mask_precision
5518                 (vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
5519         }
5520       for (unsigned int i = 0; i < nbbs; i++)
5521         {
5522           basic_block bb = bbs[nbbs - i - 1];
5523           for (gimple_stmt_iterator si = gsi_last_bb (bb);
5524                !gsi_end_p (si); gsi_prev (&si))
5525             if (!is_gimple_debug (gsi_stmt (si)))
5526               vect_determine_stmt_precisions
5527                 (vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
5528           for (auto gsi = gsi_start_phis (bb);
5529                !gsi_end_p (gsi); gsi_next (&gsi))
5530             {
5531               stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
5532               if (stmt_info)
5533                 vect_determine_stmt_precisions (vinfo, stmt_info);
5534             }
5535         }
5536     }
5537   else
5538     {
5539       bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
5540       for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
5541         {
5542           basic_block bb = bb_vinfo->bbs[i];
5543           for (auto gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5544             {
5545               stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
5546               if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5547                 vect_determine_mask_precision (vinfo, stmt_info);
5548             }
5549           for (auto gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5550             {
5551               stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
5552               if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5553                 vect_determine_mask_precision (vinfo, stmt_info);
5554             }
5555         }
5556       for (int i = bb_vinfo->bbs.length () - 1; i != -1; --i)
5557         {
5558           for (gimple_stmt_iterator gsi = gsi_last_bb (bb_vinfo->bbs[i]);
5559                !gsi_end_p (gsi); gsi_prev (&gsi))
5560             {
5561               stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
5562               if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5563                 vect_determine_stmt_precisions (vinfo, stmt_info);
5564             }
5565           for (auto gsi = gsi_start_phis (bb_vinfo->bbs[i]);
5566                !gsi_end_p (gsi); gsi_next (&gsi))
5567             {
5568               stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
5569               if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5570                 vect_determine_stmt_precisions (vinfo, stmt_info);
5571             }
5572         }
5573     }
5574 }
5575
5576 typedef gimple *(*vect_recog_func_ptr) (vec_info *, stmt_vec_info, tree *);
5577
5578 struct vect_recog_func
5579 {
5580   vect_recog_func_ptr fn;
5581   const char *name;
5582 };
5583
5584 /* Note that ordering matters - the first pattern matching on a stmt is
5585    taken which means usually the more complex one needs to preceed the
5586    less comples onex (widen_sum only after dot_prod or sad for example).  */
5587 static vect_recog_func vect_vect_recog_func_ptrs[] = {
5588   { vect_recog_over_widening_pattern, "over_widening" },
5589   /* Must come after over_widening, which narrows the shift as much as
5590      possible beforehand.  */
5591   { vect_recog_average_pattern, "average" },
5592   { vect_recog_cond_expr_convert_pattern, "cond_expr_convert" },
5593   { vect_recog_mulhs_pattern, "mult_high" },
5594   { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
5595   { vect_recog_widen_mult_pattern, "widen_mult" },
5596   { vect_recog_dot_prod_pattern, "dot_prod" },
5597   { vect_recog_sad_pattern, "sad" },
5598   { vect_recog_widen_sum_pattern, "widen_sum" },
5599   { vect_recog_pow_pattern, "pow" },
5600   { vect_recog_popcount_pattern, "popcount" },
5601   { vect_recog_widen_shift_pattern, "widen_shift" },
5602   { vect_recog_rotate_pattern, "rotate" },
5603   { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
5604   { vect_recog_divmod_pattern, "divmod" },
5605   { vect_recog_mult_pattern, "mult" },
5606   { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
5607   { vect_recog_bool_pattern, "bool" },
5608   /* This must come before mask conversion, and includes the parts
5609      of mask conversion that are needed for gather and scatter
5610      internal functions.  */
5611   { vect_recog_gather_scatter_pattern, "gather_scatter" },
5612   { vect_recog_mask_conversion_pattern, "mask_conversion" },
5613   { vect_recog_widen_plus_pattern, "widen_plus" },
5614   { vect_recog_widen_minus_pattern, "widen_minus" },
5615 };
5616
5617 const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);
5618
5619 /* Mark statements that are involved in a pattern.  */
5620
5621 void
5622 vect_mark_pattern_stmts (vec_info *vinfo,
5623                          stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
5624                          tree pattern_vectype)
5625 {
5626   stmt_vec_info orig_stmt_info_saved = orig_stmt_info;
5627   gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
5628
5629   gimple *orig_pattern_stmt = NULL;
5630   if (is_pattern_stmt_p (orig_stmt_info))
5631     {
5632       /* We're replacing a statement in an existing pattern definition
5633          sequence.  */
5634       orig_pattern_stmt = orig_stmt_info->stmt;
5635       if (dump_enabled_p ())
5636         dump_printf_loc (MSG_NOTE, vect_location,
5637                          "replacing earlier pattern %G", orig_pattern_stmt);
5638
5639       /* To keep the book-keeping simple, just swap the lhs of the
5640          old and new statements, so that the old one has a valid but
5641          unused lhs.  */
5642       tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
5643       gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
5644       gimple_set_lhs (pattern_stmt, old_lhs);
5645
5646       if (dump_enabled_p ())
5647         dump_printf_loc (MSG_NOTE, vect_location, "with %G", pattern_stmt);
5648
5649       /* Switch to the statement that ORIG replaces.  */
5650       orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);
5651
5652       /* We shouldn't be replacing the main pattern statement.  */
5653       gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
5654                   != orig_pattern_stmt);
5655     }
5656
5657   if (def_seq)
5658     for (gimple_stmt_iterator si = gsi_start (def_seq);
5659          !gsi_end_p (si); gsi_next (&si))
5660       {
5661         if (dump_enabled_p ())
5662           dump_printf_loc (MSG_NOTE, vect_location,
5663                            "extra pattern stmt: %G", gsi_stmt (si));
5664         stmt_vec_info pattern_stmt_info
5665           = vect_init_pattern_stmt (vinfo, gsi_stmt (si),
5666                                     orig_stmt_info, pattern_vectype);
5667         /* Stmts in the def sequence are not vectorizable cycle or
5668            induction defs, instead they should all be vect_internal_def
5669            feeding the main pattern stmt which retains this def type.  */
5670         STMT_VINFO_DEF_TYPE (pattern_stmt_info) = vect_internal_def;
5671       }
5672
5673   if (orig_pattern_stmt)
5674     {
5675       vect_init_pattern_stmt (vinfo, pattern_stmt,
5676                               orig_stmt_info, pattern_vectype);
5677
5678       /* Insert all the new pattern statements before the original one.  */
5679       gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
5680       gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
5681                                                orig_def_seq);
5682       gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
5683       gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);
5684
5685       /* Remove the pattern statement that this new pattern replaces.  */
5686       gsi_remove (&gsi, false);
5687     }
5688   else
5689     vect_set_pattern_stmt (vinfo,
5690                            pattern_stmt, orig_stmt_info, pattern_vectype);
5691
5692   /* Transfer reduction path info to the pattern.  */
5693   if (STMT_VINFO_REDUC_IDX (orig_stmt_info_saved) != -1)
5694     {
5695       gimple_match_op op;
5696       if (!gimple_extract_op (orig_stmt_info_saved->stmt, &op))
5697         gcc_unreachable ();
5698       tree lookfor = op.ops[STMT_VINFO_REDUC_IDX (orig_stmt_info)];
5699       /* Search the pattern def sequence and the main pattern stmt.  Note
5700          we may have inserted all into a containing pattern def sequence
5701          so the following is a bit awkward.  */
5702       gimple_stmt_iterator si;
5703       gimple *s;
5704       if (def_seq)
5705         {
5706           si = gsi_start (def_seq);
5707           s = gsi_stmt (si);
5708           gsi_next (&si);
5709         }
5710       else
5711         {
5712           si = gsi_none ();
5713           s = pattern_stmt;
5714         }
5715       do
5716         {
5717           bool found = false;
5718           if (gimple_extract_op (s, &op))
5719             for (unsigned i = 0; i < op.num_ops; ++i)
5720               if (op.ops[i] == lookfor)
5721                 {
5722                   STMT_VINFO_REDUC_IDX (vinfo->lookup_stmt (s)) = i;
5723                   lookfor = gimple_get_lhs (s);
5724                   found = true;
5725                   break;
5726                 }
5727           if (s == pattern_stmt)
5728             {
5729               if (!found && dump_enabled_p ())
5730                 dump_printf_loc (MSG_NOTE, vect_location,
5731                                  "failed to update reduction index.\n");
5732               break;
5733             }
5734           if (gsi_end_p (si))
5735             s = pattern_stmt;
5736           else
5737             {
5738               s = gsi_stmt (si);
5739               if (s == pattern_stmt)
5740                 /* Found the end inside a bigger pattern def seq.  */
5741                 si = gsi_none ();
5742               else
5743                 gsi_next (&si);
5744             }
5745         } while (1);
5746     }
5747 }
5748
5749 /* Function vect_pattern_recog_1
5750
5751    Input:
5752    PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
5753         computation pattern.
5754    STMT_INFO: A stmt from which the pattern search should start.
5755
5756    If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
5757    a sequence of statements that has the same functionality and can be
5758    used to replace STMT_INFO.  It returns the last statement in the sequence
5759    and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
5760    PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
5761    statement, having first checked that the target supports the new operation
5762    in that type.
5763
5764    This function also does some bookkeeping, as explained in the documentation
5765    for vect_recog_pattern.  */
5766
5767 static void
5768 vect_pattern_recog_1 (vec_info *vinfo,
5769                       vect_recog_func *recog_func, stmt_vec_info stmt_info)
5770 {
5771   gimple *pattern_stmt;
5772   loop_vec_info loop_vinfo;
5773   tree pattern_vectype;
5774
5775   /* If this statement has already been replaced with pattern statements,
5776      leave the original statement alone, since the first match wins.
5777      Instead try to match against the definition statements that feed
5778      the main pattern statement.  */
5779   if (STMT_VINFO_IN_PATTERN_P (stmt_info))
5780     {
5781       gimple_stmt_iterator gsi;
5782       for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
5783            !gsi_end_p (gsi); gsi_next (&gsi))
5784         vect_pattern_recog_1 (vinfo, recog_func,
5785                               vinfo->lookup_stmt (gsi_stmt (gsi)));
5786       return;
5787     }
5788
5789   gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
5790   pattern_stmt = recog_func->fn (vinfo, stmt_info, &pattern_vectype);
5791   if (!pattern_stmt)
5792     {
5793       /* Clear any half-formed pattern definition sequence.  */
5794       STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
5795       return;
5796     }
5797
5798   loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5799
5800   /* Found a vectorizable pattern.  */
5801   if (dump_enabled_p ())
5802     dump_printf_loc (MSG_NOTE, vect_location,
5803                      "%s pattern recognized: %G",
5804                      recog_func->name, pattern_stmt);
5805
5806   /* Mark the stmts that are involved in the pattern. */
5807   vect_mark_pattern_stmts (vinfo, stmt_info, pattern_stmt, pattern_vectype);
5808
5809   /* Patterns cannot be vectorized using SLP, because they change the order of
5810      computation.  */
5811   if (loop_vinfo)
5812     {
5813       unsigned ix, ix2;
5814       stmt_vec_info *elem_ptr;
5815       VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
5816                              elem_ptr, *elem_ptr == stmt_info);
5817     }
5818 }
5819
5820
5821 /* Function vect_pattern_recog
5822
5823    Input:
5824    LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
5825         computation idioms.
5826
5827    Output - for each computation idiom that is detected we create a new stmt
5828         that provides the same functionality and that can be vectorized.  We
5829         also record some information in the struct_stmt_info of the relevant
5830         stmts, as explained below:
5831
5832    At the entry to this function we have the following stmts, with the
5833    following initial value in the STMT_VINFO fields:
5834
5835          stmt                     in_pattern_p  related_stmt    vec_stmt
5836          S1: a_i = ....                 -       -               -
5837          S2: a_2 = ..use(a_i)..         -       -               -
5838          S3: a_1 = ..use(a_2)..         -       -               -
5839          S4: a_0 = ..use(a_1)..         -       -               -
5840          S5: ... = ..use(a_0)..         -       -               -
5841
5842    Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
5843    represented by a single stmt.  We then:
5844    - create a new stmt S6 equivalent to the pattern (the stmt is not
5845      inserted into the code)
5846    - fill in the STMT_VINFO fields as follows:
5847
5848                                   in_pattern_p  related_stmt    vec_stmt
5849          S1: a_i = ....                 -       -               -
5850          S2: a_2 = ..use(a_i)..         -       -               -
5851          S3: a_1 = ..use(a_2)..         -       -               -
5852          S4: a_0 = ..use(a_1)..         true    S6              -
5853           '---> S6: a_new = ....        -       S4              -
5854          S5: ... = ..use(a_0)..         -       -               -
5855
5856    (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
5857    to each other through the RELATED_STMT field).
5858
5859    S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
5860    of S4 because it will replace all its uses.  Stmts {S1,S2,S3} will
5861    remain irrelevant unless used by stmts other than S4.
5862
5863    If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
5864    (because they are marked as irrelevant).  It will vectorize S6, and record
5865    a pointer to the new vector stmt VS6 from S6 (as usual).
5866    S4 will be skipped, and S5 will be vectorized as usual:
5867
5868                                   in_pattern_p  related_stmt    vec_stmt
5869          S1: a_i = ....                 -       -               -
5870          S2: a_2 = ..use(a_i)..         -       -               -
5871          S3: a_1 = ..use(a_2)..         -       -               -
5872        > VS6: va_new = ....             -       -               -
5873          S4: a_0 = ..use(a_1)..         true    S6              VS6
5874           '---> S6: a_new = ....        -       S4              VS6
5875        > VS5: ... = ..vuse(va_new)..    -       -               -
5876          S5: ... = ..use(a_0)..         -       -               -
5877
5878    DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
5879    elsewhere), and we'll end up with:
5880
5881         VS6: va_new = ....
5882         VS5: ... = ..vuse(va_new)..
5883
5884    In case of more than one pattern statements, e.g., widen-mult with
5885    intermediate type:
5886
5887      S1  a_t = ;
5888      S2  a_T = (TYPE) a_t;
5889            '--> S3: a_it = (interm_type) a_t;
5890      S4  prod_T = a_T * CONST;
5891            '--> S5: prod_T' = a_it w* CONST;
5892
5893    there may be other users of a_T outside the pattern.  In that case S2 will
5894    be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
5895    and vectorized.  The vector stmt VS2 will be recorded in S2, and VS3 will
5896    be recorded in S3.  */
5897
5898 void
5899 vect_pattern_recog (vec_info *vinfo)
5900 {
5901   class loop *loop;
5902   basic_block *bbs;
5903   unsigned int nbbs;
5904   gimple_stmt_iterator si;
5905   unsigned int i, j;
5906
5907   vect_determine_precisions (vinfo);
5908
5909   DUMP_VECT_SCOPE ("vect_pattern_recog");
5910
5911   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
5912     {
5913       loop = LOOP_VINFO_LOOP (loop_vinfo);
5914       bbs = LOOP_VINFO_BBS (loop_vinfo);
5915       nbbs = loop->num_nodes;
5916
5917       /* Scan through the loop stmts, applying the pattern recognition
5918          functions starting at each stmt visited:  */
5919       for (i = 0; i < nbbs; i++)
5920         {
5921           basic_block bb = bbs[i];
5922           for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5923             {
5924               if (is_gimple_debug (gsi_stmt (si)))
5925                 continue;
5926               stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
5927               /* Scan over all generic vect_recog_xxx_pattern functions.  */
5928               for (j = 0; j < NUM_PATTERNS; j++)
5929                 vect_pattern_recog_1 (vinfo, &vect_vect_recog_func_ptrs[j],
5930                                       stmt_info);
5931             }
5932         }
5933     }
5934   else
5935     {
5936       bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
5937       for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
5938         for (gimple_stmt_iterator gsi = gsi_start_bb (bb_vinfo->bbs[i]);
5939              !gsi_end_p (gsi); gsi_next (&gsi))
5940           {
5941             stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (gsi_stmt (gsi));
5942             if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info))
5943               continue;
5944
5945             /* Scan over all generic vect_recog_xxx_pattern functions.  */
5946             for (j = 0; j < NUM_PATTERNS; j++)
5947               vect_pattern_recog_1 (vinfo,
5948                                     &vect_vect_recog_func_ptrs[j], stmt_info);
5949           }
5950     }
5951
5952   /* After this no more add_stmt calls are allowed.  */
5953   vinfo->stmt_vec_info_ro = true;
5954 }