[official-gcc.git] / gcc / tree-vect-patterns.cc
1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2024 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "gimple-iterator.h"
29 #include "gimple-fold.h"
30 #include "ssa.h"
31 #include "expmed.h"
32 #include "optabs-tree.h"
33 #include "insn-config.h"
34 #include "recog.h" /* FIXME: for insn_data */
35 #include "fold-const.h"
36 #include "stor-layout.h"
37 #include "tree-eh.h"
38 #include "gimplify.h"
39 #include "gimple-iterator.h"
40 #include "gimple-fold.h"
41 #include "gimplify-me.h"
42 #include "cfgloop.h"
43 #include "tree-vectorizer.h"
44 #include "dumpfile.h"
45 #include "builtins.h"
46 #include "internal-fn.h"
47 #include "case-cfn-macros.h"
48 #include "fold-const-call.h"
49 #include "attribs.h"
50 #include "cgraph.h"
51 #include "omp-simd-clone.h"
52 #include "predict.h"
53 #include "tree-vector-builder.h"
54 #include "vec-perm-indices.h"
55 #include "gimple-range.h"
58 /* TODO: Note the vectorizer still builds COND_EXPRs with GENERIC compares
59 in the first operand. Disentangling this is future work; the
60 IL is properly transferred to VEC_COND_EXPRs with separate compares. */
63 /* Return true if we have a useful VR_RANGE range for VAR, storing it
64 in *MIN_VALUE and *MAX_VALUE if so. Note the range in the dump files. */
66 bool
67 vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
69 value_range vr;
70 tree vr_min, vr_max;
71 get_range_query (cfun)->range_of_expr (vr, var);
72 if (vr.undefined_p ())
73 vr.set_varying (TREE_TYPE (var));
74 value_range_kind vr_type = get_legacy_range (vr, vr_min, vr_max);
75 *min_value = wi::to_wide (vr_min);
76 *max_value = wi::to_wide (vr_max);
77 wide_int nonzero = get_nonzero_bits (var);
78 signop sgn = TYPE_SIGN (TREE_TYPE (var));
79 if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
80 nonzero, sgn) == VR_RANGE)
82 if (dump_enabled_p ())
84 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
85 dump_printf (MSG_NOTE, " has range [");
86 dump_hex (MSG_NOTE, *min_value);
87 dump_printf (MSG_NOTE, ", ");
88 dump_hex (MSG_NOTE, *max_value);
89 dump_printf (MSG_NOTE, "]\n");
91 return true;
93 else
95 if (dump_enabled_p ())
97 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
98 dump_printf (MSG_NOTE, " has no range info\n");
100 return false;
104 /* Report that we've found an instance of pattern PATTERN in
105 statement STMT. */
107 static void
108 vect_pattern_detected (const char *name, gimple *stmt)
110 if (dump_enabled_p ())
111 dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt);
114 /* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
115 return the pattern statement's stmt_vec_info. Set its vector type to
116 VECTYPE if it doesn't have one already. */
118 static stmt_vec_info
119 vect_init_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
120 stmt_vec_info orig_stmt_info, tree vectype)
122 stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
123 if (pattern_stmt_info == NULL)
124 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
125 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));
127 pattern_stmt_info->pattern_stmt_p = true;
128 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
129 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
130 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
131 STMT_VINFO_TYPE (pattern_stmt_info) = STMT_VINFO_TYPE (orig_stmt_info);
132 if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
134 gcc_assert (!vectype
135 || is_a <gcond *> (pattern_stmt)
136 || (VECTOR_BOOLEAN_TYPE_P (vectype)
137 == vect_use_mask_type_p (orig_stmt_info)));
138 STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
139 pattern_stmt_info->mask_precision = orig_stmt_info->mask_precision;
141 return pattern_stmt_info;
144 /* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
145 Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
146 have one already. */
148 static void
149 vect_set_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
150 stmt_vec_info orig_stmt_info, tree vectype)
152 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
153 STMT_VINFO_RELATED_STMT (orig_stmt_info)
154 = vect_init_pattern_stmt (vinfo, pattern_stmt, orig_stmt_info, vectype);
157 /* Add NEW_STMT to STMT_INFO's pattern definition statements. If VECTYPE
158 is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
159 be different from the vector type of the final pattern statement.
160 If VECTYPE is a mask type, SCALAR_TYPE_FOR_MASK is the scalar type
161 from which it was derived. */
163 static inline void
164 append_pattern_def_seq (vec_info *vinfo,
165 stmt_vec_info stmt_info, gimple *new_stmt,
166 tree vectype = NULL_TREE,
167 tree scalar_type_for_mask = NULL_TREE)
169 gcc_assert (!scalar_type_for_mask
170 == (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype)));
171 if (vectype)
173 stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
174 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
175 if (scalar_type_for_mask)
176 new_stmt_info->mask_precision
177 = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (scalar_type_for_mask));
179 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
180 new_stmt);
183 /* The caller wants to perform new operations on vect_external variable
184 VAR, so that the result of the operations would also be vect_external.
185 Return the edge on which the operations can be performed, if one exists.
186 Return null if the operations should instead be treated as part of
187 the pattern that needs them. */
189 static edge
190 vect_get_external_def_edge (vec_info *vinfo, tree var)
192 edge e = NULL;
193 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
195 e = loop_preheader_edge (loop_vinfo->loop);
196 if (!SSA_NAME_IS_DEFAULT_DEF (var))
198 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
199 if (bb == NULL
200 || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
201 e = NULL;
204 return e;
207 /* Return true if the target supports a vector version of CODE,
208 where CODE is known to map to a direct optab with the given SUBTYPE.
209 ITYPE specifies the type of (some of) the scalar inputs and OTYPE
210 specifies the type of the scalar result.
212 If CODE allows the inputs and outputs to have different type
213 (such as for WIDEN_SUM_EXPR), it is the input mode rather
214 than the output mode that determines the appropriate target pattern.
215 Operand 0 of the target pattern then specifies the mode that the output
216 must have.
218 When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
219 Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
220 is nonnull. */
222 static bool
223 vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code,
224 tree itype, tree *vecotype_out,
225 tree *vecitype_out = NULL,
226 enum optab_subtype subtype = optab_default)
228 tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
229 if (!vecitype)
230 return false;
232 tree vecotype = get_vectype_for_scalar_type (vinfo, otype);
233 if (!vecotype)
234 return false;
236 optab optab = optab_for_tree_code (code, vecitype, subtype);
237 if (!optab)
238 return false;
240 insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
241 if (icode == CODE_FOR_nothing
242 || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
243 return false;
245 *vecotype_out = vecotype;
246 if (vecitype_out)
247 *vecitype_out = vecitype;
248 return true;
251 /* Round bit precision PRECISION up to a full element. */
253 static unsigned int
254 vect_element_precision (unsigned int precision)
256 precision = 1 << ceil_log2 (precision);
257 return MAX (precision, BITS_PER_UNIT);
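/* For illustration (not part of the original sources), a few sample values,
   assuming BITS_PER_UNIT is 8 as on common targets:

     vect_element_precision (3)  == 8    1 << ceil_log2 (3) == 4, raised to BITS_PER_UNIT
     vect_element_precision (12) == 16   rounded up to the next power of two
     vect_element_precision (32) == 32   already a full element width  */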
260 /* If OP is defined by a statement that's being considered for vectorization,
261 return information about that statement, otherwise return NULL. */
263 static stmt_vec_info
264 vect_get_internal_def (vec_info *vinfo, tree op)
266 stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
267 if (def_stmt_info
268 && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
269 return def_stmt_info;
270 return NULL;
273 /* Check whether NAME, an ssa-name used in STMT_VINFO,
274 is a result of a type promotion, such that:
275 DEF_STMT: NAME = NOP (name0)
276 If CHECK_SIGN is TRUE, check that either both types are signed or both are
277 unsigned. */
279 static bool
280 type_conversion_p (vec_info *vinfo, tree name, bool check_sign,
281 tree *orig_type, gimple **def_stmt, bool *promotion)
283 tree type = TREE_TYPE (name);
284 tree oprnd0;
285 enum vect_def_type dt;
287 stmt_vec_info def_stmt_info;
288 if (!vect_is_simple_use (name, vinfo, &dt, &def_stmt_info, def_stmt))
289 return false;
291 if (dt != vect_internal_def
292 && dt != vect_external_def && dt != vect_constant_def)
293 return false;
295 if (!*def_stmt)
296 return false;
298 if (!is_gimple_assign (*def_stmt))
299 return false;
301 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
302 return false;
304 oprnd0 = gimple_assign_rhs1 (*def_stmt);
306 *orig_type = TREE_TYPE (oprnd0);
307 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
308 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
309 return false;
311 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
312 *promotion = true;
313 else
314 *promotion = false;
316 if (!vect_is_simple_use (oprnd0, vinfo, &dt))
317 return false;
319 return true;
322 /* Holds information about an input operand after some sign changes
323 and type promotions have been peeled away. */
324 class vect_unpromoted_value {
325 public:
326 vect_unpromoted_value ();
328 void set_op (tree, vect_def_type, stmt_vec_info = NULL);
330 /* The value obtained after peeling away zero or more casts. */
331 tree op;
333 /* The type of OP. */
334 tree type;
336 /* The definition type of OP. */
337 vect_def_type dt;
339 /* If OP is the result of peeling at least one cast, and if the cast
340 of OP itself is a vectorizable statement, CASTER identifies that
341 statement, otherwise it is null. */
342 stmt_vec_info caster;
345 inline vect_unpromoted_value::vect_unpromoted_value ()
346 : op (NULL_TREE),
347 type (NULL_TREE),
348 dt (vect_uninitialized_def),
349 caster (NULL)
353 /* Set the operand to OP_IN, its definition type to DT_IN, and the
354 statement that casts it to CASTER_IN. */
356 inline void
357 vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
358 stmt_vec_info caster_in)
360 op = op_in;
361 type = TREE_TYPE (op);
362 dt = dt_in;
363 caster = caster_in;
366 /* If OP is a vectorizable SSA name, strip a sequence of integer conversions
367 to reach some vectorizable inner operand OP', continuing as long as it
368 is possible to convert OP' back to OP using a possible sign change
369 followed by a possible promotion P. Return this OP', or null if OP is
370 not a vectorizable SSA name. If there is a promotion P, describe its
371 input in UNPROM, otherwise describe OP' in UNPROM. If SINGLE_USE_P
372 is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
373 have more than one user.
375 A successful return means that it is possible to go from OP' to OP
376 via UNPROM. The cast from OP' to UNPROM is at most a sign change,
377 whereas the cast from UNPROM to OP might be a promotion, a sign
378 change, or a nop.
380 E.g. say we have:
382 signed short *ptr = ...;
383 signed short C = *ptr;
384 unsigned short B = (unsigned short) C; // sign change
385 signed int A = (signed int) B; // unsigned promotion
386 ...possible other uses of A...
387 unsigned int OP = (unsigned int) A; // sign change
389 In this case it's possible to go directly from C to OP using:
391 OP = (unsigned int) (unsigned short) C;
392 +------------+ +--------------+
393 promotion sign change
395 so OP' would be C. The input to the promotion is B, so UNPROM
396 would describe B. */
398 static tree
399 vect_look_through_possible_promotion (vec_info *vinfo, tree op,
400 vect_unpromoted_value *unprom,
401 bool *single_use_p = NULL)
403 tree op_type = TREE_TYPE (op);
404 if (!INTEGRAL_TYPE_P (op_type))
405 return NULL_TREE;
407 tree res = NULL_TREE;
408 unsigned int orig_precision = TYPE_PRECISION (op_type);
409 unsigned int min_precision = orig_precision;
410 stmt_vec_info caster = NULL;
411 while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
413 /* See whether OP is simple enough to vectorize. */
414 stmt_vec_info def_stmt_info;
415 gimple *def_stmt;
416 vect_def_type dt;
417 if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
418 break;
420 /* If OP is the input of a demotion, skip over it to see whether
421 OP is itself the result of a promotion. If so, the combined
422 effect of the promotion and the demotion might fit the required
423 pattern, otherwise neither operation fits.
425 This copes with cases such as the result of an arithmetic
426 operation being truncated before being stored, and where that
427 arithmetic operation has been recognized as an over-widened one. */
428 if (TYPE_PRECISION (op_type) <= min_precision)
430 /* Use OP as the UNPROM described above if we haven't yet
431 found a promotion, or if using the new input preserves the
432 sign of the previous promotion. */
433 if (!res
434 || TYPE_PRECISION (unprom->type) == orig_precision
435 || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type))
437 unprom->set_op (op, dt, caster);
438 min_precision = TYPE_PRECISION (op_type);
440 /* Stop if we've already seen a promotion and if this
441 conversion does more than change the sign. */
442 else if (TYPE_PRECISION (op_type)
443 != TYPE_PRECISION (unprom->type))
444 break;
446 /* The sequence now extends to OP. */
447 res = op;
450 /* See whether OP is defined by a cast. Record it as CASTER if
451 the cast is potentially vectorizable. */
452 if (!def_stmt)
453 break;
454 caster = def_stmt_info;
456 /* Ignore pattern statements, since we don't link uses for them. */
457 if (caster
458 && single_use_p
459 && !STMT_VINFO_RELATED_STMT (caster)
460 && !has_single_use (res))
461 *single_use_p = false;
463 gassign *assign = dyn_cast <gassign *> (def_stmt);
464 if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
465 break;
467 /* Continue with the input to the cast. */
468 op = gimple_assign_rhs1 (def_stmt);
469 op_type = TREE_TYPE (op);
471 return res;
474 /* OP is an integer operand to an operation that returns TYPE, and we
475 want to treat the operation as a widening one. So far we can treat
476 it as widening from *COMMON_TYPE.
478 Return true if OP is suitable for such a widening operation,
479 either widening from *COMMON_TYPE or from some supertype of it.
480 Update *COMMON_TYPE to the supertype in the latter case.
482 SHIFT_P is true if OP is a shift amount. */
484 static bool
485 vect_joust_widened_integer (tree type, bool shift_p, tree op,
486 tree *common_type)
488 /* Calculate the minimum precision required by OP, without changing
489 the sign of either operand. */
490 unsigned int precision;
491 if (shift_p)
493 if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
494 return false;
495 precision = TREE_INT_CST_LOW (op);
497 else
499 precision = wi::min_precision (wi::to_widest (op),
500 TYPE_SIGN (*common_type));
501 if (precision * 2 > TYPE_PRECISION (type))
502 return false;
505 /* If OP requires a wider type, switch to that type. The checks
506 above ensure that this is still narrower than the result. */
507 precision = vect_element_precision (precision);
508 if (TYPE_PRECISION (*common_type) < precision)
509 *common_type = build_nonstandard_integer_type
510 (precision, TYPE_UNSIGNED (*common_type));
511 return true;
514 /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
515 is narrower than type, storing the supertype in *COMMON_TYPE if so. */
517 static bool
518 vect_joust_widened_type (tree type, tree new_type, tree *common_type)
520 if (types_compatible_p (*common_type, new_type))
521 return true;
523 /* See if *COMMON_TYPE can hold all values of NEW_TYPE. */
524 if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
525 && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
526 return true;
528 /* See if NEW_TYPE can hold all values of *COMMON_TYPE. */
529 if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
530 && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
532 *common_type = new_type;
533 return true;
536 /* We have mismatched signs, with the signed type being
537 no wider than the unsigned type. In this case we need
538 a wider signed type. */
539 unsigned int precision = MAX (TYPE_PRECISION (*common_type),
540 TYPE_PRECISION (new_type));
541 precision *= 2;
543 if (precision * 2 > TYPE_PRECISION (type))
544 return false;
546 *common_type = build_nonstandard_integer_type (precision, false);
547 return true;
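/* Illustrative examples (not from the original sources) of how
   vect_joust_widened_type resolves *COMMON_TYPE, assuming for example
   a 32-bit result type:

     *common_type == unsigned char, new_type == unsigned char
       -> unchanged (compatible types)
     *common_type == unsigned char, new_type == signed short
       -> *common_type becomes signed short (it holds all unsigned char values)
     *common_type == unsigned short, new_type == signed short
       -> mismatched signs of equal width; a signed 32-bit supertype would be
          needed, which is rejected here because the result type must still
          be at least twice as wide as the common type.  */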
550 /* Check whether STMT_INFO can be viewed as a tree of integer operations
551 in which each node either performs CODE or WIDENED_CODE, and where
552 each leaf operand is narrower than the result of STMT_INFO. MAX_NOPS
553 specifies the maximum number of leaf operands. SHIFT_P says whether
554 CODE and WIDENED_CODE are some sort of shift.
556 If STMT_INFO is such a tree, return the number of leaf operands
557 and describe them in UNPROM[0] onwards. Also set *COMMON_TYPE
558 to a type that (a) is narrower than the result of STMT_INFO and
559 (b) can hold all leaf operand values.
561 If SUBTYPE is nonnull, allow the signs of the operands to differ
562 but not their precision. SUBTYPE is updated to reflect
563 this.
565 Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
566 exists. */
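/* As an illustration (not taken from the original sources), with CODE
   PLUS_EXPR and a 32-bit result, a computation such as

     short a, b, c, d;
     int res = ((int) a + (int) b) + ((int) c + (int) d);

   forms a tree with four leaf operands, each reachable through a promotion
   from short, so UNPROM[0..3] would describe a, b, c and d and *COMMON_TYPE
   would be short (assuming MAX_NOPS permits four leaves).  */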
568 static unsigned int
569 vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code,
570 code_helper widened_code, bool shift_p,
571 unsigned int max_nops,
572 vect_unpromoted_value *unprom, tree *common_type,
573 enum optab_subtype *subtype = NULL)
575 /* Check for an integer operation with the right code. */
576 gimple* stmt = stmt_info->stmt;
577 if (!(is_gimple_assign (stmt) || is_gimple_call (stmt)))
578 return 0;
580 code_helper rhs_code;
581 if (is_gimple_assign (stmt))
582 rhs_code = gimple_assign_rhs_code (stmt);
583 else if (is_gimple_call (stmt))
584 rhs_code = gimple_call_combined_fn (stmt);
585 else
586 return 0;
588 if (rhs_code != code
589 && rhs_code != widened_code)
590 return 0;
592 tree lhs = gimple_get_lhs (stmt);
593 tree type = TREE_TYPE (lhs);
594 if (!INTEGRAL_TYPE_P (type))
595 return 0;
597 /* Assume that both operands will be leaf operands. */
598 max_nops -= 2;
600 /* Check the operands. */
601 unsigned int next_op = 0;
602 for (unsigned int i = 0; i < 2; ++i)
604 vect_unpromoted_value *this_unprom = &unprom[next_op];
605 unsigned int nops = 1;
606 tree op = gimple_arg (stmt, i);
607 if (i == 1 && TREE_CODE (op) == INTEGER_CST)
609 /* We already have a common type from earlier operands.
610 Update it to account for OP. */
611 this_unprom->set_op (op, vect_constant_def);
612 if (!vect_joust_widened_integer (type, shift_p, op, common_type))
613 return 0;
615 else
617 /* Only allow shifts by constants. */
618 if (shift_p && i == 1)
619 return 0;
621 if (rhs_code != code)
623 /* If rhs_code is widened_code, don't look through further
624 possible promotions, there is a promotion already embedded
625 in the WIDEN_*_EXPR. */
626 if (TREE_CODE (op) != SSA_NAME
627 || !INTEGRAL_TYPE_P (TREE_TYPE (op)))
628 return 0;
630 stmt_vec_info def_stmt_info;
631 gimple *def_stmt;
632 vect_def_type dt;
633 if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info,
634 &def_stmt))
635 return 0;
636 this_unprom->set_op (op, dt, NULL);
638 else if (!vect_look_through_possible_promotion (vinfo, op,
639 this_unprom))
640 return 0;
642 if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
644 /* The operand isn't widened. If STMT_INFO has the code
645 for an unwidened operation, recursively check whether
646 this operand is a node of the tree. */
647 if (rhs_code != code
648 || max_nops == 0
649 || this_unprom->dt != vect_internal_def)
650 return 0;
652 /* Give back the leaf slot allocated above now that we're
653 not treating this as a leaf operand. */
654 max_nops += 1;
656 /* Recursively process the definition of the operand. */
657 stmt_vec_info def_stmt_info
658 = vinfo->lookup_def (this_unprom->op);
659 nops = vect_widened_op_tree (vinfo, def_stmt_info, code,
660 widened_code, shift_p, max_nops,
661 this_unprom, common_type,
662 subtype);
663 if (nops == 0)
664 return 0;
666 max_nops -= nops;
668 else
670 /* Make sure that the operand is narrower than the result. */
671 if (TYPE_PRECISION (this_unprom->type) * 2
672 > TYPE_PRECISION (type))
673 return 0;
675 /* Update COMMON_TYPE for the new operand. */
676 if (i == 0)
677 *common_type = this_unprom->type;
678 else if (!vect_joust_widened_type (type, this_unprom->type,
679 common_type))
681 if (subtype)
683 /* See if we can sign extend the smaller type. */
684 if (TYPE_PRECISION (this_unprom->type)
685 > TYPE_PRECISION (*common_type))
686 *common_type = this_unprom->type;
687 *subtype = optab_vector_mixed_sign;
689 else
690 return 0;
694 next_op += nops;
696 return next_op;
699 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
700 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
702 static tree
703 vect_recog_temp_ssa_var (tree type, gimple *stmt = NULL)
705 return make_temp_ssa_name (type, stmt, "patt");
708 /* STMT2_INFO describes a type conversion that could be split into STMT1
709 followed by a version of STMT2_INFO that takes NEW_RHS as its first
710 input. Try to do this using pattern statements, returning true on
711 success. */
713 static bool
714 vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
715 gimple *stmt1, tree vectype)
717 if (is_pattern_stmt_p (stmt2_info))
719 /* STMT2_INFO is part of a pattern. Get the statement to which
720 the pattern is attached. */
721 stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
722 vect_init_pattern_stmt (vinfo, stmt1, orig_stmt2_info, vectype);
724 if (dump_enabled_p ())
725 dump_printf_loc (MSG_NOTE, vect_location,
726 "Splitting pattern statement: %G", stmt2_info->stmt);
728 /* Since STMT2_INFO is a pattern statement, we can change it
729 in-situ without worrying about changing the code for the
730 containing block. */
731 gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);
733 if (dump_enabled_p ())
735 dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
736 dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
737 stmt2_info->stmt);
740 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
741 if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
742 /* STMT2_INFO is the actual pattern statement. Add STMT1
743 to the end of the definition sequence. */
744 gimple_seq_add_stmt_without_update (def_seq, stmt1);
745 else
747 /* STMT2_INFO belongs to the definition sequence. Insert STMT1
748 before it. */
749 gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
750 gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
752 return true;
754 else
756 /* STMT2_INFO doesn't yet have a pattern. Try to create a
757 two-statement pattern now. */
758 gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
759 tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
760 tree lhs_vectype = get_vectype_for_scalar_type (vinfo, lhs_type);
761 if (!lhs_vectype)
762 return false;
764 if (dump_enabled_p ())
765 dump_printf_loc (MSG_NOTE, vect_location,
766 "Splitting statement: %G", stmt2_info->stmt);
768 /* Add STMT1 as a singleton pattern definition sequence. */
769 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
770 vect_init_pattern_stmt (vinfo, stmt1, stmt2_info, vectype);
771 gimple_seq_add_stmt_without_update (def_seq, stmt1);
773 /* Build the second of the two pattern statements. */
774 tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
775 gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
776 vect_set_pattern_stmt (vinfo, new_stmt2, stmt2_info, lhs_vectype);
778 if (dump_enabled_p ())
780 dump_printf_loc (MSG_NOTE, vect_location,
781 "into pattern statements: %G", stmt1);
782 dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
783 (gimple *) new_stmt2);
786 return true;
790 /* Look for the following pattern
791 X = x[i]
792 Y = y[i]
793 DIFF = X - Y
794 DAD = ABS_EXPR<DIFF>
796 ABS_STMT should point to a statement of code ABS_EXPR or ABSU_EXPR.
797 HALF_TYPE and UNPROM will be set should the statement be found to
798 be a widened operation.
799 DIFF_STMT will be set to the MINUS_EXPR
800 statement that precedes the ABS_STMT unless vect_widened_op_tree
801 succeeds. */
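/* A minimal scalar sketch (illustrative only, not from the original
   sources) of the shape this helper recognizes:

     unsigned char x, y;
     int diff = (int) x - (int) y;         widened subtraction
     int dad  = diff < 0 ? -diff : diff;   ABS_EXPR <diff>

   Here *HALF_TYPE would be unsigned char and UNPROM would describe x and y;
   if the subtraction is not widened, *DIFF_STMT is set to the plain
   MINUS_EXPR instead and *HALF_TYPE is cleared.  */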
803 static bool
804 vect_recog_absolute_difference (vec_info *vinfo, gassign *abs_stmt,
805 tree *half_type,
806 vect_unpromoted_value unprom[2],
807 gassign **diff_stmt)
809 if (!abs_stmt)
810 return false;
812 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
813 inside the loop (in case we are analyzing an outer-loop). */
814 enum tree_code code = gimple_assign_rhs_code (abs_stmt);
815 if (code != ABS_EXPR && code != ABSU_EXPR)
816 return false;
818 tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
819 tree abs_type = TREE_TYPE (abs_oprnd);
820 if (!abs_oprnd)
821 return false;
822 if (!ANY_INTEGRAL_TYPE_P (abs_type)
823 || TYPE_OVERFLOW_WRAPS (abs_type)
824 || TYPE_UNSIGNED (abs_type))
825 return false;
827 /* Peel off conversions from the ABS input. This can involve sign
828 changes (e.g. from an unsigned subtraction to a signed ABS input)
829 or signed promotion, but it can't include unsigned promotion.
830 (Note that ABS of an unsigned promotion should have been folded
831 away before now anyway.) */
832 vect_unpromoted_value unprom_diff;
833 abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
834 &unprom_diff);
835 if (!abs_oprnd)
836 return false;
837 if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
838 && TYPE_UNSIGNED (unprom_diff.type))
839 return false;
841 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
842 stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
843 if (!diff_stmt_vinfo)
844 return false;
846 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
847 inside the loop (in case we are analyzing an outer-loop). */
848 if (vect_widened_op_tree (vinfo, diff_stmt_vinfo,
849 MINUS_EXPR, IFN_VEC_WIDEN_MINUS,
850 false, 2, unprom, half_type))
851 return true;
853 /* Failed to find a widen operation so we check for a regular MINUS_EXPR. */
854 gassign *diff = dyn_cast <gassign *> (STMT_VINFO_STMT (diff_stmt_vinfo));
855 if (diff_stmt && diff
856 && gimple_assign_rhs_code (diff) == MINUS_EXPR
857 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (abs_oprnd)))
859 *diff_stmt = diff;
860 *half_type = NULL_TREE;
861 return true;
864 return false;
867 /* Convert UNPROM to TYPE and return the result, adding new statements
868 to STMT_INFO's pattern definition statements if no better way is
869 available. VECTYPE is the vector form of TYPE.
871 If SUBTYPE then convert the type based on the subtype. */
873 static tree
874 vect_convert_input (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
875 vect_unpromoted_value *unprom, tree vectype,
876 enum optab_subtype subtype = optab_default)
878 /* Update the type if the signs differ. */
879 if (subtype == optab_vector_mixed_sign)
881 gcc_assert (!TYPE_UNSIGNED (type));
882 if (TYPE_UNSIGNED (TREE_TYPE (unprom->op)))
884 type = unsigned_type_for (type);
885 vectype = unsigned_type_for (vectype);
889 /* Check for a no-op conversion. */
890 if (types_compatible_p (type, TREE_TYPE (unprom->op)))
891 return unprom->op;
893 /* Allow the caller to create constant vect_unpromoted_values. */
894 if (TREE_CODE (unprom->op) == INTEGER_CST)
895 return wide_int_to_tree (type, wi::to_widest (unprom->op));
897 tree input = unprom->op;
898 if (unprom->caster)
900 tree lhs = gimple_get_lhs (unprom->caster->stmt);
901 tree lhs_type = TREE_TYPE (lhs);
903 /* If the result of the existing cast is the right width, use it
904 instead of the source of the cast. */
905 if (TYPE_PRECISION (lhs_type) == TYPE_PRECISION (type))
906 input = lhs;
907 /* If the precision we want is between the source and result
908 precisions of the existing cast, try splitting the cast into
909 two and tapping into a mid-way point. */
910 else if (TYPE_PRECISION (lhs_type) > TYPE_PRECISION (type)
911 && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type))
913 /* In order to preserve the semantics of the original cast,
914 give the mid-way point the same signedness as the input value.
916 It would be possible to use a signed type here instead if
917 TYPE is signed and UNPROM->TYPE is unsigned, but that would
918 make the sign of the midtype sensitive to the order in
919 which we process the statements, since the signedness of
920 TYPE is the signedness required by just one of possibly
921 many users. Also, unsigned promotions are usually as cheap
922 as or cheaper than signed ones, so it's better to keep an
923 unsigned promotion. */
924 tree midtype = build_nonstandard_integer_type
925 (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type));
926 tree vec_midtype = get_vectype_for_scalar_type (vinfo, midtype);
927 if (vec_midtype)
929 input = vect_recog_temp_ssa_var (midtype, NULL);
930 gassign *new_stmt = gimple_build_assign (input, NOP_EXPR,
931 unprom->op);
932 if (!vect_split_statement (vinfo, unprom->caster, input, new_stmt,
933 vec_midtype))
934 append_pattern_def_seq (vinfo, stmt_info,
935 new_stmt, vec_midtype);
939 /* See if we can reuse an existing result. */
940 if (types_compatible_p (type, TREE_TYPE (input)))
941 return input;
944 /* We need a new conversion statement. */
945 tree new_op = vect_recog_temp_ssa_var (type, NULL);
946 gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, input);
948 /* If OP is an external value, see if we can insert the new statement
949 on an incoming edge. */
950 if (input == unprom->op && unprom->dt == vect_external_def)
951 if (edge e = vect_get_external_def_edge (vinfo, input))
953 basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
954 gcc_assert (!new_bb);
955 return new_op;
958 /* As a (common) last resort, add the statement to the pattern itself. */
959 append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype);
960 return new_op;
963 /* Invoke vect_convert_input for N elements of UNPROM and store the
964 result in the corresponding elements of RESULT.
966 If SUBTYPE then convert the type based on the subtype. */
968 static void
969 vect_convert_inputs (vec_info *vinfo, stmt_vec_info stmt_info, unsigned int n,
970 tree *result, tree type, vect_unpromoted_value *unprom,
971 tree vectype, enum optab_subtype subtype = optab_default)
973 for (unsigned int i = 0; i < n; ++i)
975 unsigned int j;
976 for (j = 0; j < i; ++j)
977 if (unprom[j].op == unprom[i].op)
978 break;
980 if (j < i)
981 result[i] = result[j];
982 else
983 result[i] = vect_convert_input (vinfo, stmt_info,
984 type, &unprom[i], vectype, subtype);
988 /* The caller has created a (possibly empty) sequence of pattern definition
989 statements followed by a single statement PATTERN_STMT. Cast the result
990 of this final statement to TYPE. If a new statement is needed, add
991 PATTERN_STMT to the end of STMT_INFO's pattern definition statements
992 and return the new statement, otherwise return PATTERN_STMT as-is.
993 VECITYPE is the vector form of PATTERN_STMT's result type. */
995 static gimple *
996 vect_convert_output (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
997 gimple *pattern_stmt, tree vecitype)
999 tree lhs = gimple_get_lhs (pattern_stmt);
1000 if (!types_compatible_p (type, TREE_TYPE (lhs)))
1002 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vecitype);
1003 tree cast_var = vect_recog_temp_ssa_var (type, NULL);
1004 pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
1006 return pattern_stmt;
1009 /* Return true if STMT_VINFO describes a reduction for which reassociation
1010 is allowed. If STMT_INFO is part of a group, assume that it's part of
1011 a reduction chain and optimistically assume that all statements
1012 except the last allow reassociation.
1013 Also require it to have code CODE and to be a reduction
1014 in the outermost loop. When returning true, store the operands in
1015 *OP0_OUT and *OP1_OUT. */
1017 static bool
1018 vect_reassociating_reduction_p (vec_info *vinfo,
1019 stmt_vec_info stmt_info, tree_code code,
1020 tree *op0_out, tree *op1_out)
1022 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
1023 if (!loop_info)
1024 return false;
1026 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
1027 if (!assign || gimple_assign_rhs_code (assign) != code)
1028 return false;
1030 /* We don't allow changing the order of the computation in the inner-loop
1031 when doing outer-loop vectorization. */
1032 class loop *loop = LOOP_VINFO_LOOP (loop_info);
1033 if (loop && nested_in_vect_loop_p (loop, stmt_info))
1034 return false;
1036 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
1038 if (needs_fold_left_reduction_p (TREE_TYPE (gimple_assign_lhs (assign)),
1039 code))
1040 return false;
1042 else if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) == NULL)
1043 return false;
1045 *op0_out = gimple_assign_rhs1 (assign);
1046 *op1_out = gimple_assign_rhs2 (assign);
1047 if (commutative_tree_code (code) && STMT_VINFO_REDUC_IDX (stmt_info) == 0)
1048 std::swap (*op0_out, *op1_out);
1049 return true;
1052 /* match.pd function to match
1053 (cond (cmp@3 a b) (convert@1 c) (convert@2 d))
1054 with conditions:
1055 1) @1, @2, c, d, a, b are all integral type.
1056 2) There's single_use for both @1 and @2.
1057 3) a, c have same precision.
1058 4) c and @1 have different precision.
1059 5) c, d are the same type or they can differ in sign when convert is
1060 truncation.
1062 record a and c and d and @3. */
1064 extern bool gimple_cond_expr_convert_p (tree, tree*, tree (*)(tree));
1066 /* Function vect_recog_cond_expr_convert
1068 Try to find the following pattern:
1070 TYPE_AB A,B;
1071 TYPE_CD C,D;
1072 TYPE_E E;
1073 TYPE_E op_true = (TYPE_E) A;
1074 TYPE_E op_false = (TYPE_E) B;
1076 E = C cmp D ? op_true : op_false;
1078 where
1079 TYPE_PRECISION (TYPE_E) != TYPE_PRECISION (TYPE_CD);
1080 TYPE_PRECISION (TYPE_AB) == TYPE_PRECISION (TYPE_CD);
1081 single_use of op_true and op_false.
1082 TYPE_AB could differ in sign when (TYPE_E) A is a truncation.
1084 Input:
1086 * STMT_VINFO: The stmt from which the pattern search begins.
1087 here it starts with E = c cmp D ? op_true : op_false;
1089 Output:
1091 TYPE1 E' = C cmp D ? A : B;
1092 TYPE3 E = (TYPE3) E';
1094 There may be an extra nop_convert for A or B to handle different signedness.
1096 * TYPE_OUT: The vector type of the output of this pattern.
1098 * Return value: A new stmt that will be used to replace the sequence of
1099 stmts that constitute the pattern. In this case it will be:
1100 E = (TYPE3)E';
1101 E' = C cmp D ? A : B; is recorded in pattern definition statements; */
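/* Illustrative scalar example (not from the original sources) of the
   shape matched above:

     int a, b, c, d;
     short e = c < d ? (short) a : (short) b;

   would be rewritten as the pattern statements

     int e_tmp = c < d ? a : b;        COND_EXPR computed in the wider type
     short e   = (short) e_tmp;        single truncation of the result

   so that the comparison, the selection and the selected operands all share
   one vector element size.  */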
1103 static gimple *
1104 vect_recog_cond_expr_convert_pattern (vec_info *vinfo,
1105 stmt_vec_info stmt_vinfo, tree *type_out)
1107 gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
1108 tree lhs, match[4], temp, type, new_lhs, op2;
1109 gimple *cond_stmt;
1110 gimple *pattern_stmt;
1112 if (!last_stmt)
1113 return NULL;
1115 lhs = gimple_assign_lhs (last_stmt);
1117 /* Find E = C cmp D ? (TYPE3) A : (TYPE3) B;
1118 TYPE_PRECISION (A) == TYPE_PRECISION (C). */
1119 if (!gimple_cond_expr_convert_p (lhs, &match[0], NULL))
1120 return NULL;
1122 vect_pattern_detected ("vect_recog_cond_expr_convert_pattern", last_stmt);
1124 op2 = match[2];
1125 type = TREE_TYPE (match[1]);
1126 if (TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (match[2])))
1128 op2 = vect_recog_temp_ssa_var (type, NULL);
1129 gimple* nop_stmt = gimple_build_assign (op2, NOP_EXPR, match[2]);
1130 append_pattern_def_seq (vinfo, stmt_vinfo, nop_stmt,
1131 get_vectype_for_scalar_type (vinfo, type));
1134 temp = vect_recog_temp_ssa_var (type, NULL);
1135 cond_stmt = gimple_build_assign (temp, build3 (COND_EXPR, type, match[3],
1136 match[1], op2));
1137 append_pattern_def_seq (vinfo, stmt_vinfo, cond_stmt,
1138 get_vectype_for_scalar_type (vinfo, type));
1139 new_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
1140 pattern_stmt = gimple_build_assign (new_lhs, NOP_EXPR, temp);
1141 *type_out = STMT_VINFO_VECTYPE (stmt_vinfo);
1143 if (dump_enabled_p ())
1144 dump_printf_loc (MSG_NOTE, vect_location,
1145 "created pattern stmt: %G", pattern_stmt);
1146 return pattern_stmt;
1149 /* Function vect_recog_dot_prod_pattern
1151 Try to find the following pattern:
1153 type1a x_t;
1154 type1b y_t;
1155 TYPE1 prod;
1156 TYPE2 sum = init;
1157 loop:
1158 sum_0 = phi <init, sum_1>
1159 S1 x_t = ...
1160 S2 y_t = ...
1161 S3 x_T = (TYPE1) x_t;
1162 S4 y_T = (TYPE1) y_t;
1163 S5 prod = x_T * y_T;
1164 [S6 prod = (TYPE2) prod; #optional]
1165 S7 sum_1 = prod + sum_0;
1167 where 'TYPE1' is exactly double the size of type 'type1a' and 'type1b',
1168 the sign of 'TYPE1' must be one of 'type1a' or 'type1b' but the sign of
1169 'type1a' and 'type1b' can differ.
1171 Input:
1173 * STMT_VINFO: The stmt from which the pattern search begins. In the
1174 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
1175 will be detected.
1177 Output:
1179 * TYPE_OUT: The type of the output of this pattern.
1181 * Return value: A new stmt that will be used to replace the sequence of
1182 stmts that constitute the pattern. In this case it will be:
1183 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
1185 Note: The dot-prod idiom is a widening reduction pattern that is
1186 vectorized without preserving all the intermediate results. It
1187 produces only N/2 (widened) results (by summing up pairs of
1188 intermediate results) rather than all N results. Therefore, we
1189 cannot allow this pattern when we want to get all the results and in
1190 the correct order (as is the case when this computation is in an
1191 inner-loop nested in an outer-loop that is being vectorized). */
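/* A typical scalar source loop (illustrative, not quoted from the original
   sources) that this recognizer turns into DOT_PROD_EXPR:

     signed char *x, *y;
     int sum = 0;
     for (int i = 0; i < n; i++)
       {
         short prod = (short) x[i] * (short) y[i];   S3..S5 above
         sum += prod;                                S6..S7 above
       }

   Mixed-sign inputs (one signed char, one unsigned char operand) are handled
   through the optab_vector_mixed_sign subtype checked below.  */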
1193 static gimple *
1194 vect_recog_dot_prod_pattern (vec_info *vinfo,
1195 stmt_vec_info stmt_vinfo, tree *type_out)
1197 tree oprnd0, oprnd1;
1198 gimple *last_stmt = stmt_vinfo->stmt;
1199 tree type, half_type;
1200 gimple *pattern_stmt;
1201 tree var;
1203 /* Look for the following pattern
1204 DX = (TYPE1) X;
1205 DY = (TYPE1) Y;
1206 DPROD = DX * DY;
1207 DDPROD = (TYPE2) DPROD;
1208 sum_1 = DDPROD + sum_0;
1209 In which
1210 - DX is double the size of X
1211 - DY is double the size of Y
1212 - DX, DY, DPROD all have the same type but the sign
1213 between X, Y and DPROD can differ.
1214 - sum is the same size as DPROD or bigger
1215 - sum has been recognized as a reduction variable.
1217 This is equivalent to:
1218 DPROD = X w* Y; #widen mult
1219 sum_1 = DPROD w+ sum_0; #widen summation
1221 DPROD = X w* Y; #widen mult
1222 sum_1 = DPROD + sum_0; #summation */
1225 /* Starting from LAST_STMT, follow the defs of its uses in search
1226 of the above pattern. */
1228 if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1229 &oprnd0, &oprnd1))
1230 return NULL;
1232 type = TREE_TYPE (gimple_get_lhs (last_stmt));
1234 vect_unpromoted_value unprom_mult;
1235 oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);
1237 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1238 we know that oprnd1 is the reduction variable (defined by a loop-header
1239 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1240 Left to check that oprnd0 is defined by a (widen_)mult_expr */
1241 if (!oprnd0)
1242 return NULL;
1244 stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
1245 if (!mult_vinfo)
1246 return NULL;
1248 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
1249 inside the loop (in case we are analyzing an outer-loop). */
1250 vect_unpromoted_value unprom0[2];
1251 enum optab_subtype subtype = optab_vector;
1252 if (!vect_widened_op_tree (vinfo, mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
1253 false, 2, unprom0, &half_type, &subtype))
1254 return NULL;
1256 /* If there are two widening operations, make sure they agree on the sign
1257 of the extension. The result of an optab_vector_mixed_sign operation
1258 is signed; otherwise, the result has the same sign as the operands. */
1259 if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
1260 && (subtype == optab_vector_mixed_sign
1261 ? TYPE_UNSIGNED (unprom_mult.type)
1262 : TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type)))
1263 return NULL;
1265 vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);
1267 /* If the inputs have mixed signs, canonicalize on using the signed
1268 input type for analysis. This also helps when emulating mixed-sign
1269 operations using signed operations. */
1270 if (subtype == optab_vector_mixed_sign)
1271 half_type = signed_type_for (half_type);
1273 tree half_vectype;
1274 if (!vect_supportable_direct_optab_p (vinfo, type, DOT_PROD_EXPR, half_type,
1275 type_out, &half_vectype, subtype))
1277 /* We can emulate a mixed-sign dot-product using a sequence of
1278 signed dot-products; see vect_emulate_mixed_dot_prod for details. */
1279 if (subtype != optab_vector_mixed_sign
1280 || !vect_supportable_direct_optab_p (vinfo, signed_type_for (type),
1281 DOT_PROD_EXPR, half_type,
1282 type_out, &half_vectype,
1283 optab_vector))
1284 return NULL;
1286 *type_out = signed_or_unsigned_type_for (TYPE_UNSIGNED (type),
1287 *type_out);
1290 /* Get the inputs in the appropriate types. */
1291 tree mult_oprnd[2];
1292 vect_convert_inputs (vinfo, stmt_vinfo, 2, mult_oprnd, half_type,
1293 unprom0, half_vectype, subtype);
1295 var = vect_recog_temp_ssa_var (type, NULL);
1296 pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
1297 mult_oprnd[0], mult_oprnd[1], oprnd1);
1299 return pattern_stmt;
1303 /* Function vect_recog_sad_pattern
1305 Try to find the following Sum of Absolute Difference (SAD) pattern:
1307 type x_t, y_t;
1308 signed TYPE1 diff, abs_diff;
1309 TYPE2 sum = init;
1310 loop:
1311 sum_0 = phi <init, sum_1>
1312 S1 x_t = ...
1313 S2 y_t = ...
1314 S3 x_T = (TYPE1) x_t;
1315 S4 y_T = (TYPE1) y_t;
1316 S5 diff = x_T - y_T;
1317 S6 abs_diff = ABS_EXPR <diff>;
1318 [S7 abs_diff = (TYPE2) abs_diff; #optional]
1319 S8 sum_1 = abs_diff + sum_0;
1321 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
1322 same size as 'TYPE1' or bigger. This is a special case of a reduction
1323 computation.
1325 Input:
1327 * STMT_VINFO: The stmt from which the pattern search begins. In the
1328 example, when this function is called with S8, the pattern
1329 {S3,S4,S5,S6,S7,S8} will be detected.
1331 Output:
1333 * TYPE_OUT: The type of the output of this pattern.
1335 * Return value: A new stmt that will be used to replace the sequence of
1336 stmts that constitute the pattern. In this case it will be:
1337 SAD_EXPR <x_t, y_t, sum_0> */
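/* A typical scalar source loop (illustrative, not taken from the original
   sources) that ends up as SAD_EXPR, e.g. the inner loop of a sum of
   absolute differences over two byte buffers:

     unsigned char *x, *y;
     int sum = 0;
     for (int i = 0; i < n; i++)
       {
         int diff = (int) x[i] - (int) y[i];   S5 above
         sum += diff < 0 ? -diff : diff;       S6..S8 above
       }
*/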
1340 static gimple *
1341 vect_recog_sad_pattern (vec_info *vinfo,
1342 stmt_vec_info stmt_vinfo, tree *type_out)
1344 gimple *last_stmt = stmt_vinfo->stmt;
1345 tree half_type;
1347 /* Look for the following pattern
1348 DX = (TYPE1) X;
1349 DY = (TYPE1) Y;
1350 DDIFF = DX - DY;
1351 DAD = ABS_EXPR <DDIFF>;
1352 DDPROD = (TYPE2) DAD;
1353 sum_1 = DAD + sum_0;
1354 In which
1355 - DX is at least double the size of X
1356 - DY is at least double the size of Y
1357 - DX, DY, DDIFF, DAD all have the same type
1358 - sum is the same size as DAD or bigger
1359 - sum has been recognized as a reduction variable.
1361 This is equivalent to:
1362 DDIFF = X w- Y; #widen sub
1363 DAD = ABS_EXPR <DDIFF>;
1364 sum_1 = DAD w+ sum_0; #widen summation
1366 DDIFF = X w- Y; #widen sub
1367 DAD = ABS_EXPR <DDIFF>;
1368 sum_1 = DAD + sum_0; #summation */
1371 /* Starting from LAST_STMT, follow the defs of its uses in search
1372 of the above pattern. */
1374 tree plus_oprnd0, plus_oprnd1;
1375 if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1376 &plus_oprnd0, &plus_oprnd1))
1377 return NULL;
1379 tree sum_type = TREE_TYPE (gimple_get_lhs (last_stmt));
1381 /* Any non-truncating sequence of conversions is OK here, since
1382 with a successful match, the result of the ABS(U) is known to fit
1383 within the nonnegative range of the result type. (It cannot be the
1384 negative of the minimum signed value due to the range of the widening
1385 MINUS_EXPR.) */
1386 vect_unpromoted_value unprom_abs;
1387 plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
1388 &unprom_abs);
1390 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1391 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
1392 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
1393 Then check that plus_oprnd0 is defined by an abs_expr. */
1395 if (!plus_oprnd0)
1396 return NULL;
1398 stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
1399 if (!abs_stmt_vinfo)
1400 return NULL;
1402 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
1403 inside the loop (in case we are analyzing an outer-loop). */
1404 gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
1405 vect_unpromoted_value unprom[2];
1407 if (!abs_stmt)
1409 gcall *abd_stmt = dyn_cast <gcall *> (abs_stmt_vinfo->stmt);
1410 if (!abd_stmt
1411 || !gimple_call_internal_p (abd_stmt)
1412 || gimple_call_num_args (abd_stmt) != 2)
1413 return NULL;
1415 tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
1416 tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
1418 if (gimple_call_internal_fn (abd_stmt) == IFN_ABD)
1420 if (!vect_look_through_possible_promotion (vinfo, abd_oprnd0,
1421 &unprom[0])
1422 || !vect_look_through_possible_promotion (vinfo, abd_oprnd1,
1423 &unprom[1]))
1424 return NULL;
1426 else if (gimple_call_internal_fn (abd_stmt) == IFN_VEC_WIDEN_ABD)
1428 unprom[0].op = abd_oprnd0;
1429 unprom[0].type = TREE_TYPE (abd_oprnd0);
1430 unprom[1].op = abd_oprnd1;
1431 unprom[1].type = TREE_TYPE (abd_oprnd1);
1433 else
1434 return NULL;
1436 half_type = unprom[0].type;
1438 else if (!vect_recog_absolute_difference (vinfo, abs_stmt, &half_type,
1439 unprom, NULL))
1440 return NULL;
1442 vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
1444 tree half_vectype;
1445 if (!vect_supportable_direct_optab_p (vinfo, sum_type, SAD_EXPR, half_type,
1446 type_out, &half_vectype))
1447 return NULL;
1449 /* Get the inputs to the SAD_EXPR in the appropriate types. */
1450 tree sad_oprnd[2];
1451 vect_convert_inputs (vinfo, stmt_vinfo, 2, sad_oprnd, half_type,
1452 unprom, half_vectype);
1454 tree var = vect_recog_temp_ssa_var (sum_type, NULL);
1455 gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
1456 sad_oprnd[1], plus_oprnd1);
1458 return pattern_stmt;
1461 /* Function vect_recog_abd_pattern
1463 Try to find the following ABsolute Difference (ABD) or
1464 widening ABD (WIDEN_ABD) pattern:
1466 TYPE1 x;
1467 TYPE2 y;
1468 TYPE3 x_cast = (TYPE3) x; // widening or no-op
1469 TYPE3 y_cast = (TYPE3) y; // widening or no-op
1470 TYPE3 diff = x_cast - y_cast;
1471 TYPE4 diff_cast = (TYPE4) diff; // widening or no-op
1472 TYPE5 abs = ABS(U)_EXPR <diff_cast>;
1474 WIDEN_ABD exists to optimize the case where TYPE4 is at least
1475 twice as wide as TYPE3.
1477 Input:
1479 * STMT_VINFO: The stmt from which the pattern search begins
1481 Output:
1483 * TYPE_OUT: The type of the output of this pattern
1485 * Return value: A new stmt that will be used to replace the sequence of
1486 stmts that constitute the pattern, principally:
1487 out = IFN_ABD (x, y)
1488 out = IFN_WIDEN_ABD (x, y) */
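/* Illustrative scalar function (not from the original sources) matching
   the ABD shape above:

     unsigned char
     abd (unsigned char x, unsigned char y)
     {
       int diff = (int) x - (int) y;        widening subtraction
       return diff < 0 ? -diff : diff;      ABS of the difference, truncated back
     }

   which would become out = .ABD (x, y); the IFN_VEC_WIDEN_ABD form is
   chosen instead when the absolute difference is consumed in a type at
   least twice as wide as x and y.  */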
1491 static gimple *
1492 vect_recog_abd_pattern (vec_info *vinfo,
1493 stmt_vec_info stmt_vinfo, tree *type_out)
1495 gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
1496 if (!last_stmt)
1497 return NULL;
1499 tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
1501 vect_unpromoted_value unprom[2];
1502 gassign *diff_stmt;
1503 tree half_type;
1504 if (!vect_recog_absolute_difference (vinfo, last_stmt, &half_type,
1505 unprom, &diff_stmt))
1506 return NULL;
1508 tree abd_in_type, abd_out_type;
1510 if (half_type)
1512 abd_in_type = half_type;
1513 abd_out_type = abd_in_type;
1515 else
1517 unprom[0].op = gimple_assign_rhs1 (diff_stmt);
1518 unprom[1].op = gimple_assign_rhs2 (diff_stmt);
1519 abd_in_type = signed_type_for (out_type);
1520 abd_out_type = abd_in_type;
1523 tree vectype_in = get_vectype_for_scalar_type (vinfo, abd_in_type);
1524 if (!vectype_in)
1525 return NULL;
1527 internal_fn ifn = IFN_ABD;
1528 tree vectype_out = vectype_in;
1530 if (TYPE_PRECISION (out_type) >= TYPE_PRECISION (abd_in_type) * 2
1531 && stmt_vinfo->min_output_precision >= TYPE_PRECISION (abd_in_type) * 2)
1533 tree mid_type
1534 = build_nonstandard_integer_type (TYPE_PRECISION (abd_in_type) * 2,
1535 TYPE_UNSIGNED (abd_in_type));
1536 tree mid_vectype = get_vectype_for_scalar_type (vinfo, mid_type);
1538 code_helper dummy_code;
1539 int dummy_int;
1540 auto_vec<tree> dummy_vec;
1541 if (mid_vectype
1542 && supportable_widening_operation (vinfo, IFN_VEC_WIDEN_ABD,
1543 stmt_vinfo, mid_vectype,
1544 vectype_in,
1545 &dummy_code, &dummy_code,
1546 &dummy_int, &dummy_vec))
1548 ifn = IFN_VEC_WIDEN_ABD;
1549 abd_out_type = mid_type;
1550 vectype_out = mid_vectype;
1554 if (ifn == IFN_ABD
1555 && !direct_internal_fn_supported_p (ifn, vectype_in,
1556 OPTIMIZE_FOR_SPEED))
1557 return NULL;
1559 vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
1561 tree abd_oprnds[2];
1562 vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
1563 abd_in_type, unprom, vectype_in);
1565 *type_out = get_vectype_for_scalar_type (vinfo, out_type);
1567 tree abd_result = vect_recog_temp_ssa_var (abd_out_type, NULL);
1568 gcall *abd_stmt = gimple_build_call_internal (ifn, 2,
1569 abd_oprnds[0], abd_oprnds[1]);
1570 gimple_call_set_lhs (abd_stmt, abd_result);
1571 gimple_set_location (abd_stmt, gimple_location (last_stmt));
1573 gimple *stmt = abd_stmt;
1574 if (TYPE_PRECISION (abd_in_type) == TYPE_PRECISION (abd_out_type)
1575 && TYPE_PRECISION (abd_out_type) < TYPE_PRECISION (out_type)
1576 && !TYPE_UNSIGNED (abd_out_type))
1578 tree unsign = unsigned_type_for (abd_out_type);
1579 tree unsign_vectype = get_vectype_for_scalar_type (vinfo, unsign);
1580 stmt = vect_convert_output (vinfo, stmt_vinfo, unsign, stmt,
1581 unsign_vectype);
1584 return vect_convert_output (vinfo, stmt_vinfo, out_type, stmt, vectype_out);
1587 /* Recognize an operation that performs ORIG_CODE on widened inputs,
1588 so that it can be treated as though it had the form:
1590 A_TYPE a;
1591 B_TYPE b;
1592 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1593 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1594 | RES_TYPE a_extend = (RES_TYPE) a_cast; // promotion from HALF_TYPE
1595 | RES_TYPE b_extend = (RES_TYPE) b_cast; // promotion from HALF_TYPE
1596 | RES_TYPE res = a_extend ORIG_CODE b_extend;
1598 Try to replace the pattern with:
1600 A_TYPE a;
1601 B_TYPE b;
1602 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1603 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1604 | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
1605 | RES_TYPE res = (EXT_TYPE) ext; // possible no-op
1607 where EXT_TYPE is wider than HALF_TYPE but has the same signedness.
1609 SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts. NAME is the
1610 name of the pattern being matched, for dump purposes. */
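/* Illustrative example (not from the original sources) for the widening
   multiplication instance of this helper:

     short a, b;
     int res = (int) a * (int) b;

   is replaced by a WIDEN_MULT_EXPR pattern statement that computes the
   product directly in the wider type, with HALF_TYPE short and RES_TYPE
   int in the notation above.  */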
1612 static gimple *
1613 vect_recog_widen_op_pattern (vec_info *vinfo,
1614 stmt_vec_info last_stmt_info, tree *type_out,
1615 tree_code orig_code, code_helper wide_code,
1616 bool shift_p, const char *name)
1618 gimple *last_stmt = last_stmt_info->stmt;
1620 vect_unpromoted_value unprom[2];
1621 tree half_type;
1622 if (!vect_widened_op_tree (vinfo, last_stmt_info, orig_code, orig_code,
1623 shift_p, 2, unprom, &half_type))
1625 return NULL;
1627 /* Pattern detected. */
1628 vect_pattern_detected (name, last_stmt);
1630 tree type = TREE_TYPE (gimple_get_lhs (last_stmt));
1631 tree itype = type;
1632 if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
1633 || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
1634 itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
1635 TYPE_UNSIGNED (half_type));
1637 /* Check target support */
1638 tree vectype = get_vectype_for_scalar_type (vinfo, half_type);
1639 tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
1640 tree ctype = itype;
1641 tree vecctype = vecitype;
1642 if (orig_code == MINUS_EXPR
1643 && TYPE_UNSIGNED (itype)
1644 && TYPE_PRECISION (type) > TYPE_PRECISION (itype))
1646 /* Subtraction is special, even if half_type is unsigned and no matter
1647 whether type is signed or unsigned, if type is wider than itype,
1648 we need to sign-extend from the widening operation result to the
1649 result type.
1650 Consider half_type unsigned char, operand 1 0xfe, operand 2 0xff,
1651 itype unsigned short and type either int or unsigned int.
1652 Widened (unsigned short) 0xfe - (unsigned short) 0xff is
1653 (unsigned short) 0xffff, but for type int we want the result -1
1654 and for type unsigned int 0xffffffff rather than 0xffff. */
1655 ctype = build_nonstandard_integer_type (TYPE_PRECISION (itype), 0);
1656 vecctype = get_vectype_for_scalar_type (vinfo, ctype);
1659 code_helper dummy_code;
1660 int dummy_int;
1661 auto_vec<tree> dummy_vec;
1662 if (!vectype
1663 || !vecitype
1664 || !vecctype
1665 || !supportable_widening_operation (vinfo, wide_code, last_stmt_info,
1666 vecitype, vectype,
1667 &dummy_code, &dummy_code,
1668 &dummy_int, &dummy_vec))
1669 return NULL;
1671 *type_out = get_vectype_for_scalar_type (vinfo, type);
1672 if (!*type_out)
1673 return NULL;
1675 tree oprnd[2];
1676 vect_convert_inputs (vinfo, last_stmt_info,
1677 2, oprnd, half_type, unprom, vectype);
1679 tree var = vect_recog_temp_ssa_var (itype, NULL);
1680 gimple *pattern_stmt = vect_gimple_build (var, wide_code, oprnd[0], oprnd[1]);
1682 if (vecctype != vecitype)
1683 pattern_stmt = vect_convert_output (vinfo, last_stmt_info, ctype,
1684 pattern_stmt, vecitype);
1686 return vect_convert_output (vinfo, last_stmt_info,
1687 type, pattern_stmt, vecctype);
1690 /* Try to detect multiplication on widened inputs, converting MULT_EXPR
1691 to WIDEN_MULT_EXPR. See vect_recog_widen_op_pattern for details. */
1693 static gimple *
1694 vect_recog_widen_mult_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1695 tree *type_out)
1697 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1698 MULT_EXPR, WIDEN_MULT_EXPR, false,
1699 "vect_recog_widen_mult_pattern");
1702 /* Try to detect addition on widened inputs, converting PLUS_EXPR
1703 to IFN_VEC_WIDEN_PLUS. See vect_recog_widen_op_pattern for details. */
1705 static gimple *
1706 vect_recog_widen_plus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1707 tree *type_out)
1709 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1710 PLUS_EXPR, IFN_VEC_WIDEN_PLUS,
1711 false, "vect_recog_widen_plus_pattern");
1714 /* Try to detect subtraction on widened inputs, converting MINUS_EXPR
1715 to IFN_VEC_WIDEN_MINUS. See vect_recog_widen_op_pattern for details. */
1716 static gimple *
1717 vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1718 tree *type_out)
1720 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1721 MINUS_EXPR, IFN_VEC_WIDEN_MINUS,
1722 false, "vect_recog_widen_minus_pattern");
1725 /* Try to detect abd on widened inputs, converting IFN_ABD
1726 to IFN_VEC_WIDEN_ABD. */
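/* Illustrative shape of the rewrite (types assumed for the example):

     unsigned char a, b;
     unsigned char d = .ABD (a, b);          // absolute difference
     unsigned short w = (unsigned short) d;  // widening conversion

   becomes a single widening absolute-difference call

     unsigned short w = .VEC_WIDEN_ABD (a, b);  */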
1727 static gimple *
1728 vect_recog_widen_abd_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1729 tree *type_out)
1731 gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
1732 if (!last_stmt || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (last_stmt)))
1733 return NULL;
1735 tree last_rhs = gimple_assign_rhs1 (last_stmt);
1737 tree in_type = TREE_TYPE (last_rhs);
1738 tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
1739 if (!INTEGRAL_TYPE_P (in_type)
1740 || !INTEGRAL_TYPE_P (out_type)
1741 || TYPE_PRECISION (in_type) * 2 != TYPE_PRECISION (out_type)
1742 || !TYPE_UNSIGNED (in_type))
1743 return NULL;
1745 vect_unpromoted_value unprom;
1746 tree op = vect_look_through_possible_promotion (vinfo, last_rhs, &unprom);
1747 if (!op || TYPE_PRECISION (TREE_TYPE (op)) != TYPE_PRECISION (in_type))
1748 return NULL;
1750 stmt_vec_info abd_pattern_vinfo = vect_get_internal_def (vinfo, op);
1751 if (!abd_pattern_vinfo)
1752 return NULL;
1754 abd_pattern_vinfo = vect_stmt_to_vectorize (abd_pattern_vinfo);
1755 gcall *abd_stmt = dyn_cast <gcall *> (STMT_VINFO_STMT (abd_pattern_vinfo));
1756 if (!abd_stmt
1757 || !gimple_call_internal_p (abd_stmt)
1758 || gimple_call_internal_fn (abd_stmt) != IFN_ABD)
1759 return NULL;
1761 tree vectype_in = get_vectype_for_scalar_type (vinfo, in_type);
1762 tree vectype_out = get_vectype_for_scalar_type (vinfo, out_type);
1764 code_helper dummy_code;
1765 int dummy_int;
1766 auto_vec<tree> dummy_vec;
1767 if (!supportable_widening_operation (vinfo, IFN_VEC_WIDEN_ABD, stmt_vinfo,
1768 vectype_out, vectype_in,
1769 &dummy_code, &dummy_code,
1770 &dummy_int, &dummy_vec))
1771 return NULL;
1773 vect_pattern_detected ("vect_recog_widen_abd_pattern", last_stmt);
1775 *type_out = vectype_out;
1777 tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
1778 tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
1779 tree widen_abd_result = vect_recog_temp_ssa_var (out_type, NULL);
1780 gcall *widen_abd_stmt = gimple_build_call_internal (IFN_VEC_WIDEN_ABD, 2,
1781 abd_oprnd0, abd_oprnd1);
1782 gimple_call_set_lhs (widen_abd_stmt, widen_abd_result);
1783 gimple_set_location (widen_abd_stmt, gimple_location (last_stmt));
1784 return widen_abd_stmt;
1787 /* Function vect_recog_ctz_ffs_pattern
1789 Try to find the following pattern:
1791 TYPE1 A;
1792 TYPE1 B;
1794 B = __builtin_ctz{,l,ll} (A);
or
1798 B = __builtin_ffs{,l,ll} (A);
1800 Input:
1802 * STMT_VINFO: The stmt from which the pattern search begins.
1803 here it starts with B = __builtin_* (A);
1805 Output:
1807 * TYPE_OUT: The vector type of the output of this pattern.
1809 * Return value: A new stmt that will be used to replace the sequence of
1810 stmts that constitute the pattern, using clz or popcount builtins. */
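/* Illustrative shape of the rewrite (assuming a target that only vectorizes
   popcount):

     B = __builtin_ctz (A);

   is re-expressed elementwise as

     tmp = (A - 1) & ~A;
     B   = .POPCOUNT (tmp);

   plus, when the original call defines a value at zero that the replacement
   does not already produce, a final A != 0 ? B : <value at zero> select.  */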
1812 static gimple *
1813 vect_recog_ctz_ffs_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1814 tree *type_out)
1816 gimple *call_stmt = stmt_vinfo->stmt;
1817 gimple *pattern_stmt;
1818 tree rhs_oprnd, rhs_type, lhs_oprnd, lhs_type, vec_type, vec_rhs_type;
1819 tree new_var;
1820 internal_fn ifn = IFN_LAST, ifnnew = IFN_LAST;
1821 bool defined_at_zero = true, defined_at_zero_new = false;
1822 int val = 0, val_new = 0, val_cmp = 0;
1823 int prec;
1824 int sub = 0, add = 0;
1825 location_t loc;
1827 if (!is_gimple_call (call_stmt))
1828 return NULL;
1830 if (gimple_call_num_args (call_stmt) != 1
1831 && gimple_call_num_args (call_stmt) != 2)
1832 return NULL;
1834 rhs_oprnd = gimple_call_arg (call_stmt, 0);
1835 rhs_type = TREE_TYPE (rhs_oprnd);
1836 lhs_oprnd = gimple_call_lhs (call_stmt);
1837 if (!lhs_oprnd)
1838 return NULL;
1839 lhs_type = TREE_TYPE (lhs_oprnd);
1840 if (!INTEGRAL_TYPE_P (lhs_type)
1841 || !INTEGRAL_TYPE_P (rhs_type)
1842 || !type_has_mode_precision_p (rhs_type)
1843 || TREE_CODE (rhs_oprnd) != SSA_NAME)
1844 return NULL;
1846 switch (gimple_call_combined_fn (call_stmt))
1848 CASE_CFN_CTZ:
1849 ifn = IFN_CTZ;
1850 if (!gimple_call_internal_p (call_stmt)
1851 || gimple_call_num_args (call_stmt) != 2)
1852 defined_at_zero = false;
1853 else
1854 val = tree_to_shwi (gimple_call_arg (call_stmt, 1));
1855 break;
1856 CASE_CFN_FFS:
1857 ifn = IFN_FFS;
1858 break;
1859 default:
1860 return NULL;
1863 prec = TYPE_PRECISION (rhs_type);
1864 loc = gimple_location (call_stmt);
1866 vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);
1867 if (!vec_type)
1868 return NULL;
1870 vec_rhs_type = get_vectype_for_scalar_type (vinfo, rhs_type);
1871 if (!vec_rhs_type)
1872 return NULL;
1874 /* Do it only if the backend doesn't have ctz<vector_mode>2 or
1875 ffs<vector_mode>2 pattern but does have clz<vector_mode>2 or
1876 popcount<vector_mode>2. */
1877 if (!vec_type
1878 || direct_internal_fn_supported_p (ifn, vec_rhs_type,
1879 OPTIMIZE_FOR_SPEED))
1880 return NULL;
1882 if (ifn == IFN_FFS
1883 && direct_internal_fn_supported_p (IFN_CTZ, vec_rhs_type,
1884 OPTIMIZE_FOR_SPEED))
1886 ifnnew = IFN_CTZ;
1887 defined_at_zero_new
1888 = CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (rhs_type),
1889 val_new) == 2;
1891 else if (direct_internal_fn_supported_p (IFN_CLZ, vec_rhs_type,
1892 OPTIMIZE_FOR_SPEED))
1894 ifnnew = IFN_CLZ;
1895 defined_at_zero_new
1896 = CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (rhs_type),
1897 val_new) == 2;
1899 if ((ifnnew == IFN_LAST
1900 || (defined_at_zero && !defined_at_zero_new))
1901 && direct_internal_fn_supported_p (IFN_POPCOUNT, vec_rhs_type,
1902 OPTIMIZE_FOR_SPEED))
1904 ifnnew = IFN_POPCOUNT;
1905 defined_at_zero_new = true;
1906 val_new = prec;
1908 if (ifnnew == IFN_LAST)
1909 return NULL;
1911 vect_pattern_detected ("vect_recog_ctz_ffs_pattern", call_stmt);
1913 val_cmp = val_new;
1914 if ((ifnnew == IFN_CLZ
1915 && defined_at_zero
1916 && defined_at_zero_new
1917 && val == prec
1918 && val_new == prec)
1919 || (ifnnew == IFN_POPCOUNT && ifn == IFN_CTZ))
1921 /* .CTZ (X) = PREC - .CLZ ((X - 1) & ~X)
1922 .CTZ (X) = .POPCOUNT ((X - 1) & ~X). */
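/* Sanity check with 8-bit X = 0x08: (X - 1) & ~X == 0x07, so
   PREC - .CLZ == 8 - 5 == 3 and .POPCOUNT == 3, both equal to .CTZ (X).  */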
1923 if (ifnnew == IFN_CLZ)
1924 sub = prec;
1925 val_cmp = prec;
1927 if (!TYPE_UNSIGNED (rhs_type))
1929 rhs_type = unsigned_type_for (rhs_type);
1930 vec_rhs_type = get_vectype_for_scalar_type (vinfo, rhs_type);
1931 new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
1932 pattern_stmt = gimple_build_assign (new_var, NOP_EXPR, rhs_oprnd);
1933 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt,
1934 vec_rhs_type);
1935 rhs_oprnd = new_var;
1938 tree m1 = vect_recog_temp_ssa_var (rhs_type, NULL);
1939 pattern_stmt = gimple_build_assign (m1, PLUS_EXPR, rhs_oprnd,
1940 build_int_cst (rhs_type, -1));
1941 gimple_set_location (pattern_stmt, loc);
1942 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1944 new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
1945 pattern_stmt = gimple_build_assign (new_var, BIT_NOT_EXPR, rhs_oprnd);
1946 gimple_set_location (pattern_stmt, loc);
1947 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1948 rhs_oprnd = new_var;
1950 new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
1951 pattern_stmt = gimple_build_assign (new_var, BIT_AND_EXPR,
1952 m1, rhs_oprnd);
1953 gimple_set_location (pattern_stmt, loc);
1954 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1955 rhs_oprnd = new_var;
1957 else if (ifnnew == IFN_CLZ)
1959 /* .CTZ (X) = (PREC - 1) - .CLZ (X & -X)
1960 .FFS (X) = PREC - .CLZ (X & -X). */
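/* Sanity check with 8-bit X = 0x08: X & -X == 0x08 and .CLZ == 4, so
   (PREC - 1) - .CLZ == 3 == .CTZ (X) and PREC - .CLZ == 4 == .FFS (X).  */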
1961 sub = prec - (ifn == IFN_CTZ);
1962 val_cmp = sub - val_new;
1964 tree neg = vect_recog_temp_ssa_var (rhs_type, NULL);
1965 pattern_stmt = gimple_build_assign (neg, NEGATE_EXPR, rhs_oprnd);
1966 gimple_set_location (pattern_stmt, loc);
1967 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1969 new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
1970 pattern_stmt = gimple_build_assign (new_var, BIT_AND_EXPR,
1971 rhs_oprnd, neg);
1972 gimple_set_location (pattern_stmt, loc);
1973 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1974 rhs_oprnd = new_var;
1976 else if (ifnnew == IFN_POPCOUNT)
1978 /* .CTZ (X) = PREC - .POPCOUNT (X | -X)
1979 .FFS (X) = (PREC + 1) - .POPCOUNT (X | -X). */
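/* Sanity check with 8-bit X = 0x08: X | -X == 0xf8 and .POPCOUNT == 5, so
   PREC - 5 == 3 == .CTZ (X) and (PREC + 1) - 5 == 4 == .FFS (X).  */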
1980 sub = prec + (ifn == IFN_FFS);
1981 val_cmp = sub;
1983 tree neg = vect_recog_temp_ssa_var (rhs_type, NULL);
1984 pattern_stmt = gimple_build_assign (neg, NEGATE_EXPR, rhs_oprnd);
1985 gimple_set_location (pattern_stmt, loc);
1986 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1988 new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
1989 pattern_stmt = gimple_build_assign (new_var, BIT_IOR_EXPR,
1990 rhs_oprnd, neg);
1991 gimple_set_location (pattern_stmt, loc);
1992 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1993 rhs_oprnd = new_var;
1995 else if (ifnnew == IFN_CTZ)
1997 /* .FFS (X) = .CTZ (X) + 1. */
1998 add = 1;
1999 val_cmp++;
2002 /* Create B = .IFNNEW (A). */
2003 new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2004 if ((ifnnew == IFN_CLZ || ifnnew == IFN_CTZ) && defined_at_zero_new)
2005 pattern_stmt
2006 = gimple_build_call_internal (ifnnew, 2, rhs_oprnd,
2007 build_int_cst (integer_type_node,
2008 val_new));
2009 else
2010 pattern_stmt = gimple_build_call_internal (ifnnew, 1, rhs_oprnd);
2011 gimple_call_set_lhs (pattern_stmt, new_var);
2012 gimple_set_location (pattern_stmt, loc);
2013 *type_out = vec_type;
2015 if (sub)
2017 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
2018 tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2019 pattern_stmt = gimple_build_assign (ret_var, MINUS_EXPR,
2020 build_int_cst (lhs_type, sub),
2021 new_var);
2022 gimple_set_location (pattern_stmt, loc);
2023 new_var = ret_var;
2025 else if (add)
2027 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
2028 tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2029 pattern_stmt = gimple_build_assign (ret_var, PLUS_EXPR, new_var,
2030 build_int_cst (lhs_type, add));
2031 gimple_set_location (pattern_stmt, loc);
2032 new_var = ret_var;
2035 if (defined_at_zero
2036 && (!defined_at_zero_new || val != val_cmp))
2038 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
2039 tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2040 rhs_oprnd = gimple_call_arg (call_stmt, 0);
2041 rhs_type = TREE_TYPE (rhs_oprnd);
2042 tree cmp = build2_loc (loc, NE_EXPR, boolean_type_node,
2043 rhs_oprnd, build_zero_cst (rhs_type));
2044 pattern_stmt = gimple_build_assign (ret_var, COND_EXPR, cmp,
2045 new_var,
2046 build_int_cst (lhs_type, val));
2049 if (dump_enabled_p ())
2050 dump_printf_loc (MSG_NOTE, vect_location,
2051 "created pattern stmt: %G", pattern_stmt);
2053 return pattern_stmt;
2056 /* Function vect_recog_popcount_clz_ctz_ffs_pattern
2058 Try to find the following pattern:
2060 UTYPE1 A;
2061 TYPE1 B;
2062 UTYPE2 temp_in;
2063 TYPE3 temp_out;
2064 temp_in = (UTYPE2)A;
2066 temp_out = __builtin_popcount{,l,ll} (temp_in);
2067 B = (TYPE1) temp_out;
2069 UTYPE2 may or may not be equal to TYPE3;
2070 e.g. UTYPE2 is equal to TYPE3 for __builtin_popcount,
2071 but UTYPE2 is not equal to TYPE3 for __builtin_popcountll.
2073 Input:
2075 * STMT_VINFO: The stmt from which the pattern search begins.
2076 here it starts with B = (TYPE1) temp_out;
2078 Output:
2080 * TYPE_OUT: The vector type of the output of this pattern.
2082 * Return value: A new stmt that will be used to replace the sequence of
2083 stmts that constitute the pattern. In this case it will be:
2084 B = .POPCOUNT (A);
2086 Similarly for clz, ctz and ffs.
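A concrete instance (illustrative, assuming 32-bit int and 64-bit long long):

  unsigned int A;
  unsigned long long temp_in = (unsigned long long) A;
  int temp_out = __builtin_popcountll (temp_in);
  int B = (int) temp_out;

is replaced by B = .POPCOUNT (A), since the zero extension of A cannot
change the population count.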
2089 static gimple *
2090 vect_recog_popcount_clz_ctz_ffs_pattern (vec_info *vinfo,
2091 stmt_vec_info stmt_vinfo,
2092 tree *type_out)
2094 gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
2095 gimple *call_stmt, *pattern_stmt;
2096 tree rhs_oprnd, rhs_origin, lhs_oprnd, lhs_type, vec_type, new_var;
2097 internal_fn ifn = IFN_LAST;
2098 int addend = 0;
2100 /* Find B = (TYPE1) temp_out. */
2101 if (!last_stmt)
2102 return NULL;
2103 tree_code code = gimple_assign_rhs_code (last_stmt);
2104 if (!CONVERT_EXPR_CODE_P (code))
2105 return NULL;
2107 lhs_oprnd = gimple_assign_lhs (last_stmt);
2108 lhs_type = TREE_TYPE (lhs_oprnd);
2109 if (!INTEGRAL_TYPE_P (lhs_type))
2110 return NULL;
2112 rhs_oprnd = gimple_assign_rhs1 (last_stmt);
2113 if (TREE_CODE (rhs_oprnd) != SSA_NAME
2114 || !has_single_use (rhs_oprnd))
2115 return NULL;
2116 call_stmt = SSA_NAME_DEF_STMT (rhs_oprnd);
2118 /* Find temp_out = __builtin_popcount{,l,ll} (temp_in); */
2119 if (!is_gimple_call (call_stmt))
2120 return NULL;
2121 switch (gimple_call_combined_fn (call_stmt))
2123 int val;
2124 CASE_CFN_POPCOUNT:
2125 ifn = IFN_POPCOUNT;
2126 break;
2127 CASE_CFN_CLZ:
2128 ifn = IFN_CLZ;
2129 /* Punt if call result is unsigned and defined value at zero
2130 is negative, as the negative value doesn't extend correctly. */
2131 if (TYPE_UNSIGNED (TREE_TYPE (rhs_oprnd))
2132 && gimple_call_internal_p (call_stmt)
2133 && CLZ_DEFINED_VALUE_AT_ZERO
2134 (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd)), val) == 2
2135 && val < 0)
2136 return NULL;
2137 break;
2138 CASE_CFN_CTZ:
2139 ifn = IFN_CTZ;
2140 /* Punt if call result is unsigned and defined value at zero
2141 is negative, as the negative value doesn't extend correctly. */
2142 if (TYPE_UNSIGNED (TREE_TYPE (rhs_oprnd))
2143 && gimple_call_internal_p (call_stmt)
2144 && CTZ_DEFINED_VALUE_AT_ZERO
2145 (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd)), val) == 2
2146 && val < 0)
2147 return NULL;
2148 break;
2149 CASE_CFN_FFS:
2150 ifn = IFN_FFS;
2151 break;
2152 default:
2153 return NULL;
2156 if (gimple_call_num_args (call_stmt) != 1
2157 && gimple_call_num_args (call_stmt) != 2)
2158 return NULL;
2160 rhs_oprnd = gimple_call_arg (call_stmt, 0);
2161 vect_unpromoted_value unprom_diff;
2162 rhs_origin
2163 = vect_look_through_possible_promotion (vinfo, rhs_oprnd, &unprom_diff);
2165 if (!rhs_origin)
2166 return NULL;
2168 /* Input and output of .POPCOUNT should be integers of the same precision. */
2169 if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (lhs_type))
2170 return NULL;
2172 /* Also, A should be unsigned or have the same precision as temp_in;
2173 otherwise different builtins/internal functions have different behaviors. */
2174 if (TYPE_PRECISION (unprom_diff.type)
2175 != TYPE_PRECISION (TREE_TYPE (rhs_oprnd)))
2176 switch (ifn)
2178 case IFN_POPCOUNT:
2179 /* For popcount require zero extension, which doesn't add any
2180 further bits to the count. */
2181 if (!TYPE_UNSIGNED (unprom_diff.type))
2182 return NULL;
2183 break;
2184 case IFN_CLZ:
2185 /* clzll (x) == clz (x) + 32 for unsigned x != 0, so ok
2186 if it is undefined at zero or if it matches also for the
2187 defined value there. */
2188 if (!TYPE_UNSIGNED (unprom_diff.type))
2189 return NULL;
2190 if (!type_has_mode_precision_p (lhs_type)
2191 || !type_has_mode_precision_p (TREE_TYPE (rhs_oprnd)))
2192 return NULL;
2193 addend = (TYPE_PRECISION (TREE_TYPE (rhs_oprnd))
2194 - TYPE_PRECISION (lhs_type));
2195 if (gimple_call_internal_p (call_stmt)
2196 && gimple_call_num_args (call_stmt) == 2)
2198 int val1, val2;
2199 val1 = tree_to_shwi (gimple_call_arg (call_stmt, 1));
2200 int d2
2201 = CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
2202 val2);
2203 if (d2 != 2 || val1 != val2 + addend)
2204 return NULL;
2206 break;
2207 case IFN_CTZ:
2208 /* ctzll (x) == ctz (x) for unsigned or signed x != 0, so ok
2209 if it is undefined at zero or if it matches also for the
2210 defined value there. */
2211 if (gimple_call_internal_p (call_stmt)
2212 && gimple_call_num_args (call_stmt) == 2)
2214 int val1, val2;
2215 val1 = tree_to_shwi (gimple_call_arg (call_stmt, 1));
2216 int d2
2217 = CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
2218 val2);
2219 if (d2 != 2 || val1 != val2)
2220 return NULL;
2222 break;
2223 case IFN_FFS:
2224 /* ffsll (x) == ffs (x) for unsigned or signed x. */
2225 break;
2226 default:
2227 gcc_unreachable ();
2230 vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);
2231 /* Do it only if the backend has popcount<vector_mode>2 etc. pattern. */
2232 if (!vec_type)
2233 return NULL;
2235 bool supported
2236 = direct_internal_fn_supported_p (ifn, vec_type, OPTIMIZE_FOR_SPEED);
2237 if (!supported)
2238 switch (ifn)
2240 case IFN_POPCOUNT:
2241 case IFN_CLZ:
2242 return NULL;
2243 case IFN_FFS:
2244 /* vect_recog_ctz_ffs_pattern can implement ffs using ctz. */
2245 if (direct_internal_fn_supported_p (IFN_CTZ, vec_type,
2246 OPTIMIZE_FOR_SPEED))
2247 break;
2248 /* FALLTHRU */
2249 case IFN_CTZ:
2250 /* vect_recog_ctz_ffs_pattern can implement ffs or ctz using
2251 clz or popcount. */
2252 if (direct_internal_fn_supported_p (IFN_CLZ, vec_type,
2253 OPTIMIZE_FOR_SPEED))
2254 break;
2255 if (direct_internal_fn_supported_p (IFN_POPCOUNT, vec_type,
2256 OPTIMIZE_FOR_SPEED))
2257 break;
2258 return NULL;
2259 default:
2260 gcc_unreachable ();
2263 vect_pattern_detected ("vect_recog_popcount_clz_ctz_ffs_pattern",
2264 call_stmt);
2266 /* Create B = .POPCOUNT (A). */
2267 new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2268 tree arg2 = NULL_TREE;
2269 int val;
2270 if (ifn == IFN_CLZ
2271 && CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
2272 val) == 2)
2273 arg2 = build_int_cst (integer_type_node, val);
2274 else if (ifn == IFN_CTZ
2275 && CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
2276 val) == 2)
2277 arg2 = build_int_cst (integer_type_node, val);
2278 if (arg2)
2279 pattern_stmt = gimple_build_call_internal (ifn, 2, unprom_diff.op, arg2);
2280 else
2281 pattern_stmt = gimple_build_call_internal (ifn, 1, unprom_diff.op);
2282 gimple_call_set_lhs (pattern_stmt, new_var);
2283 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
2284 *type_out = vec_type;
2286 if (dump_enabled_p ())
2287 dump_printf_loc (MSG_NOTE, vect_location,
2288 "created pattern stmt: %G", pattern_stmt);
2290 if (addend)
2292 gcc_assert (supported);
2293 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
2294 tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2295 pattern_stmt = gimple_build_assign (ret_var, PLUS_EXPR, new_var,
2296 build_int_cst (lhs_type, addend));
2298 else if (!supported)
2300 stmt_vec_info new_stmt_info = vinfo->add_stmt (pattern_stmt);
2301 STMT_VINFO_VECTYPE (new_stmt_info) = vec_type;
2302 pattern_stmt
2303 = vect_recog_ctz_ffs_pattern (vinfo, new_stmt_info, type_out);
2304 if (pattern_stmt == NULL)
2305 return NULL;
2306 if (gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (new_stmt_info))
2308 gimple_seq *pseq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
2309 gimple_seq_add_seq_without_update (pseq, seq);
2312 return pattern_stmt;
2315 /* Function vect_recog_pow_pattern
2317 Try to find the following pattern:
2319 x = POW (y, N);
2321 with POW being one of pow, powf, powi, powif and N being
2322 either 2 or 0.5.
2324 Input:
2326 * STMT_VINFO: The stmt from which the pattern search begins.
2328 Output:
2330 * TYPE_OUT: The type of the output of this pattern.
2332 * Return value: A new stmt that will be used to replace the sequence of
2333 stmts that constitute the pattern. In this case it will be:
2334 x = x * x
or
2336 x = sqrt (x)
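For instance (illustrative only): pow (y, 2.0) and powi (y, 2) are both
rewritten as y * y, while pow (y, 0.5) becomes a .SQRT (y) internal call
when the target supports a vector square root.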
2339 static gimple *
2340 vect_recog_pow_pattern (vec_info *vinfo,
2341 stmt_vec_info stmt_vinfo, tree *type_out)
2343 gimple *last_stmt = stmt_vinfo->stmt;
2344 tree base, exp;
2345 gimple *stmt;
2346 tree var;
2348 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
2349 return NULL;
2351 switch (gimple_call_combined_fn (last_stmt))
2353 CASE_CFN_POW:
2354 CASE_CFN_POWI:
2355 break;
2357 default:
2358 return NULL;
2361 base = gimple_call_arg (last_stmt, 0);
2362 exp = gimple_call_arg (last_stmt, 1);
2363 if (TREE_CODE (exp) != REAL_CST
2364 && TREE_CODE (exp) != INTEGER_CST)
2366 if (flag_unsafe_math_optimizations
2367 && TREE_CODE (base) == REAL_CST
2368 && gimple_call_builtin_p (last_stmt, BUILT_IN_NORMAL))
2370 combined_fn log_cfn;
2371 built_in_function exp_bfn;
2372 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
2374 case BUILT_IN_POW:
2375 log_cfn = CFN_BUILT_IN_LOG;
2376 exp_bfn = BUILT_IN_EXP;
2377 break;
2378 case BUILT_IN_POWF:
2379 log_cfn = CFN_BUILT_IN_LOGF;
2380 exp_bfn = BUILT_IN_EXPF;
2381 break;
2382 case BUILT_IN_POWL:
2383 log_cfn = CFN_BUILT_IN_LOGL;
2384 exp_bfn = BUILT_IN_EXPL;
2385 break;
2386 default:
2387 return NULL;
2389 tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
2390 tree exp_decl = builtin_decl_implicit (exp_bfn);
2391 /* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd
2392 does that, but if C is a power of 2, we want to use
2393 exp2 (log2 (C) * x) in the non-vectorized version, but for
2394 vectorization we don't have vectorized exp2. */
2395 if (logc
2396 && TREE_CODE (logc) == REAL_CST
2397 && exp_decl
2398 && lookup_attribute ("omp declare simd",
2399 DECL_ATTRIBUTES (exp_decl)))
2401 cgraph_node *node = cgraph_node::get_create (exp_decl);
2402 if (node->simd_clones == NULL)
2404 if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
2405 || node->definition)
2406 return NULL;
2407 expand_simd_clones (node);
2408 if (node->simd_clones == NULL)
2409 return NULL;
2411 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
2412 if (!*type_out)
2413 return NULL;
2414 tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
2415 gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
2416 append_pattern_def_seq (vinfo, stmt_vinfo, g);
2417 tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
2418 g = gimple_build_call (exp_decl, 1, def);
2419 gimple_call_set_lhs (g, res);
2420 return g;
2424 return NULL;
2427 /* We now have a pow or powi builtin function call with a constant
2428 exponent. */
2430 /* Catch squaring. */
2431 if ((tree_fits_shwi_p (exp)
2432 && tree_to_shwi (exp) == 2)
2433 || (TREE_CODE (exp) == REAL_CST
2434 && real_equal (&TREE_REAL_CST (exp), &dconst2)))
2436 if (!vect_supportable_direct_optab_p (vinfo, TREE_TYPE (base), MULT_EXPR,
2437 TREE_TYPE (base), type_out))
2438 return NULL;
2440 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
2441 stmt = gimple_build_assign (var, MULT_EXPR, base, base);
2442 return stmt;
2445 /* Catch square root. */
2446 if (TREE_CODE (exp) == REAL_CST
2447 && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
2449 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
2450 if (*type_out
2451 && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
2452 OPTIMIZE_FOR_SPEED))
2454 gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
2455 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
2456 gimple_call_set_lhs (stmt, var);
2457 gimple_call_set_nothrow (stmt, true);
2458 return stmt;
2462 return NULL;
2466 /* Function vect_recog_widen_sum_pattern
2468 Try to find the following pattern:
2470 type x_t;
2471 TYPE x_T, sum = init;
2472 loop:
2473 sum_0 = phi <init, sum_1>
2474 S1 x_t = *p;
2475 S2 x_T = (TYPE) x_t;
2476 S3 sum_1 = x_T + sum_0;
2478 where type 'TYPE' is at least double the size of type 'type', i.e. we're
2479 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
2480 a special case of a reduction computation.
2482 Input:
2484 * STMT_VINFO: The stmt from which the pattern search begins. In the example,
2485 when this function is called with S3, the pattern {S2,S3} will be detected.
2487 Output:
2489 * TYPE_OUT: The type of the output of this pattern.
2491 * Return value: A new stmt that will be used to replace the sequence of
2492 stmts that constitute the pattern. In this case it will be:
2493 WIDEN_SUM <x_t, sum_0>
2495 Note: The widening-sum idiom is a widening reduction pattern that is
2496 vectorized without preserving all the intermediate results. It
2497 produces only N/2 (widened) results (by summing up pairs of
2498 intermediate results) rather than all N results. Therefore, we
2499 cannot allow this pattern when we want to get all the results and in
2500 the correct order (as is the case when this computation is in an
2501 inner-loop nested in an outer-loop that is being vectorized). */
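/* A qualifying loop might look like this (types assumed for illustration):

     unsigned char *p;
     unsigned int sum = 0;
     for (int i = 0; i < n; i++)
       sum += p[i];             // sum_1 = (unsigned int) p[i] + sum_0

   The widening addition is replaced by WIDEN_SUM <p[i], sum_0>, letting the
   target accumulate pairs of widened elements directly.  */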
2503 static gimple *
2504 vect_recog_widen_sum_pattern (vec_info *vinfo,
2505 stmt_vec_info stmt_vinfo, tree *type_out)
2507 gimple *last_stmt = stmt_vinfo->stmt;
2508 tree oprnd0, oprnd1;
2509 tree type;
2510 gimple *pattern_stmt;
2511 tree var;
2513 /* Look for the following pattern
2514 DX = (TYPE) X;
2515 sum_1 = DX + sum_0;
2516 In which DX is at least double the size of X, and sum_1 has been
2517 recognized as a reduction variable.
2520 /* Starting from LAST_STMT, follow the defs of its uses in search
2521 of the above pattern. */
2523 if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
2524 &oprnd0, &oprnd1)
2525 || TREE_CODE (oprnd0) != SSA_NAME
2526 || !vinfo->lookup_def (oprnd0))
2527 return NULL;
2529 type = TREE_TYPE (gimple_get_lhs (last_stmt));
2531 /* So far so good. Since last_stmt was detected as a (summation) reduction,
2532 we know that oprnd1 is the reduction variable (defined by a loop-header
2533 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
2534 Left to check that oprnd0 is defined by a cast from type 'type' to type
2535 'TYPE'. */
2537 vect_unpromoted_value unprom0;
2538 if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
2539 || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
2540 return NULL;
2542 vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
2544 if (!vect_supportable_direct_optab_p (vinfo, type, WIDEN_SUM_EXPR,
2545 unprom0.type, type_out))
2546 return NULL;
2548 var = vect_recog_temp_ssa_var (type, NULL);
2549 pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);
2551 return pattern_stmt;
2554 /* Function vect_recog_bitfield_ref_pattern
2556 Try to find the following pattern:
2558 bf_value = BIT_FIELD_REF (container, bitsize, bitpos);
2559 result = (type_out) bf_value;
or
2563 if (BIT_FIELD_REF (container, bitsize, bitpos) `cmp` <constant>)
2565 where type_out is a non-bitfield type, that is to say, its precision matches
2566 2^(TYPE_SIZE(type_out) - (TYPE_UNSIGNED (type_out) ? 1 : 2)).
2568 Input:
2570 * STMT_VINFO: The stmt from which the pattern search begins.
2571 here it starts with:
2572 result = (type_out) bf_value;
or
2576 if (BIT_FIELD_REF (container, bitsize, bitpos) `cmp` <constant>)
2578 Output:
2580 * TYPE_OUT: The vector type of the output of this pattern.
2582 * Return value: A new stmt that will be used to replace the sequence of
2583 stmts that constitute the pattern. If the precision of type_out is bigger
2584 than the precision type of _1 we perform the widening before the shifting,
2585 since the new precision will be large enough to shift the value and moving
2586 widening operations up the statement chain enables the generation of
2587 widening loads. If we are widening and the operation after the pattern is
2588 an addition then we mask first and shift later, to enable the generation of
2589 shifting adds. In the case of narrowing we will always mask first, shift
2590 last and then perform a narrowing operation. This will enable the
2591 generation of narrowing shifts.
2593 Widening with mask first, shift later:
2594 container = (type_out) container;
2595 masked = container & (((1 << bitsize) - 1) << bitpos);
2596 result = masked >> bitpos;
2598 Widening with shift first, mask last:
2599 container = (type_out) container;
2600 shifted = container >> bitpos;
2601 result = shifted & ((1 << bitsize) - 1);
2603 Narrowing:
2604 masked = container & (((1 << bitsize) - 1) << bitpos);
2605 result = masked >> bitpos;
2606 result = (type_out) result;
2608 If the bitfield is signed and it's wider than type_out, we need to
2609 keep the result sign-extended:
2610 container = (type) container;
2611 masked = container << (prec - bitsize - bitpos);
2612 result = (type_out) (masked >> (prec - bitsize));
2614 Here type is the signed variant of the wider of type_out and the type
2615 of container.
2617 The shifting is always optional depending on whether bitpos != 0.
2619 When the original bitfield was inside a gcond then a new gcond is also
2620 generated, with the new `result` as the operand to the comparison.
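Worked example (illustrative): for a 32-bit unsigned container C with
bitsize == 5 and bitpos == 3 on a little-endian target, the "shift first"
form is

  tmp    = C >> 3;
  result = tmp & 0x1f;

and the "mask first" form is

  tmp    = C & (0x1f << 3);
  result = tmp >> 3;

Both extract the same 5-bit field; which one is emitted depends on the
surrounding uses as described above.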
2624 static gimple *
2625 vect_recog_bitfield_ref_pattern (vec_info *vinfo, stmt_vec_info stmt_info,
2626 tree *type_out)
2628 gimple *bf_stmt = NULL;
2629 tree lhs = NULL_TREE;
2630 tree ret_type = NULL_TREE;
2631 gimple *stmt = STMT_VINFO_STMT (stmt_info);
2632 if (gcond *cond_stmt = dyn_cast <gcond *> (stmt))
2634 tree op = gimple_cond_lhs (cond_stmt);
2635 if (TREE_CODE (op) != SSA_NAME)
2636 return NULL;
2637 bf_stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (op));
2638 if (TREE_CODE (gimple_cond_rhs (cond_stmt)) != INTEGER_CST)
2639 return NULL;
2641 else if (is_gimple_assign (stmt)
2642 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt))
2643 && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
2645 gimple *second_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
2646 bf_stmt = dyn_cast <gassign *> (second_stmt);
2647 lhs = gimple_assign_lhs (stmt);
2648 ret_type = TREE_TYPE (lhs);
2651 if (!bf_stmt
2652 || gimple_assign_rhs_code (bf_stmt) != BIT_FIELD_REF)
2653 return NULL;
2655 tree bf_ref = gimple_assign_rhs1 (bf_stmt);
2656 tree container = TREE_OPERAND (bf_ref, 0);
2657 ret_type = ret_type ? ret_type : TREE_TYPE (container);
2659 if (!bit_field_offset (bf_ref).is_constant ()
2660 || !bit_field_size (bf_ref).is_constant ()
2661 || !tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (container))))
2662 return NULL;
2664 if (!INTEGRAL_TYPE_P (TREE_TYPE (bf_ref))
2665 || !INTEGRAL_TYPE_P (TREE_TYPE (container))
2666 || TYPE_MODE (TREE_TYPE (container)) == E_BLKmode)
2667 return NULL;
2669 gimple *use_stmt, *pattern_stmt;
2670 use_operand_p use_p;
2671 bool shift_first = true;
2672 tree container_type = TREE_TYPE (container);
2673 tree vectype = get_vectype_for_scalar_type (vinfo, container_type);
2675 /* Calculate shift_n before the adjustments for widening loads, otherwise
2676 the container may change and we have to consider offset change for
2677 widening loads on big endianness. The shift_n calculated here can be
2678 independent of widening. */
2679 unsigned HOST_WIDE_INT shift_n = bit_field_offset (bf_ref).to_constant ();
2680 unsigned HOST_WIDE_INT mask_width = bit_field_size (bf_ref).to_constant ();
2681 unsigned HOST_WIDE_INT prec = tree_to_uhwi (TYPE_SIZE (container_type));
2682 if (BYTES_BIG_ENDIAN)
2683 shift_n = prec - shift_n - mask_width;
2685 bool ref_sext = (!TYPE_UNSIGNED (TREE_TYPE (bf_ref))
2686 && TYPE_PRECISION (ret_type) > mask_width);
2687 bool load_widen = (TYPE_PRECISION (TREE_TYPE (container))
2688 < TYPE_PRECISION (ret_type));
2690 /* We move the conversion earlier if the loaded type is smaller than the
2691 return type to enable the use of widening loads. And if we need a
2692 sign extension, we need to convert the loaded value early to a signed
2693 type as well. */
2694 if (ref_sext || load_widen)
2696 tree type = load_widen ? ret_type : container_type;
2697 if (ref_sext)
2698 type = gimple_signed_type (type);
2699 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type),
2700 NOP_EXPR, container);
2701 container = gimple_get_lhs (pattern_stmt);
2702 container_type = TREE_TYPE (container);
2703 prec = tree_to_uhwi (TYPE_SIZE (container_type));
2704 vectype = get_vectype_for_scalar_type (vinfo, container_type);
2705 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
2707 else if (!useless_type_conversion_p (TREE_TYPE (container), ret_type))
2708 /* If we are doing the conversion last then also delay the shift as we may
2709 be able to combine the shift and conversion in certain cases. */
2710 shift_first = false;
2712 /* If the only use of the result of this BIT_FIELD_REF + CONVERT is a
2713 PLUS_EXPR then do the shift last as some targets can combine the shift and
2714 add into a single instruction. */
2715 if (lhs && single_imm_use (lhs, &use_p, &use_stmt))
2717 if (gimple_code (use_stmt) == GIMPLE_ASSIGN
2718 && gimple_assign_rhs_code (use_stmt) == PLUS_EXPR)
2719 shift_first = false;
2722 /* If we don't have to shift we only generate the mask, so just fix the
2723 code-path to shift_first. */
2724 if (shift_n == 0)
2725 shift_first = true;
2727 tree result;
2728 if (shift_first && !ref_sext)
2730 tree shifted = container;
2731 if (shift_n)
2733 pattern_stmt
2734 = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2735 RSHIFT_EXPR, container,
2736 build_int_cst (sizetype, shift_n));
2737 shifted = gimple_assign_lhs (pattern_stmt);
2738 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
2741 tree mask = wide_int_to_tree (container_type,
2742 wi::mask (mask_width, false, prec));
2744 pattern_stmt
2745 = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2746 BIT_AND_EXPR, shifted, mask);
2747 result = gimple_assign_lhs (pattern_stmt);
2749 else
2751 tree temp = vect_recog_temp_ssa_var (container_type);
2752 if (!ref_sext)
2754 tree mask = wide_int_to_tree (container_type,
2755 wi::shifted_mask (shift_n,
2756 mask_width,
2757 false, prec));
2758 pattern_stmt = gimple_build_assign (temp, BIT_AND_EXPR,
2759 container, mask);
2761 else
2763 HOST_WIDE_INT shl = prec - shift_n - mask_width;
2764 shift_n += shl;
2765 pattern_stmt = gimple_build_assign (temp, LSHIFT_EXPR,
2766 container,
2767 build_int_cst (sizetype,
2768 shl));
2771 tree masked = gimple_assign_lhs (pattern_stmt);
2772 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
2773 pattern_stmt
2774 = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2775 RSHIFT_EXPR, masked,
2776 build_int_cst (sizetype, shift_n));
2777 result = gimple_assign_lhs (pattern_stmt);
2780 if (!useless_type_conversion_p (TREE_TYPE (result), ret_type))
2782 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
2783 pattern_stmt
2784 = gimple_build_assign (vect_recog_temp_ssa_var (ret_type),
2785 NOP_EXPR, result);
2788 if (!lhs)
2790 if (!vectype)
2791 return NULL;
2793 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
2794 vectype = truth_type_for (vectype);
2796 /* FIXME: This part extracts the boolean value out of the bitfield in the
2797 same way as vect_recog_gcond_pattern does. However because
2798 patterns cannot match the same root twice, when we handle and
2799 lower the bitfield in the gcond, vect_recog_gcond_pattern can't
2800 apply anymore. We should really fix it so that we don't need to
2801 duplicate transformations like these. */
2802 tree new_lhs = vect_recog_temp_ssa_var (boolean_type_node, NULL);
2803 gcond *cond_stmt = dyn_cast <gcond *> (stmt_info->stmt);
2804 tree cond_cst = gimple_cond_rhs (cond_stmt);
2805 gimple *new_stmt
2806 = gimple_build_assign (new_lhs, gimple_cond_code (cond_stmt),
2807 gimple_get_lhs (pattern_stmt),
2808 fold_convert (container_type, cond_cst));
2809 append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype, container_type);
2810 pattern_stmt
2811 = gimple_build_cond (NE_EXPR, new_lhs,
2812 build_zero_cst (TREE_TYPE (new_lhs)),
2813 NULL_TREE, NULL_TREE);
2816 *type_out = STMT_VINFO_VECTYPE (stmt_info);
2817 vect_pattern_detected ("bitfield_ref pattern", stmt_info->stmt);
2819 return pattern_stmt;
2822 /* Function vect_recog_bit_insert_pattern
2824 Try to find the following pattern:
2826 written = BIT_INSERT_EXPR (container, value, bitpos);
2828 Input:
2830 * STMT_VINFO: The stmt we want to replace.
2832 Output:
2834 * TYPE_OUT: The vector type of the output of this pattern.
2836 * Return value: A new stmt that will be used to replace the sequence of
2837 stmts that constitute the pattern. In this case it will be:
2838 value = (container_type) value; // Make sure value has the container's type
2839 shifted = value << bitpos; // Shift value into place
2840 masked = shifted & (mask << bitpos); // Mask off the non-relevant bits in
2841 // the 'to-write value'.
2842 cleared = container & ~(mask << bitpos); // Clear the bits in the
2843 // container that we are about
2844 // to write to.
2845 written = cleared | masked; // Write bits.
2848 where mask = ((1 << TYPE_PRECISION (value)) - 1), a mask to keep the number of
2849 bits corresponding to the real size of the bitfield value we are writing to.
2850 The shifting is always optional depending on whether bitpos != 0.
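Worked example (illustrative): writing a 5-bit value V at bitpos == 3 of a
32-bit container C on a little-endian target:

  value   = (unsigned int) V;
  shifted = value << 3;
  masked  = shifted & (0x1f << 3);          // mask is 0xf8
  cleared = C & ~(0x1f << 3);
  written = cleared | masked;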
2854 static gimple *
2855 vect_recog_bit_insert_pattern (vec_info *vinfo, stmt_vec_info stmt_info,
2856 tree *type_out)
2858 gassign *bf_stmt = dyn_cast <gassign *> (stmt_info->stmt);
2859 if (!bf_stmt || gimple_assign_rhs_code (bf_stmt) != BIT_INSERT_EXPR)
2860 return NULL;
2862 tree container = gimple_assign_rhs1 (bf_stmt);
2863 tree value = gimple_assign_rhs2 (bf_stmt);
2864 tree shift = gimple_assign_rhs3 (bf_stmt);
2866 tree bf_type = TREE_TYPE (value);
2867 tree container_type = TREE_TYPE (container);
2869 if (!INTEGRAL_TYPE_P (container_type)
2870 || !tree_fits_uhwi_p (TYPE_SIZE (container_type)))
2871 return NULL;
2873 gimple *pattern_stmt;
2875 vect_unpromoted_value unprom;
2876 unprom.set_op (value, vect_internal_def);
2877 value = vect_convert_input (vinfo, stmt_info, container_type, &unprom,
2878 get_vectype_for_scalar_type (vinfo,
2879 container_type));
2881 unsigned HOST_WIDE_INT mask_width = TYPE_PRECISION (bf_type);
2882 unsigned HOST_WIDE_INT prec = tree_to_uhwi (TYPE_SIZE (container_type));
2883 unsigned HOST_WIDE_INT shift_n = tree_to_uhwi (shift);
2884 if (BYTES_BIG_ENDIAN)
2886 shift_n = prec - shift_n - mask_width;
2887 shift = build_int_cst (TREE_TYPE (shift), shift_n);
2890 if (!useless_type_conversion_p (TREE_TYPE (value), container_type))
2892 pattern_stmt =
2893 gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2894 NOP_EXPR, value);
2895 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
2896 value = gimple_get_lhs (pattern_stmt);
2899 /* Shift VALUE into place. */
2900 tree shifted = value;
2901 if (shift_n)
2903 gimple_seq stmts = NULL;
2904 shifted
2905 = gimple_build (&stmts, LSHIFT_EXPR, container_type, value, shift);
2906 if (!gimple_seq_empty_p (stmts))
2907 append_pattern_def_seq (vinfo, stmt_info,
2908 gimple_seq_first_stmt (stmts));
2911 tree mask_t
2912 = wide_int_to_tree (container_type,
2913 wi::shifted_mask (shift_n, mask_width, false, prec));
2915 /* Clear bits we don't want to write back from SHIFTED. */
2916 gimple_seq stmts = NULL;
2917 tree masked = gimple_build (&stmts, BIT_AND_EXPR, container_type, shifted,
2918 mask_t);
2919 if (!gimple_seq_empty_p (stmts))
2921 pattern_stmt = gimple_seq_first_stmt (stmts);
2922 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
2925 /* Mask off the bits in the container that we are to write to. */
2926 mask_t = wide_int_to_tree (container_type,
2927 wi::shifted_mask (shift_n, mask_width, true, prec));
2928 tree cleared = vect_recog_temp_ssa_var (container_type);
2929 pattern_stmt = gimple_build_assign (cleared, BIT_AND_EXPR, container, mask_t);
2930 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
2932 /* Write MASKED into CLEARED. */
2933 pattern_stmt
2934 = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2935 BIT_IOR_EXPR, cleared, masked);
2937 *type_out = STMT_VINFO_VECTYPE (stmt_info);
2938 vect_pattern_detected ("bit_insert pattern", stmt_info->stmt);
2940 return pattern_stmt;
2944 /* Recognize cases in which an operation is performed in one type WTYPE
2945 but could be done more efficiently in a narrower type NTYPE. For example,
2946 if we have:
2948 ATYPE a; // narrower than NTYPE
2949 BTYPE b; // narrower than NTYPE
2950 WTYPE aw = (WTYPE) a;
2951 WTYPE bw = (WTYPE) b;
2952 WTYPE res = aw + bw; // only uses of aw and bw
2954 then it would be more efficient to do:
2956 NTYPE an = (NTYPE) a;
2957 NTYPE bn = (NTYPE) b;
2958 NTYPE resn = an + bn;
2959 WTYPE res = (WTYPE) resn;
2961 Other situations include things like:
2963 ATYPE a; // NTYPE or narrower
2964 WTYPE aw = (WTYPE) a;
2965 WTYPE res = aw + b;
2967 when only "(NTYPE) res" is significant. In that case it's more efficient
2968 to truncate "b" and do the operation on NTYPE instead:
2970 NTYPE an = (NTYPE) a;
2971 NTYPE bn = (NTYPE) b; // truncation
2972 NTYPE resn = an + bn;
2973 WTYPE res = (WTYPE) resn;
2975 All users of "res" should then use "resn" instead, making the final
2976 statement dead (not marked as relevant). The final statement is still
2977 needed to maintain the type correctness of the IR.
2979 vect_determine_precisions has already determined the minimum
2980 precision of the operation and the minimum precision required
2981 by users of the result. */
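/* For example (types assumed for illustration): with

     unsigned char a, b;
     int res = (int) a + (int) b;

   where later statements only need the low 8 bits of res, the addition can
   be done directly on unsigned char and the result widened afterwards, so
   fewer copies of the vector statement are needed per scalar element.  */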
2983 static gimple *
2984 vect_recog_over_widening_pattern (vec_info *vinfo,
2985 stmt_vec_info last_stmt_info, tree *type_out)
2987 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
2988 if (!last_stmt)
2989 return NULL;
2991 /* See whether we have found that this operation can be done on a
2992 narrower type without changing its semantics. */
2993 unsigned int new_precision = last_stmt_info->operation_precision;
2994 if (!new_precision)
2995 return NULL;
2997 tree lhs = gimple_assign_lhs (last_stmt);
2998 tree type = TREE_TYPE (lhs);
2999 tree_code code = gimple_assign_rhs_code (last_stmt);
3001 /* Punt for reductions where we don't handle the type conversions. */
3002 if (STMT_VINFO_DEF_TYPE (last_stmt_info) == vect_reduction_def)
3003 return NULL;
3005 /* Keep the first operand of a COND_EXPR as-is: only the other two
3006 operands are interesting. */
3007 unsigned int first_op = (code == COND_EXPR ? 2 : 1);
3009 /* Check the operands. */
3010 unsigned int nops = gimple_num_ops (last_stmt) - first_op;
3011 auto_vec <vect_unpromoted_value, 3> unprom (nops);
3012 unprom.quick_grow_cleared (nops);
3013 unsigned int min_precision = 0;
3014 bool single_use_p = false;
3015 for (unsigned int i = 0; i < nops; ++i)
3017 tree op = gimple_op (last_stmt, first_op + i);
3018 if (TREE_CODE (op) == INTEGER_CST)
3019 unprom[i].set_op (op, vect_constant_def);
3020 else if (TREE_CODE (op) == SSA_NAME)
3022 bool op_single_use_p = true;
3023 if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
3024 &op_single_use_p))
3025 return NULL;
3026 /* If:
3028 (1) N bits of the result are needed;
3029 (2) all inputs are widened from M<N bits; and
3030 (3) one operand OP is a single-use SSA name
3032 we can shift the M->N widening from OP to the output
3033 without changing the number or type of extensions involved.
3034 This then reduces the number of copies of STMT_INFO.
3036 If instead of (3) more than one operand is a single-use SSA name,
3037 shifting the extension to the output is even more of a win.
3039 If instead:
3041 (1) N bits of the result are needed;
3042 (2) one operand OP2 is widened from M2<N bits;
3043 (3) another operand OP1 is widened from M1<M2 bits; and
3044 (4) both OP1 and OP2 are single-use
3046 the choice is between:
3048 (a) truncating OP2 to M1, doing the operation on M1,
3049 and then widening the result to N
3051 (b) widening OP1 to M2, doing the operation on M2, and then
3052 widening the result to N
3054 Both shift the M2->N widening of the inputs to the output.
3055 (a) additionally shifts the M1->M2 widening to the output;
3056 it requires fewer copies of STMT_INFO but requires an extra
3057 M2->M1 truncation.
3059 Which is better will depend on the complexity and cost of
3060 STMT_INFO, which is hard to predict at this stage. However,
3061 a clear tie-breaker in favor of (b) is the fact that the
3062 truncation in (a) increases the length of the operation chain.
3064 If instead of (4) only one of OP1 or OP2 is single-use,
3065 (b) is still a win over doing the operation in N bits:
3066 it still shifts the M2->N widening on the single-use operand
3067 to the output and reduces the number of STMT_INFO copies.
3069 If neither operand is single-use then operating on fewer than
3070 N bits might lead to more extensions overall. Whether it does
3071 or not depends on global information about the vectorization
3072 region, and whether that's a good trade-off would again
3073 depend on the complexity and cost of the statements involved,
3074 as well as things like register pressure that are not normally
3075 modelled at this stage. We therefore ignore these cases
3076 and just optimize the clear single-use wins above.
3078 Thus we take the maximum precision of the unpromoted operands
3079 and record whether any operand is single-use. */
3080 if (unprom[i].dt == vect_internal_def)
3082 min_precision = MAX (min_precision,
3083 TYPE_PRECISION (unprom[i].type));
3084 single_use_p |= op_single_use_p;
3087 else
3088 return NULL;
3091 /* Although the operation could be done in operation_precision, we have
3092 to balance that against introducing extra truncations or extensions.
3093 Calculate the minimum precision that can be handled efficiently.
3095 The loop above determined that the operation could be handled
3096 efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
3097 extension from the inputs to the output without introducing more
3098 instructions, and would reduce the number of instructions required
3099 for STMT_INFO itself.
3101 vect_determine_precisions has also determined that the result only
3102 needs min_output_precision bits. Truncating by a factor of N times
3103 requires a tree of N - 1 instructions, so if TYPE is N times wider
3104 than min_output_precision, doing the operation in TYPE and truncating
3105 the result requires N + (N - 1) = 2N - 1 instructions per output vector.
3106 In contrast:
3108 - truncating the input to a unary operation and doing the operation
3109 in the new type requires at most N - 1 + 1 = N instructions per
3110 output vector
3112 - doing the same for a binary operation requires at most
3113 (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
3115 Both unary and binary operations require fewer instructions than
3116 this if the operands were extended from a suitable truncated form.
3117 Thus there is usually nothing to lose by doing operations in
3118 min_output_precision bits, but there can be something to gain. */
3119 if (!single_use_p)
3120 min_precision = last_stmt_info->min_output_precision;
3121 else
3122 min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
3124 /* Apply the minimum efficient precision we just calculated. */
3125 if (new_precision < min_precision)
3126 new_precision = min_precision;
3127 new_precision = vect_element_precision (new_precision);
3128 if (new_precision >= TYPE_PRECISION (type))
3129 return NULL;
3131 vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);
3133 *type_out = get_vectype_for_scalar_type (vinfo, type);
3134 if (!*type_out)
3135 return NULL;
3137 /* We've found a viable pattern. Get the new type of the operation. */
3138 bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
3139 tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
3141 /* If we're truncating an operation, we need to make sure that we
3142 don't introduce new undefined overflow. The codes tested here are
3143 a subset of those accepted by vect_truncatable_operation_p. */
3144 tree op_type = new_type;
3145 if (TYPE_OVERFLOW_UNDEFINED (new_type)
3146 && (code == PLUS_EXPR || code == MINUS_EXPR || code == MULT_EXPR))
3147 op_type = build_nonstandard_integer_type (new_precision, true);
3149 /* We specifically don't check here whether the target supports the
3150 new operation, since it might be something that a later pattern
3151 wants to rewrite anyway. If targets have a minimum element size
3152 for some optabs, we should pattern-match smaller ops to larger ops
3153 where beneficial. */
3154 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
3155 tree op_vectype = get_vectype_for_scalar_type (vinfo, op_type);
3156 if (!new_vectype || !op_vectype)
3157 return NULL;
3159 if (dump_enabled_p ())
3160 dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
3161 type, new_type);
3163 /* Calculate the rhs operands for an operation on OP_TYPE. */
3164 tree ops[3] = {};
3165 for (unsigned int i = 1; i < first_op; ++i)
3166 ops[i - 1] = gimple_op (last_stmt, i);
3167 vect_convert_inputs (vinfo, last_stmt_info, nops, &ops[first_op - 1],
3168 op_type, &unprom[0], op_vectype);
3170 /* Use the operation to produce a result of type OP_TYPE. */
3171 tree new_var = vect_recog_temp_ssa_var (op_type, NULL);
3172 gimple *pattern_stmt = gimple_build_assign (new_var, code,
3173 ops[0], ops[1], ops[2]);
3174 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
3176 if (dump_enabled_p ())
3177 dump_printf_loc (MSG_NOTE, vect_location,
3178 "created pattern stmt: %G", pattern_stmt);
3180 /* Convert back to the original signedness, if OP_TYPE is different
3181 from NEW_TYPE. */
3182 if (op_type != new_type)
3183 pattern_stmt = vect_convert_output (vinfo, last_stmt_info, new_type,
3184 pattern_stmt, op_vectype);
3186 /* Promote the result to the original type. */
3187 pattern_stmt = vect_convert_output (vinfo, last_stmt_info, type,
3188 pattern_stmt, new_vectype);
3190 return pattern_stmt;
3193 /* Recognize the following patterns:
3195 ATYPE a; // narrower than TYPE
3196 BTYPE b; // narrower than TYPE
3198 1) Multiply high with scaling
3199 TYPE res = ((TYPE) a * (TYPE) b) >> c;
3200 Here, c is bitsize (TYPE) / 2 - 1.
3202 2) ... or also with rounding
3203 TYPE res = ((((TYPE) a * (TYPE) b) >> d) + 1) >> 1;
3204 Here, d is bitsize (TYPE) / 2 - 2.
3206 3) Normal multiply high
3207 TYPE res = ((TYPE) a * (TYPE) b) >> e;
3208 Here, e is bitsize (TYPE) / 2.
3210 where only the bottom half of res is used. */
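/* A concrete instance of pattern 1) (illustrative): with 32-bit int and
   16-bit short,

     short a, b;
     int res = ((int) a * (int) b) >> 15;

   where only the low 16 bits of res are used, the multiplication becomes

     short r = .MULHS (a, b);

   followed by a conversion of r back to int, i.e. a fixed-point
   multiply-high-with-scaling on 16-bit lanes.  */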
3212 static gimple *
3213 vect_recog_mulhs_pattern (vec_info *vinfo,
3214 stmt_vec_info last_stmt_info, tree *type_out)
3216 /* Check for a right shift. */
3217 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
3218 if (!last_stmt
3219 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR)
3220 return NULL;
3222 /* Check that the shift result is wider than the users of the
3223 result need (i.e. that narrowing would be a natural choice). */
3224 tree lhs_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
3225 unsigned int target_precision
3226 = vect_element_precision (last_stmt_info->min_output_precision);
3227 if (!INTEGRAL_TYPE_P (lhs_type)
3228 || target_precision >= TYPE_PRECISION (lhs_type))
3229 return NULL;
3231 /* Look through any change in sign on the outer shift input. */
3232 vect_unpromoted_value unprom_rshift_input;
3233 tree rshift_input = vect_look_through_possible_promotion
3234 (vinfo, gimple_assign_rhs1 (last_stmt), &unprom_rshift_input);
3235 if (!rshift_input
3236 || TYPE_PRECISION (TREE_TYPE (rshift_input))
3237 != TYPE_PRECISION (lhs_type))
3238 return NULL;
3240 /* Get the definition of the shift input. */
3241 stmt_vec_info rshift_input_stmt_info
3242 = vect_get_internal_def (vinfo, rshift_input);
3243 if (!rshift_input_stmt_info)
3244 return NULL;
3245 gassign *rshift_input_stmt
3246 = dyn_cast <gassign *> (rshift_input_stmt_info->stmt);
3247 if (!rshift_input_stmt)
3248 return NULL;
3250 stmt_vec_info mulh_stmt_info;
3251 tree scale_term;
3252 bool rounding_p = false;
3254 /* Check for the presence of the rounding term. */
3255 if (gimple_assign_rhs_code (rshift_input_stmt) == PLUS_EXPR)
3257 /* Check that the outer shift was by 1. */
3258 if (!integer_onep (gimple_assign_rhs2 (last_stmt)))
3259 return NULL;
3261 /* Check that the second operand of the PLUS_EXPR is 1. */
3262 if (!integer_onep (gimple_assign_rhs2 (rshift_input_stmt)))
3263 return NULL;
3265 /* Look through any change in sign on the addition input. */
3266 vect_unpromoted_value unprom_plus_input;
3267 tree plus_input = vect_look_through_possible_promotion
3268 (vinfo, gimple_assign_rhs1 (rshift_input_stmt), &unprom_plus_input);
3269 if (!plus_input
3270 || TYPE_PRECISION (TREE_TYPE (plus_input))
3271 != TYPE_PRECISION (TREE_TYPE (rshift_input)))
3272 return NULL;
3274 /* Get the definition of the multiply-high-scale part. */
3275 stmt_vec_info plus_input_stmt_info
3276 = vect_get_internal_def (vinfo, plus_input);
3277 if (!plus_input_stmt_info)
3278 return NULL;
3279 gassign *plus_input_stmt
3280 = dyn_cast <gassign *> (plus_input_stmt_info->stmt);
3281 if (!plus_input_stmt
3282 || gimple_assign_rhs_code (plus_input_stmt) != RSHIFT_EXPR)
3283 return NULL;
3285 /* Look through any change in sign on the scaling input. */
3286 vect_unpromoted_value unprom_scale_input;
3287 tree scale_input = vect_look_through_possible_promotion
3288 (vinfo, gimple_assign_rhs1 (plus_input_stmt), &unprom_scale_input);
3289 if (!scale_input
3290 || TYPE_PRECISION (TREE_TYPE (scale_input))
3291 != TYPE_PRECISION (TREE_TYPE (plus_input)))
3292 return NULL;
3294 /* Get the definition of the multiply-high part. */
3295 mulh_stmt_info = vect_get_internal_def (vinfo, scale_input);
3296 if (!mulh_stmt_info)
3297 return NULL;
3299 /* Get the scaling term. */
3300 scale_term = gimple_assign_rhs2 (plus_input_stmt);
3301 rounding_p = true;
3303 else
3305 mulh_stmt_info = rshift_input_stmt_info;
3306 scale_term = gimple_assign_rhs2 (last_stmt);
3309 /* Check that the scaling factor is constant. */
3310 if (TREE_CODE (scale_term) != INTEGER_CST)
3311 return NULL;
3313 /* Check whether the scaling input term can be seen as two widened
3314 inputs multiplied together. */
3315 vect_unpromoted_value unprom_mult[2];
3316 tree new_type;
3317 unsigned int nops
3318 = vect_widened_op_tree (vinfo, mulh_stmt_info, MULT_EXPR, WIDEN_MULT_EXPR,
3319 false, 2, unprom_mult, &new_type);
3320 if (nops != 2)
3321 return NULL;
3323 /* Adjust output precision. */
3324 if (TYPE_PRECISION (new_type) < target_precision)
3325 new_type = build_nonstandard_integer_type
3326 (target_precision, TYPE_UNSIGNED (new_type));
3328 unsigned mult_precision = TYPE_PRECISION (new_type);
3329 internal_fn ifn;
3330 /* Check that the scaling factor is expected. Instead of
3331 target_precision, we should use the one that we actually
3332 use for the internal function. */
3333 if (rounding_p)
3335 /* Check pattern 2). */
3336 if (wi::to_widest (scale_term) + mult_precision + 2
3337 != TYPE_PRECISION (lhs_type))
3338 return NULL;
3340 ifn = IFN_MULHRS;
3342 else
3344 /* Check for pattern 1). */
3345 if (wi::to_widest (scale_term) + mult_precision + 1
3346 == TYPE_PRECISION (lhs_type))
3347 ifn = IFN_MULHS;
3348 /* Check for pattern 3). */
3349 else if (wi::to_widest (scale_term) + mult_precision
3350 == TYPE_PRECISION (lhs_type))
3351 ifn = IFN_MULH;
3352 else
3353 return NULL;
3356 vect_pattern_detected ("vect_recog_mulhs_pattern", last_stmt);
3358 /* Check for target support. */
3359 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
3360 if (!new_vectype
3361 || !direct_internal_fn_supported_p
3362 (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
3363 return NULL;
3365 /* The IR requires a valid vector type for the cast result, even though
3366 it's likely to be discarded. */
3367 *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
3368 if (!*type_out)
3369 return NULL;
3371 /* Generate the IFN_MULH, IFN_MULHS or IFN_MULHRS call. */
3372 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
3373 tree new_ops[2];
3374 vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
3375 unprom_mult, new_vectype);
3376 gcall *mulhrs_stmt
3377 = gimple_build_call_internal (ifn, 2, new_ops[0], new_ops[1]);
3378 gimple_call_set_lhs (mulhrs_stmt, new_var);
3379 gimple_set_location (mulhrs_stmt, gimple_location (last_stmt));
3381 if (dump_enabled_p ())
3382 dump_printf_loc (MSG_NOTE, vect_location,
3383 "created pattern stmt: %G", (gimple *) mulhrs_stmt);
3385 return vect_convert_output (vinfo, last_stmt_info, lhs_type,
3386 mulhrs_stmt, new_vectype);
3389 /* Recognize the patterns:
3391 ATYPE a; // narrower than TYPE
3392 BTYPE b; // narrower than TYPE
3393 (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
3394 or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
3396 where only the bottom half of avg is used. Try to transform them into:
3398 (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
3399 or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
3401 followed by:
3403 TYPE avg = (TYPE) avg';
3405 where NTYPE is no wider than half of TYPE. Since only the bottom half
3406 of avg is used, all or part of the cast of avg' should become redundant.
3408 If there is no target support available, generate code to distribute rshift
3409 over plus and add a carry. */
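/* A concrete instance (illustrative): with

     unsigned char a, b;
     unsigned short avg = ((unsigned short) a + (unsigned short) b + 1) >> 1;

   and only the low byte of avg used, the computation becomes

     avg' = .AVG_CEIL (a, b);            // on unsigned char
     avg  = (unsigned short) avg';

   matching form (2) above; form (1), without the + 1, maps to .AVG_FLOOR.  */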
3411 static gimple *
3412 vect_recog_average_pattern (vec_info *vinfo,
3413 stmt_vec_info last_stmt_info, tree *type_out)
3415 /* Check for a shift right by one bit. */
3416 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
3417 if (!last_stmt
3418 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
3419 || !integer_onep (gimple_assign_rhs2 (last_stmt)))
3420 return NULL;
3422 /* Check that the shift result is wider than the users of the
3423 result need (i.e. that narrowing would be a natural choice). */
3424 tree lhs = gimple_assign_lhs (last_stmt);
3425 tree type = TREE_TYPE (lhs);
3426 unsigned int target_precision
3427 = vect_element_precision (last_stmt_info->min_output_precision);
3428 if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
3429 return NULL;
3431 /* Look through any change in sign on the shift input. */
3432 tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
3433 vect_unpromoted_value unprom_plus;
3434 rshift_rhs = vect_look_through_possible_promotion (vinfo, rshift_rhs,
3435 &unprom_plus);
3436 if (!rshift_rhs
3437 || TYPE_PRECISION (TREE_TYPE (rshift_rhs)) != TYPE_PRECISION (type))
3438 return NULL;
3440 /* Get the definition of the shift input. */
3441 stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
3442 if (!plus_stmt_info)
3443 return NULL;
3445 /* Check whether the shift input can be seen as a tree of additions on
3446 2 or 3 widened inputs.
3448 Note that the pattern should be a win even if the result of one or
3449 more additions is reused elsewhere: if the pattern matches, we'd be
3450 replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s. */
3451 internal_fn ifn = IFN_AVG_FLOOR;
3452 vect_unpromoted_value unprom[3];
3453 tree new_type;
3454 unsigned int nops = vect_widened_op_tree (vinfo, plus_stmt_info, PLUS_EXPR,
3455 IFN_VEC_WIDEN_PLUS, false, 3,
3456 unprom, &new_type);
3457 if (nops == 0)
3458 return NULL;
3459 if (nops == 3)
3461 /* Check that one operand is 1. */
3462 unsigned int i;
3463 for (i = 0; i < 3; ++i)
3464 if (integer_onep (unprom[i].op))
3465 break;
3466 if (i == 3)
3467 return NULL;
3468 /* Throw away the 1 operand and keep the other two. */
3469 if (i < 2)
3470 unprom[i] = unprom[2];
3471 ifn = IFN_AVG_CEIL;
3474 vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
3476 /* We know that:
3478 (a) the operation can be viewed as:
3480 TYPE widened0 = (TYPE) UNPROM[0];
3481 TYPE widened1 = (TYPE) UNPROM[1];
3482 TYPE tmp1 = widened0 + widened1 {+ 1};
3483 TYPE tmp2 = tmp1 >> 1; // LAST_STMT_INFO
3485 (b) the first two statements are equivalent to:
3487 TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
3488 TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
3490 (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
3491 where sensible;
3493 (d) all the operations can be performed correctly at twice the width of
3494 NEW_TYPE, due to the nature of the average operation; and
3496 (e) users of the result of the right shift need only TARGET_PRECISION
3497 bits, where TARGET_PRECISION is no more than half of TYPE's
3498 precision.
3500 Under these circumstances, the only situation in which NEW_TYPE
3501 could be narrower than TARGET_PRECISION is if widened0, widened1
3502 and an addition result are all used more than once. Thus we can
3503 treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
3504 as "free", whereas widening the result of the average instruction
3505 from NEW_TYPE to TARGET_PRECISION would be a new operation. It's
3506 therefore better not to go narrower than TARGET_PRECISION. */
3507 if (TYPE_PRECISION (new_type) < target_precision)
3508 new_type = build_nonstandard_integer_type (target_precision,
3509 TYPE_UNSIGNED (new_type));
3511 /* Check for target support. */
3512 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
3513 if (!new_vectype)
3514 return NULL;
3516 bool fallback_p = false;
3518 if (direct_internal_fn_supported_p (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
3520 else if (TYPE_UNSIGNED (new_type)
3521 && optab_for_tree_code (RSHIFT_EXPR, new_vectype, optab_scalar)
3522 && optab_for_tree_code (PLUS_EXPR, new_vectype, optab_default)
3523 && optab_for_tree_code (BIT_IOR_EXPR, new_vectype, optab_default)
3524 && optab_for_tree_code (BIT_AND_EXPR, new_vectype, optab_default))
3525 fallback_p = true;
3526 else
3527 return NULL;
3529 /* The IR requires a valid vector type for the cast result, even though
3530 it's likely to be discarded. */
3531 *type_out = get_vectype_for_scalar_type (vinfo, type);
3532 if (!*type_out)
3533 return NULL;
3535 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
3536 tree new_ops[2];
3537 vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
3538 unprom, new_vectype);
3540 if (fallback_p)
3542 /* As a fallback, generate code for following sequence:
3544 shifted_op0 = new_ops[0] >> 1;
3545 shifted_op1 = new_ops[1] >> 1;
3546 sum_of_shifted = shifted_op0 + shifted_op1;
3547 unmasked_carry = new_ops[0] and/or new_ops[1];
3548 carry = unmasked_carry & 1;
3549 new_var = sum_of_shifted + carry;
3552 tree one_cst = build_one_cst (new_type);
3553 gassign *g;
3555 tree shifted_op0 = vect_recog_temp_ssa_var (new_type, NULL);
3556 g = gimple_build_assign (shifted_op0, RSHIFT_EXPR, new_ops[0], one_cst);
3557 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
3559 tree shifted_op1 = vect_recog_temp_ssa_var (new_type, NULL);
3560 g = gimple_build_assign (shifted_op1, RSHIFT_EXPR, new_ops[1], one_cst);
3561 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
3563 tree sum_of_shifted = vect_recog_temp_ssa_var (new_type, NULL);
3564 g = gimple_build_assign (sum_of_shifted, PLUS_EXPR,
3565 shifted_op0, shifted_op1);
3566 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
3568 tree unmasked_carry = vect_recog_temp_ssa_var (new_type, NULL);
3569 tree_code c = (ifn == IFN_AVG_CEIL) ? BIT_IOR_EXPR : BIT_AND_EXPR;
3570 g = gimple_build_assign (unmasked_carry, c, new_ops[0], new_ops[1]);
3571 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
3573 tree carry = vect_recog_temp_ssa_var (new_type, NULL);
3574 g = gimple_build_assign (carry, BIT_AND_EXPR, unmasked_carry, one_cst);
3575 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
3577 g = gimple_build_assign (new_var, PLUS_EXPR, sum_of_shifted, carry);
3578 return vect_convert_output (vinfo, last_stmt_info, type, g, new_vectype);
3581 /* Generate the IFN_AVG* call. */
3582 gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
3583 new_ops[1]);
3584 gimple_call_set_lhs (average_stmt, new_var);
3585 gimple_set_location (average_stmt, gimple_location (last_stmt));
3587 if (dump_enabled_p ())
3588 dump_printf_loc (MSG_NOTE, vect_location,
3589 "created pattern stmt: %G", (gimple *) average_stmt);
3591 return vect_convert_output (vinfo, last_stmt_info,
3592 type, average_stmt, new_vectype);
3595 /* Recognize cases in which the input to a cast is wider than its
3596 output, and the input is fed by a widening operation. Fold this
3597 by removing the unnecessary intermediate widening. E.g.:
3599 unsigned char a;
3600 unsigned int b = (unsigned int) a;
3601 unsigned short c = (unsigned short) b;
3605 unsigned short c = (unsigned short) a;
3607 Although this is rare in input IR, it is an expected side-effect
3608 of the over-widening pattern above.
3610 This is beneficial also for integer-to-float conversions, if the
3611 widened integer has more bits than the float, and if the unwidened
3612 input doesn't. */
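/* Illustrative example of the integer-to-float case (added for
   exposition, not part of the original sources):

     unsigned short a;
     unsigned long long b = (unsigned long long) a;
     float f = (float) b;

   The 64-bit intermediate has more bits than a 32-bit float, but the
   16-bit input does not, so the conversion can be rewritten as
   f = (float) a and the wide integer vector type is avoided.  */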
3614 static gimple *
3615 vect_recog_cast_forwprop_pattern (vec_info *vinfo,
3616 stmt_vec_info last_stmt_info, tree *type_out)
3618 /* Check for a cast, including an integer-to-float conversion. */
3619 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
3620 if (!last_stmt)
3621 return NULL;
3622 tree_code code = gimple_assign_rhs_code (last_stmt);
3623 if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
3624 return NULL;
3626 /* Make sure that the rhs is a scalar with a natural bitsize. */
3627 tree lhs = gimple_assign_lhs (last_stmt);
3628 if (!lhs)
3629 return NULL;
3630 tree lhs_type = TREE_TYPE (lhs);
3631 scalar_mode lhs_mode;
3632 if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
3633 || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
3634 return NULL;
3636 /* Check for a narrowing operation (from a vector point of view). */
3637 tree rhs = gimple_assign_rhs1 (last_stmt);
3638 tree rhs_type = TREE_TYPE (rhs);
3639 if (!INTEGRAL_TYPE_P (rhs_type)
3640 || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
3641 || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
3642 return NULL;
3644 /* Try to find an unpromoted input. */
3645 vect_unpromoted_value unprom;
3646 if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
3647 || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
3648 return NULL;
3650 /* If the bits above RHS_TYPE matter, make sure that they're the
3651 same when extending from UNPROM as they are when extending from RHS. */
3652 if (!INTEGRAL_TYPE_P (lhs_type)
3653 && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
3654 return NULL;
3656 /* We can get the same result by casting UNPROM directly, to avoid
3657 the unnecessary widening and narrowing. */
3658 vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
3660 *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
3661 if (!*type_out)
3662 return NULL;
3664 tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
3665 gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
3666 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
3668 return pattern_stmt;
3671 /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
3672 to WIDEN_LSHIFT_EXPR. See vect_recog_widen_op_pattern for details. */
3674 static gimple *
3675 vect_recog_widen_shift_pattern (vec_info *vinfo,
3676 stmt_vec_info last_stmt_info, tree *type_out)
3678 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
3679 LSHIFT_EXPR, WIDEN_LSHIFT_EXPR, true,
3680 "vect_recog_widen_shift_pattern");
3683 /* Detect a rotate pattern that wouldn't otherwise be vectorized:
3685 type a_t, b_t, c_t;
3687 S0 a_t = b_t r<< c_t;
3689 Input/Output:
3691 * STMT_VINFO: The stmt from which the pattern search begins,
3692 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
3693 with a sequence:
3695 S1 d_t = -c_t;
3696 S2 e_t = d_t & (B - 1);
3697 S3 f_t = b_t << c_t;
3698 S4 g_t = b_t >> e_t;
3699 S0 a_t = f_t | g_t;
3701 where B is element bitsize of type.
3703 Output:
3705 * TYPE_OUT: The type of the output of this pattern.
3707 * Return value: A new stmt that will be used to replace the rotate
3708 S0 stmt. */
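/* Illustrative sketch (added for exposition): for a 32-bit rotate
   a_t = b_t r<< c_t the sequence above computes

     d_t = -c_t;
     e_t = d_t & 31;        // B - 1 with B == 32
     f_t = b_t << c_t;
     g_t = b_t >> e_t;
     a_t = f_t | g_t;

   (-c_t) & (B - 1) is B - c_t for nonzero c_t and 0 for c_t == 0, so the
   right-shift count is always in range and the OR of the two shifted
   values reproduces the rotate.  */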
3710 static gimple *
3711 vect_recog_rotate_pattern (vec_info *vinfo,
3712 stmt_vec_info stmt_vinfo, tree *type_out)
3714 gimple *last_stmt = stmt_vinfo->stmt;
3715 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
3716 gimple *pattern_stmt, *def_stmt;
3717 enum tree_code rhs_code;
3718 enum vect_def_type dt;
3719 optab optab1, optab2;
3720 edge ext_def = NULL;
3721 bool bswap16_p = false;
3723 if (is_gimple_assign (last_stmt))
3725 rhs_code = gimple_assign_rhs_code (last_stmt);
3726 switch (rhs_code)
3728 case LROTATE_EXPR:
3729 case RROTATE_EXPR:
3730 break;
3731 default:
3732 return NULL;
3735 lhs = gimple_assign_lhs (last_stmt);
3736 oprnd0 = gimple_assign_rhs1 (last_stmt);
3737 type = TREE_TYPE (oprnd0);
3738 oprnd1 = gimple_assign_rhs2 (last_stmt);
3740 else if (gimple_call_builtin_p (last_stmt, BUILT_IN_BSWAP16))
3742 /* __builtin_bswap16 (x) is another form of x r>> 8.
3743 The vectorizer has bswap support, but only if the argument isn't
3744 promoted. */
3745 lhs = gimple_call_lhs (last_stmt);
3746 oprnd0 = gimple_call_arg (last_stmt, 0);
3747 type = TREE_TYPE (oprnd0);
3748 if (!lhs
3749 || TYPE_PRECISION (TREE_TYPE (lhs)) != 16
3750 || TYPE_PRECISION (type) <= 16
3751 || TREE_CODE (oprnd0) != SSA_NAME
3752 || BITS_PER_UNIT != 8)
3753 return NULL;
3755 stmt_vec_info def_stmt_info;
3756 if (!vect_is_simple_use (oprnd0, vinfo, &dt, &def_stmt_info, &def_stmt))
3757 return NULL;
3759 if (dt != vect_internal_def)
3760 return NULL;
3762 if (gimple_assign_cast_p (def_stmt))
3764 def = gimple_assign_rhs1 (def_stmt);
3765 if (INTEGRAL_TYPE_P (TREE_TYPE (def))
3766 && TYPE_PRECISION (TREE_TYPE (def)) == 16)
3767 oprnd0 = def;
3770 type = TREE_TYPE (lhs);
3771 vectype = get_vectype_for_scalar_type (vinfo, type);
3772 if (vectype == NULL_TREE)
3773 return NULL;
3775 if (tree char_vectype = get_same_sized_vectype (char_type_node, vectype))
3777 /* The encoding uses one stepped pattern for each byte in the
3778 16-bit word. */
3779 vec_perm_builder elts (TYPE_VECTOR_SUBPARTS (char_vectype), 2, 3);
3780 for (unsigned i = 0; i < 3; ++i)
3781 for (unsigned j = 0; j < 2; ++j)
3782 elts.quick_push ((i + 1) * 2 - j - 1);
3784 vec_perm_indices indices (elts, 1,
3785 TYPE_VECTOR_SUBPARTS (char_vectype));
3786 machine_mode vmode = TYPE_MODE (char_vectype);
3787 if (can_vec_perm_const_p (vmode, vmode, indices))
3789 /* vectorizable_bswap can handle the __builtin_bswap16 if we
3790 undo the argument promotion. */
3791 if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
3793 def = vect_recog_temp_ssa_var (type, NULL);
3794 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
3795 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3796 oprnd0 = def;
3799 /* Pattern detected. */
3800 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
3802 *type_out = vectype;
3804 /* Pattern supported. Create a stmt to be used to replace the
3805 pattern, with the unpromoted argument. */
3806 var = vect_recog_temp_ssa_var (type, NULL);
3807 pattern_stmt = gimple_build_call (gimple_call_fndecl (last_stmt),
3808 1, oprnd0);
3809 gimple_call_set_lhs (pattern_stmt, var);
3810 gimple_call_set_fntype (as_a <gcall *> (pattern_stmt),
3811 gimple_call_fntype (last_stmt));
3812 return pattern_stmt;
3816 oprnd1 = build_int_cst (integer_type_node, 8);
3817 rhs_code = LROTATE_EXPR;
3818 bswap16_p = true;
3820 else
3821 return NULL;
3823 if (TREE_CODE (oprnd0) != SSA_NAME
3824 || !INTEGRAL_TYPE_P (type)
3825 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type))
3826 return NULL;
3828 stmt_vec_info def_stmt_info;
3829 if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt))
3830 return NULL;
3832 if (dt != vect_internal_def
3833 && dt != vect_constant_def
3834 && dt != vect_external_def)
3835 return NULL;
3837 vectype = get_vectype_for_scalar_type (vinfo, type);
3838 if (vectype == NULL_TREE)
3839 return NULL;
3841 /* If vector/vector or vector/scalar rotate is supported by the target,
3842 don't do anything here. */
3843 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
3844 if (optab1
3845 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
3847 use_rotate:
3848 if (bswap16_p)
3850 if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
3852 def = vect_recog_temp_ssa_var (type, NULL);
3853 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
3854 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3855 oprnd0 = def;
3858 /* Pattern detected. */
3859 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
3861 *type_out = vectype;
3863 /* Pattern supported. Create a stmt to be used to replace the
3864 pattern. */
3865 var = vect_recog_temp_ssa_var (type, NULL);
3866 pattern_stmt = gimple_build_assign (var, LROTATE_EXPR, oprnd0,
3867 oprnd1);
3868 return pattern_stmt;
3870 return NULL;
3873 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
3875 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
3876 if (optab2
3877 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
3878 goto use_rotate;
3881 tree utype = unsigned_type_for (type);
3882 tree uvectype = get_vectype_for_scalar_type (vinfo, utype);
3883 if (!uvectype)
3884 return NULL;
3886 /* If vector/vector or vector/scalar shifts aren't supported by the target,
3887 don't do anything here either. */
3888 optab1 = optab_for_tree_code (LSHIFT_EXPR, uvectype, optab_vector);
3889 optab2 = optab_for_tree_code (RSHIFT_EXPR, uvectype, optab_vector);
3890 if (!optab1
3891 || optab_handler (optab1, TYPE_MODE (uvectype)) == CODE_FOR_nothing
3892 || !optab2
3893 || optab_handler (optab2, TYPE_MODE (uvectype)) == CODE_FOR_nothing)
3895 if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
3896 return NULL;
3897 optab1 = optab_for_tree_code (LSHIFT_EXPR, uvectype, optab_scalar);
3898 optab2 = optab_for_tree_code (RSHIFT_EXPR, uvectype, optab_scalar);
3899 if (!optab1
3900 || optab_handler (optab1, TYPE_MODE (uvectype)) == CODE_FOR_nothing
3901 || !optab2
3902 || optab_handler (optab2, TYPE_MODE (uvectype)) == CODE_FOR_nothing)
3903 return NULL;
3906 *type_out = vectype;
3908 if (!useless_type_conversion_p (utype, TREE_TYPE (oprnd0)))
3910 def = vect_recog_temp_ssa_var (utype, NULL);
3911 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
3912 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
3913 oprnd0 = def;
3916 if (dt == vect_external_def && TREE_CODE (oprnd1) == SSA_NAME)
3917 ext_def = vect_get_external_def_edge (vinfo, oprnd1);
3919 def = NULL_TREE;
3920 scalar_int_mode mode = SCALAR_INT_TYPE_MODE (utype);
3921 if (dt != vect_internal_def || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
3922 def = oprnd1;
3923 else if (def_stmt && gimple_assign_cast_p (def_stmt))
3925 tree rhs1 = gimple_assign_rhs1 (def_stmt);
3926 if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
3927 && TYPE_PRECISION (TREE_TYPE (rhs1))
3928 == TYPE_PRECISION (type))
3929 def = rhs1;
3932 if (def == NULL_TREE)
3934 def = vect_recog_temp_ssa_var (utype, NULL);
3935 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
3936 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
3938 stype = TREE_TYPE (def);
3940 if (TREE_CODE (def) == INTEGER_CST)
3942 if (!tree_fits_uhwi_p (def)
3943 || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
3944 || integer_zerop (def))
3945 return NULL;
3946 def2 = build_int_cst (stype,
3947 GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
3949 else
3951 tree vecstype = get_vectype_for_scalar_type (vinfo, stype);
3953 if (vecstype == NULL_TREE)
3954 return NULL;
3955 def2 = vect_recog_temp_ssa_var (stype, NULL);
3956 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
3957 if (ext_def)
3959 basic_block new_bb
3960 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
3961 gcc_assert (!new_bb);
3963 else
3964 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
3966 def2 = vect_recog_temp_ssa_var (stype, NULL);
3967 tree mask = build_int_cst (stype, GET_MODE_PRECISION (mode) - 1);
3968 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
3969 gimple_assign_lhs (def_stmt), mask);
3970 if (ext_def)
3972 basic_block new_bb
3973 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
3974 gcc_assert (!new_bb);
3976 else
3977 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
3980 var1 = vect_recog_temp_ssa_var (utype, NULL);
3981 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
3982 ? LSHIFT_EXPR : RSHIFT_EXPR,
3983 oprnd0, def);
3984 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
3986 var2 = vect_recog_temp_ssa_var (utype, NULL);
3987 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
3988 ? RSHIFT_EXPR : LSHIFT_EXPR,
3989 oprnd0, def2);
3990 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
3992 /* Pattern detected. */
3993 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
3995 /* Pattern supported. Create a stmt to be used to replace the pattern. */
3996 var = vect_recog_temp_ssa_var (utype, NULL);
3997 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
3999 if (!useless_type_conversion_p (type, utype))
4001 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, uvectype);
4002 tree result = vect_recog_temp_ssa_var (type, NULL);
4003 pattern_stmt = gimple_build_assign (result, NOP_EXPR, var);
4005 return pattern_stmt;
4008 /* Detect a vector by vector shift pattern that wouldn't otherwise be
4009 vectorized:
4011 type a_t;
4012 TYPE b_T, res_T;
4014 S1 a_t = ;
4015 S2 b_T = ;
4016 S3 res_T = b_T op a_t;
4018 where type 'TYPE' is a type with different size than 'type',
4019 and op is <<, >> or rotate.
4021 Also detect cases:
4023 type a_t;
4024 TYPE b_T, c_T, res_T;
4026 S0 c_T = ;
4027 S1 a_t = (type) c_T;
4028 S2 b_T = ;
4029 S3 res_T = b_T op a_t;
4031 Input/Output:
4033 * STMT_VINFO: The stmt from which the pattern search begins,
4034 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
4035 with a shift/rotate which has same type on both operands, in the
4036 second case just b_T op c_T, in the first case with added cast
4037 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
4039 Output:
4041 * TYPE_OUT: The type of the output of this pattern.
4043 * Return value: A new stmt that will be used to replace the shift/rotate
4044 S3 stmt. */
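/* Illustrative example (added for exposition, not part of the original
   sources):

     unsigned char a_t;
     unsigned int  b_T, res_T;
     ...
     res_T = b_T << a_t;

   The shift amount and the shifted value have different modes, so they
   cannot be used directly as the two operands of a vector shift.  The
   pattern emits c_T = (unsigned int) a_t into the pattern def sequence
   (or reuses an existing cast) and rewrites the statement as
   res_T = b_T << c_T, giving both operands the same vector type.  */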
4046 static gimple *
4047 vect_recog_vector_vector_shift_pattern (vec_info *vinfo,
4048 stmt_vec_info stmt_vinfo,
4049 tree *type_out)
4051 gimple *last_stmt = stmt_vinfo->stmt;
4052 tree oprnd0, oprnd1, lhs, var;
4053 gimple *pattern_stmt;
4054 enum tree_code rhs_code;
4056 if (!is_gimple_assign (last_stmt))
4057 return NULL;
4059 rhs_code = gimple_assign_rhs_code (last_stmt);
4060 switch (rhs_code)
4062 case LSHIFT_EXPR:
4063 case RSHIFT_EXPR:
4064 case LROTATE_EXPR:
4065 case RROTATE_EXPR:
4066 break;
4067 default:
4068 return NULL;
4071 lhs = gimple_assign_lhs (last_stmt);
4072 oprnd0 = gimple_assign_rhs1 (last_stmt);
4073 oprnd1 = gimple_assign_rhs2 (last_stmt);
4074 if (TREE_CODE (oprnd0) != SSA_NAME
4075 || TREE_CODE (oprnd1) != SSA_NAME
4076 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
4077 || !INTEGRAL_TYPE_P (TREE_TYPE (oprnd0))
4078 || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
4079 || TYPE_PRECISION (TREE_TYPE (lhs))
4080 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
4081 return NULL;
4083 stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
4084 if (!def_vinfo)
4085 return NULL;
4087 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (oprnd0));
4088 if (*type_out == NULL_TREE)
4089 return NULL;
4091 tree def = NULL_TREE;
4092 gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
4093 if (def_stmt && gimple_assign_cast_p (def_stmt))
4095 tree rhs1 = gimple_assign_rhs1 (def_stmt);
4096 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
4097 && TYPE_PRECISION (TREE_TYPE (rhs1))
4098 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
4100 if (TYPE_PRECISION (TREE_TYPE (oprnd1))
4101 >= TYPE_PRECISION (TREE_TYPE (rhs1)))
4102 def = rhs1;
4103 else
4105 tree mask
4106 = build_low_bits_mask (TREE_TYPE (rhs1),
4107 TYPE_PRECISION (TREE_TYPE (oprnd1)));
4108 def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
4109 def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
4110 tree vecstype = get_vectype_for_scalar_type (vinfo,
4111 TREE_TYPE (rhs1));
4112 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
4117 if (def == NULL_TREE)
4119 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
4120 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
4121 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4124 /* Pattern detected. */
4125 vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
4127 /* Pattern supported. Create a stmt to be used to replace the pattern. */
4128 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
4129 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
4131 return pattern_stmt;
4134 /* Return true iff the target has a vector optab implementing the operation
4135 CODE on type VECTYPE. */
4137 static bool
4138 target_has_vecop_for_code (tree_code code, tree vectype)
4140 optab voptab = optab_for_tree_code (code, vectype, optab_vector);
4141 return voptab
4142 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
4145 /* Verify that the target has optabs of VECTYPE to perform all the steps
4146 needed by the multiplication-by-immediate synthesis algorithm described by
4147 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
4148 present. Return true iff the target supports all the steps. */
4150 static bool
4151 target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
4152 tree vectype, bool synth_shift_p)
4154 if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
4155 return false;
4157 bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
4158 bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
4160 if (var == negate_variant
4161 && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
4162 return false;
4164 /* If we must synthesize shifts with additions make sure that vector
4165 addition is available. */
4166 if ((var == add_variant || synth_shift_p) && !supports_vplus)
4167 return false;
4169 for (int i = 1; i < alg->ops; i++)
4171 switch (alg->op[i])
4173 case alg_shift:
4174 break;
4175 case alg_add_t_m2:
4176 case alg_add_t2_m:
4177 case alg_add_factor:
4178 if (!supports_vplus)
4179 return false;
4180 break;
4181 case alg_sub_t_m2:
4182 case alg_sub_t2_m:
4183 case alg_sub_factor:
4184 if (!supports_vminus)
4185 return false;
4186 break;
4187 case alg_unknown:
4188 case alg_m:
4189 case alg_zero:
4190 case alg_impossible:
4191 return false;
4192 default:
4193 gcc_unreachable ();
4197 return true;
4200 /* Synthesize a left shift of OP by AMNT bits using a series of additions and
4201 putting the final result in DEST. Append all statements but the last into
4202 VINFO. Return the last statement. */
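/* Illustrative sketch (added for exposition): a shift by AMNT is emitted
   as AMNT successive doublings, e.g. for AMNT == 3:

     t1   = op + op;       // op << 1
     t2   = t1 + t1;       // op << 2
     dest = t2 + t2;       // op << 3

   Only vector addition is required, which is why this is used as a
   fallback when the target lacks vector shifts.  */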
4204 static gimple *
4205 synth_lshift_by_additions (vec_info *vinfo,
4206 tree dest, tree op, HOST_WIDE_INT amnt,
4207 stmt_vec_info stmt_info)
4209 HOST_WIDE_INT i;
4210 tree itype = TREE_TYPE (op);
4211 tree prev_res = op;
4212 gcc_assert (amnt >= 0);
4213 for (i = 0; i < amnt; i++)
4215 tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
4216 : dest;
4217 gimple *stmt
4218 = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
4219 prev_res = tmp_var;
4220 if (i < amnt - 1)
4221 append_pattern_def_seq (vinfo, stmt_info, stmt);
4222 else
4223 return stmt;
4225 gcc_unreachable ();
4226 return NULL;
4229 /* Helper for vect_synth_mult_by_constant. Apply a binary operation
4230 CODE to operands OP1 and OP2, creating a new temporary SSA var in
4231 the process if necessary. Append the resulting assignment statements
4232 to the sequence in STMT_VINFO. Return the SSA variable that holds the
4233 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
4234 left shifts using additions. */
4236 static tree
4237 apply_binop_and_append_stmt (vec_info *vinfo,
4238 tree_code code, tree op1, tree op2,
4239 stmt_vec_info stmt_vinfo, bool synth_shift_p)
4241 if (integer_zerop (op2)
4242 && (code == LSHIFT_EXPR
4243 || code == PLUS_EXPR))
4245 gcc_assert (TREE_CODE (op1) == SSA_NAME);
4246 return op1;
4249 gimple *stmt;
4250 tree itype = TREE_TYPE (op1);
4251 tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
4253 if (code == LSHIFT_EXPR
4254 && synth_shift_p)
4256 stmt = synth_lshift_by_additions (vinfo, tmp_var, op1,
4257 TREE_INT_CST_LOW (op2), stmt_vinfo);
4258 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
4259 return tmp_var;
4262 stmt = gimple_build_assign (tmp_var, code, op1, op2);
4263 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
4264 return tmp_var;
4267 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
4268 and simple arithmetic operations to be vectorized. Record the statements
4269 produced in STMT_VINFO and return the last statement in the sequence or
4270 NULL if it's not possible to synthesize such a multiplication.
4271 This function mirrors the behavior of expand_mult_const in expmed.cc but
4272 works on tree-ssa form. */
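/* Illustrative example (added for exposition; the exact algorithm chosen
   is up to choose_mult_variant): for VAL == 9 a typical result is

     t1 = op << 3;
     result = t1 + op;

   and for VAL == 15 a shift-and-subtract form

     t1 = op << 4;
     result = t1 - op;

   When the original type does not wrap on overflow the statements are
   emitted on an unsigned copy of the type and the result is cast back.  */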
4274 static gimple *
4275 vect_synth_mult_by_constant (vec_info *vinfo, tree op, tree val,
4276 stmt_vec_info stmt_vinfo)
4278 tree itype = TREE_TYPE (op);
4279 machine_mode mode = TYPE_MODE (itype);
4280 struct algorithm alg;
4281 mult_variant variant;
4282 if (!tree_fits_shwi_p (val))
4283 return NULL;
4285 /* Multiplication synthesis by shifts, adds and subs can introduce
4286 signed overflow where the original operation didn't. Perform the
4287 operations on an unsigned type and cast back to avoid this.
4288 In the future we may want to relax this for synthesis algorithms
4289 that we can prove do not cause unexpected overflow. */
4290 bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
4292 tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
4293 tree vectype = get_vectype_for_scalar_type (vinfo, multtype);
4294 if (!vectype)
4295 return NULL;
4297 /* Targets that don't support vector shifts but support vector additions
4298 can synthesize shifts that way. */
4299 bool synth_shift_p = !vect_supportable_shift (vinfo, LSHIFT_EXPR, multtype);
4301 HOST_WIDE_INT hwval = tree_to_shwi (val);
4302 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
4303 The vectorizer's benefit analysis will decide whether it's beneficial
4304 to do this. */
4305 bool possible = choose_mult_variant (VECTOR_MODE_P (TYPE_MODE (vectype))
4306 ? TYPE_MODE (vectype) : mode,
4307 hwval, &alg, &variant, MAX_COST);
4308 if (!possible)
4309 return NULL;
4311 if (!target_supports_mult_synth_alg (&alg, variant, vectype, synth_shift_p))
4312 return NULL;
4314 tree accumulator;
4316 /* Clear out the sequence of statements so we can populate it below. */
4317 gimple *stmt = NULL;
4319 if (cast_to_unsigned_p)
4321 tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
4322 stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
4323 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
4324 op = tmp_op;
4327 if (alg.op[0] == alg_zero)
4328 accumulator = build_int_cst (multtype, 0);
4329 else
4330 accumulator = op;
4332 bool needs_fixup = (variant == negate_variant)
4333 || (variant == add_variant);
4335 for (int i = 1; i < alg.ops; i++)
4337 tree shft_log = build_int_cst (multtype, alg.log[i]);
4338 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
4339 tree tmp_var = NULL_TREE;
4341 switch (alg.op[i])
4343 case alg_shift:
4344 if (synth_shift_p)
4345 stmt
4346 = synth_lshift_by_additions (vinfo, accum_tmp, accumulator,
4347 alg.log[i], stmt_vinfo);
4348 else
4349 stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
4350 shft_log);
4351 break;
4352 case alg_add_t_m2:
4353 tmp_var
4354 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op, shft_log,
4355 stmt_vinfo, synth_shift_p);
4356 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
4357 tmp_var);
4358 break;
4359 case alg_sub_t_m2:
4360 tmp_var = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op,
4361 shft_log, stmt_vinfo,
4362 synth_shift_p);
4363 /* In some algorithms the first step involves zeroing the
4364 accumulator. If subtracting from such an accumulator
4365 just emit the negation directly. */
4366 if (integer_zerop (accumulator))
4367 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
4368 else
4369 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
4370 tmp_var);
4371 break;
4372 case alg_add_t2_m:
4373 tmp_var
4374 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
4375 shft_log, stmt_vinfo, synth_shift_p);
4376 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
4377 break;
4378 case alg_sub_t2_m:
4379 tmp_var
4380 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
4381 shft_log, stmt_vinfo, synth_shift_p);
4382 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
4383 break;
4384 case alg_add_factor:
4385 tmp_var
4386 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
4387 shft_log, stmt_vinfo, synth_shift_p);
4388 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
4389 tmp_var);
4390 break;
4391 case alg_sub_factor:
4392 tmp_var
4393 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
4394 shft_log, stmt_vinfo, synth_shift_p);
4395 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
4396 accumulator);
4397 break;
4398 default:
4399 gcc_unreachable ();
4401 /* We don't want to append the last stmt in the sequence to stmt_vinfo
4402 but rather return it directly. */
4404 if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
4405 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
4406 accumulator = accum_tmp;
4408 if (variant == negate_variant)
4410 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
4411 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
4412 accumulator = accum_tmp;
4413 if (cast_to_unsigned_p)
4414 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
4416 else if (variant == add_variant)
4418 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
4419 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
4420 accumulator = accum_tmp;
4421 if (cast_to_unsigned_p)
4422 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
4424 /* Move back to a signed type if needed. */
4425 if (cast_to_unsigned_p)
4427 tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
4428 stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
4431 return stmt;
4434 /* Detect multiplication by a constant and convert it into a sequence of
4435 shifts, additions, subtractions and negations.  We reuse the
4436 choose_mult_variant algorithms from expmed.cc.
4438 Input/Output:
4440 STMT_VINFO: The stmt from which the pattern search begins,
4441 i.e. the mult stmt.
4443 Output:
4445 * TYPE_OUT: The type of the output of this pattern.
4447 * Return value: A new stmt that will be used to replace
4448 the multiplication. */
4450 static gimple *
4451 vect_recog_mult_pattern (vec_info *vinfo,
4452 stmt_vec_info stmt_vinfo, tree *type_out)
4454 gimple *last_stmt = stmt_vinfo->stmt;
4455 tree oprnd0, oprnd1, vectype, itype;
4456 gimple *pattern_stmt;
4458 if (!is_gimple_assign (last_stmt))
4459 return NULL;
4461 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
4462 return NULL;
4464 oprnd0 = gimple_assign_rhs1 (last_stmt);
4465 oprnd1 = gimple_assign_rhs2 (last_stmt);
4466 itype = TREE_TYPE (oprnd0);
4468 if (TREE_CODE (oprnd0) != SSA_NAME
4469 || TREE_CODE (oprnd1) != INTEGER_CST
4470 || !INTEGRAL_TYPE_P (itype)
4471 || !type_has_mode_precision_p (itype))
4472 return NULL;
4474 vectype = get_vectype_for_scalar_type (vinfo, itype);
4475 if (vectype == NULL_TREE)
4476 return NULL;
4478 /* If the target can handle vectorized multiplication natively,
4479 don't attempt to optimize this. */
4480 optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
4481 if (mul_optab != unknown_optab)
4483 machine_mode vec_mode = TYPE_MODE (vectype);
4484 int icode = (int) optab_handler (mul_optab, vec_mode);
4485 if (icode != CODE_FOR_nothing)
4486 return NULL;
4489 pattern_stmt = vect_synth_mult_by_constant (vinfo,
4490 oprnd0, oprnd1, stmt_vinfo);
4491 if (!pattern_stmt)
4492 return NULL;
4494 /* Pattern detected. */
4495 vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
4497 *type_out = vectype;
4499 return pattern_stmt;
4502 /* Detect a signed division by a constant that wouldn't otherwise
4503 be vectorized:
4505 type a_t, b_t;
4507 S1 a_t = b_t / N;
4509 where type 'type' is an integral type and N is a constant.
4511 Similarly handle modulo by a constant:
4513 S4 a_t = b_t % N;
4515 Input/Output:
4517 * STMT_VINFO: The stmt from which the pattern search begins,
4518 i.e. the division stmt.  S1 is replaced by the following if N is a
4519 power of two constant and type is signed:
4520 S3 y_t = b_t < 0 ? N - 1 : 0;
4521 S2 x_t = b_t + y_t;
4522 S1' a_t = x_t >> log2 (N);
4524 S4 is replaced, if N is a power of two constant and the type is
4525 signed, by the following (where *_T temporaries have unsigned type):
4526 S9 y_T = b_t < 0 ? -1U : 0U;
4527 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
4528 S7 z_t = (type) z_T;
4529 S6 w_t = b_t + z_t;
4530 S5 x_t = w_t & (N - 1);
4531 S4' a_t = x_t - z_t;
4533 Output:
4535 * TYPE_OUT: The type of the output of this pattern.
4537 * Return value: A new stmt that will be used to replace the division
4538 S1 or modulo S4 stmt. */
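/* Illustrative example (added for exposition): for signed

     a_t = b_t / 4;

   the power-of-two path below emits

     y_t = b_t < 0 ? 3 : 0;
     x_t = b_t + y_t;
     a_t = x_t >> 2;

   i.e. negative inputs are biased by N - 1 so that the arithmetic right
   shift truncates towards zero, matching TRUNC_DIV_EXPR.  */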
4540 static gimple *
4541 vect_recog_divmod_pattern (vec_info *vinfo,
4542 stmt_vec_info stmt_vinfo, tree *type_out)
4544 gimple *last_stmt = stmt_vinfo->stmt;
4545 tree oprnd0, oprnd1, vectype, itype, cond;
4546 gimple *pattern_stmt, *def_stmt;
4547 enum tree_code rhs_code;
4548 optab optab;
4549 tree q, cst;
4550 int dummy_int, prec;
4552 if (!is_gimple_assign (last_stmt))
4553 return NULL;
4555 rhs_code = gimple_assign_rhs_code (last_stmt);
4556 switch (rhs_code)
4558 case TRUNC_DIV_EXPR:
4559 case EXACT_DIV_EXPR:
4560 case TRUNC_MOD_EXPR:
4561 break;
4562 default:
4563 return NULL;
4566 oprnd0 = gimple_assign_rhs1 (last_stmt);
4567 oprnd1 = gimple_assign_rhs2 (last_stmt);
4568 itype = TREE_TYPE (oprnd0);
4569 if (TREE_CODE (oprnd0) != SSA_NAME
4570 || TREE_CODE (oprnd1) != INTEGER_CST
4571 || TREE_CODE (itype) != INTEGER_TYPE
4572 || !type_has_mode_precision_p (itype))
4573 return NULL;
4575 scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
4576 vectype = get_vectype_for_scalar_type (vinfo, itype);
4577 if (vectype == NULL_TREE)
4578 return NULL;
4580 if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
4582 /* If the target can handle vectorized division or modulo natively,
4583 don't attempt to optimize this, since native division is likely
4584 to give smaller code. */
4585 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
4586 if (optab != unknown_optab)
4588 machine_mode vec_mode = TYPE_MODE (vectype);
4589 int icode = (int) optab_handler (optab, vec_mode);
4590 if (icode != CODE_FOR_nothing)
4591 return NULL;
4595 prec = TYPE_PRECISION (itype);
4596 if (integer_pow2p (oprnd1))
4598 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
4599 return NULL;
4601 /* Pattern detected. */
4602 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
4604 *type_out = vectype;
4606 /* Check if the target supports this internal function. */
4607 internal_fn ifn = IFN_DIV_POW2;
4608 if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
4610 tree shift = build_int_cst (itype, tree_log2 (oprnd1));
4612 tree var_div = vect_recog_temp_ssa_var (itype, NULL);
4613 gimple *div_stmt = gimple_build_call_internal (ifn, 2, oprnd0, shift);
4614 gimple_call_set_lhs (div_stmt, var_div);
4616 if (rhs_code == TRUNC_MOD_EXPR)
4618 append_pattern_def_seq (vinfo, stmt_vinfo, div_stmt);
4619 def_stmt
4620 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4621 LSHIFT_EXPR, var_div, shift);
4622 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4623 pattern_stmt
4624 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4625 MINUS_EXPR, oprnd0,
4626 gimple_assign_lhs (def_stmt));
4628 else
4629 pattern_stmt = div_stmt;
4630 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
4632 return pattern_stmt;
4635 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
4636 build_int_cst (itype, 0));
4637 if (rhs_code == TRUNC_DIV_EXPR
4638 || rhs_code == EXACT_DIV_EXPR)
4640 tree var = vect_recog_temp_ssa_var (itype, NULL);
4641 tree shift;
4642 def_stmt
4643 = gimple_build_assign (var, COND_EXPR, cond,
4644 fold_build2 (MINUS_EXPR, itype, oprnd1,
4645 build_int_cst (itype, 1)),
4646 build_int_cst (itype, 0));
4647 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4648 var = vect_recog_temp_ssa_var (itype, NULL);
4649 def_stmt
4650 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
4651 gimple_assign_lhs (def_stmt));
4652 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4654 shift = build_int_cst (itype, tree_log2 (oprnd1));
4655 pattern_stmt
4656 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4657 RSHIFT_EXPR, var, shift);
4659 else
4661 tree signmask;
4662 if (compare_tree_int (oprnd1, 2) == 0)
4664 signmask = vect_recog_temp_ssa_var (itype, NULL);
4665 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
4666 build_int_cst (itype, 1),
4667 build_int_cst (itype, 0));
4668 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4670 else
4672 tree utype
4673 = build_nonstandard_integer_type (prec, 1);
4674 tree vecutype = get_vectype_for_scalar_type (vinfo, utype);
4675 tree shift
4676 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
4677 - tree_log2 (oprnd1));
4678 tree var = vect_recog_temp_ssa_var (utype, NULL);
4680 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
4681 build_int_cst (utype, -1),
4682 build_int_cst (utype, 0));
4683 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
4684 var = vect_recog_temp_ssa_var (utype, NULL);
4685 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
4686 gimple_assign_lhs (def_stmt),
4687 shift);
4688 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
4689 signmask = vect_recog_temp_ssa_var (itype, NULL);
4690 def_stmt
4691 = gimple_build_assign (signmask, NOP_EXPR, var);
4692 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4694 def_stmt
4695 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4696 PLUS_EXPR, oprnd0, signmask);
4697 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4698 def_stmt
4699 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4700 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
4701 fold_build2 (MINUS_EXPR, itype, oprnd1,
4702 build_int_cst (itype, 1)));
4703 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4705 pattern_stmt
4706 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4707 MINUS_EXPR, gimple_assign_lhs (def_stmt),
4708 signmask);
4711 return pattern_stmt;
4714 if ((cst = uniform_integer_cst_p (oprnd1))
4715 && TYPE_UNSIGNED (itype)
4716 && rhs_code == TRUNC_DIV_EXPR
4717 && vectype
4718 && targetm.vectorize.preferred_div_as_shifts_over_mult (vectype))
4720 /* We can use the relationship:
4722 x // N == ((x+N+2) // (N+1) + x) // (N+1) for 0 <= x < N(N+3)
4724 to optimize cases where N+1 is a power of 2, and where // (N+1)
4725 is therefore a shift right. When operating in modes that are
4726 multiples of a byte in size, there are two cases:
4728 (1) N(N+3) is not representable, in which case the question
4729 becomes whether the replacement expression overflows.
4730 It is enough to test that x+N+2 does not overflow,
4731 i.e. that x < MAX-(N+1).
4733 (2) N(N+3) is representable, in which case it is the (only)
4734 bound that we need to check.
4736 ??? For now we just handle the case where // (N+1) is a shift
4737 right by half the precision, since some architectures can
4738 optimize the associated addition and shift combinations
4739 into single instructions. */
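/* Worked example (added for exposition, not part of the original
   sources): for a 16-bit unsigned x / 255 we have N == 255, N + 1 == 256,
   pow == 8 == prec / 2 and adder == 257.  Taking x == 1000:

     (1000 + 257) >> 8 == 4,  (4 + 1000) >> 8 == 3 == 1000 / 255.

   The range check below only has to prove that x + 257 cannot wrap.  */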
4741 auto wcst = wi::to_wide (cst);
4742 int pow = wi::exact_log2 (wcst + 1);
4743 if (pow == prec / 2)
4745 gimple *stmt = SSA_NAME_DEF_STMT (oprnd0);
4747 gimple_ranger ranger;
4748 int_range_max r;
4750 /* Check that no overflow will occur. If we don't have range
4751 information we can't perform the optimization. */
4753 if (ranger.range_of_expr (r, oprnd0, stmt) && !r.undefined_p ())
4755 wide_int max = r.upper_bound ();
4756 wide_int one = wi::shwi (1, prec);
4757 wide_int adder = wi::add (one, wi::lshift (one, pow));
4758 wi::overflow_type ovf;
4759 wi::add (max, adder, UNSIGNED, &ovf);
4760 if (ovf == wi::OVF_NONE)
4762 *type_out = vectype;
4763 tree tadder = wide_int_to_tree (itype, adder);
4764 tree rshift = wide_int_to_tree (itype, pow);
4766 tree new_lhs1 = vect_recog_temp_ssa_var (itype, NULL);
4767 gassign *patt1
4768 = gimple_build_assign (new_lhs1, PLUS_EXPR, oprnd0, tadder);
4769 append_pattern_def_seq (vinfo, stmt_vinfo, patt1, vectype);
4771 tree new_lhs2 = vect_recog_temp_ssa_var (itype, NULL);
4772 patt1 = gimple_build_assign (new_lhs2, RSHIFT_EXPR, new_lhs1,
4773 rshift);
4774 append_pattern_def_seq (vinfo, stmt_vinfo, patt1, vectype);
4776 tree new_lhs3 = vect_recog_temp_ssa_var (itype, NULL);
4777 patt1 = gimple_build_assign (new_lhs3, PLUS_EXPR, new_lhs2,
4778 oprnd0);
4779 append_pattern_def_seq (vinfo, stmt_vinfo, patt1, vectype);
4781 tree new_lhs4 = vect_recog_temp_ssa_var (itype, NULL);
4782 pattern_stmt = gimple_build_assign (new_lhs4, RSHIFT_EXPR,
4783 new_lhs3, rshift);
4785 return pattern_stmt;
4791 if (prec > HOST_BITS_PER_WIDE_INT
4792 || integer_zerop (oprnd1))
4793 return NULL;
4795 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
4796 return NULL;
4798 if (TYPE_UNSIGNED (itype))
4800 unsigned HOST_WIDE_INT mh, ml;
4801 int pre_shift, post_shift;
4802 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
4803 & GET_MODE_MASK (itype_mode));
4804 tree t1, t2, t3, t4;
4806 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
4807 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
4808 return NULL;
4810 /* Find a suitable multiplier and right shift count
4811 instead of multiplying with D. */
4812 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
4814 /* If the suggested multiplier is more than SIZE bits, we can do better
4815 for even divisors, using an initial right shift. */
4816 if (mh != 0 && (d & 1) == 0)
4818 pre_shift = ctz_or_zero (d);
4819 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
4820 &ml, &post_shift, &dummy_int);
4821 gcc_assert (!mh);
4823 else
4824 pre_shift = 0;
4826 if (mh != 0)
4828 if (post_shift - 1 >= prec)
4829 return NULL;
4831 /* t1 = oprnd0 h* ml;
4832 t2 = oprnd0 - t1;
4833 t3 = t2 >> 1;
4834 t4 = t1 + t3;
4835 q = t4 >> (post_shift - 1); */
4836 t1 = vect_recog_temp_ssa_var (itype, NULL);
4837 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
4838 build_int_cst (itype, ml));
4839 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4841 t2 = vect_recog_temp_ssa_var (itype, NULL);
4842 def_stmt
4843 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
4844 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4846 t3 = vect_recog_temp_ssa_var (itype, NULL);
4847 def_stmt
4848 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
4849 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4851 t4 = vect_recog_temp_ssa_var (itype, NULL);
4852 def_stmt
4853 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
4855 if (post_shift != 1)
4857 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4859 q = vect_recog_temp_ssa_var (itype, NULL);
4860 pattern_stmt
4861 = gimple_build_assign (q, RSHIFT_EXPR, t4,
4862 build_int_cst (itype, post_shift - 1));
4864 else
4866 q = t4;
4867 pattern_stmt = def_stmt;
4870 else
4872 if (pre_shift >= prec || post_shift >= prec)
4873 return NULL;
4875 /* t1 = oprnd0 >> pre_shift;
4876 t2 = t1 h* ml;
4877 q = t2 >> post_shift; */
4878 if (pre_shift)
4880 t1 = vect_recog_temp_ssa_var (itype, NULL);
4881 def_stmt
4882 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
4883 build_int_cst (NULL, pre_shift));
4884 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4886 else
4887 t1 = oprnd0;
4889 t2 = vect_recog_temp_ssa_var (itype, NULL);
4890 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
4891 build_int_cst (itype, ml));
4893 if (post_shift)
4895 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4897 q = vect_recog_temp_ssa_var (itype, NULL);
4898 def_stmt
4899 = gimple_build_assign (q, RSHIFT_EXPR, t2,
4900 build_int_cst (itype, post_shift));
4902 else
4903 q = t2;
4905 pattern_stmt = def_stmt;
4908 else
4910 unsigned HOST_WIDE_INT ml;
4911 int post_shift;
4912 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
4913 unsigned HOST_WIDE_INT abs_d;
4914 bool add = false;
4915 tree t1, t2, t3, t4;
4917 /* Give up for -1. */
4918 if (d == -1)
4919 return NULL;
4921 /* Since d might be INT_MIN, we have to cast to
4922 unsigned HOST_WIDE_INT before negating to avoid
4923 undefined signed overflow. */
4924 abs_d = (d >= 0
4925 ? (unsigned HOST_WIDE_INT) d
4926 : - (unsigned HOST_WIDE_INT) d);
4928 /* n rem d = n rem -d */
4929 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
4931 d = abs_d;
4932 oprnd1 = build_int_cst (itype, abs_d);
4934 if (HOST_BITS_PER_WIDE_INT >= prec
4935 && abs_d == HOST_WIDE_INT_1U << (prec - 1))
4936 /* This case is not handled correctly below. */
4937 return NULL;
4939 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
4940 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
4942 add = true;
4943 ml |= HOST_WIDE_INT_M1U << (prec - 1);
4945 if (post_shift >= prec)
4946 return NULL;
4948 /* t1 = oprnd0 h* ml; */
4949 t1 = vect_recog_temp_ssa_var (itype, NULL);
4950 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
4951 build_int_cst (itype, ml));
4953 if (add)
4955 /* t2 = t1 + oprnd0; */
4956 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4957 t2 = vect_recog_temp_ssa_var (itype, NULL);
4958 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
4960 else
4961 t2 = t1;
4963 if (post_shift)
4965 /* t3 = t2 >> post_shift; */
4966 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4967 t3 = vect_recog_temp_ssa_var (itype, NULL);
4968 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
4969 build_int_cst (itype, post_shift));
4971 else
4972 t3 = t2;
4974 int msb = 1;
4975 value_range r;
4976 get_range_query (cfun)->range_of_expr (r, oprnd0);
4977 if (!r.varying_p () && !r.undefined_p ())
4979 if (!wi::neg_p (r.lower_bound (), TYPE_SIGN (itype)))
4980 msb = 0;
4981 else if (wi::neg_p (r.upper_bound (), TYPE_SIGN (itype)))
4982 msb = -1;
4985 if (msb == 0 && d >= 0)
4987 /* q = t3; */
4988 q = t3;
4989 pattern_stmt = def_stmt;
4991 else
4993 /* t4 = oprnd0 >> (prec - 1);
4994 or if we know from VRP that oprnd0 >= 0
4995 t4 = 0;
4996 or if we know from VRP that oprnd0 < 0
4997 t4 = -1; */
4998 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4999 t4 = vect_recog_temp_ssa_var (itype, NULL);
5000 if (msb != 1)
5001 def_stmt = gimple_build_assign (t4, INTEGER_CST,
5002 build_int_cst (itype, msb));
5003 else
5004 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
5005 build_int_cst (itype, prec - 1));
5006 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5008 /* q = t3 - t4; or q = t4 - t3; */
5009 q = vect_recog_temp_ssa_var (itype, NULL);
5010 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
5011 d < 0 ? t3 : t4);
5015 if (rhs_code == TRUNC_MOD_EXPR)
5017 tree r, t1;
5019 /* We divided. Now finish by:
5020 t1 = q * oprnd1;
5021 r = oprnd0 - t1; */
5022 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
5024 t1 = vect_recog_temp_ssa_var (itype, NULL);
5025 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
5026 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5028 r = vect_recog_temp_ssa_var (itype, NULL);
5029 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
5032 /* Pattern detected. */
5033 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
5035 *type_out = vectype;
5036 return pattern_stmt;
5039 /* Function vect_recog_mixed_size_cond_pattern
5041 Try to find the following pattern:
5043 type x_t, y_t;
5044 TYPE a_T, b_T, c_T;
5045 loop:
5046 S1 a_T = x_t CMP y_t ? b_T : c_T;
5048 where type 'TYPE' is an integral type which has different size
5049 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
5050 than 'type', the constants need to fit into an integer type
5051 with the same width as 'type') or results of conversion from 'type'.
5053 Input:
5055 * STMT_VINFO: The stmt from which the pattern search begins.
5057 Output:
5059 * TYPE_OUT: The type of the output of this pattern.
5061 * Return value: A new stmt that will be used to replace the pattern.
5062 Additionally a def_stmt is added.
5064 a_it = x_t CMP y_t ? b_it : c_it;
5065 a_T = (TYPE) a_it; */
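/* Illustrative example (added for exposition, not part of the original
   sources):

     short x_t, y_t;
     int   a_T;
     ...
     a_T = x_t < y_t ? 17 : -1;

   The comparison operates on short elements while the result is int, so
   the mask and data vectors would have different element counts.  The
   pattern instead computes the COND_EXPR in a short-sized temporary and
   widens afterwards:

     a_it = x_t < y_t ? 17 : -1;
     a_T  = (int) a_it;  */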
5067 static gimple *
5068 vect_recog_mixed_size_cond_pattern (vec_info *vinfo,
5069 stmt_vec_info stmt_vinfo, tree *type_out)
5071 gimple *last_stmt = stmt_vinfo->stmt;
5072 tree cond_expr, then_clause, else_clause;
5073 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
5074 gimple *pattern_stmt, *def_stmt;
5075 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
5076 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
5077 bool promotion;
5078 tree comp_scalar_type;
5080 if (!is_gimple_assign (last_stmt)
5081 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
5082 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
5083 return NULL;
5085 cond_expr = gimple_assign_rhs1 (last_stmt);
5086 then_clause = gimple_assign_rhs2 (last_stmt);
5087 else_clause = gimple_assign_rhs3 (last_stmt);
5089 if (!COMPARISON_CLASS_P (cond_expr))
5090 return NULL;
5092 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
5093 comp_vectype = get_vectype_for_scalar_type (vinfo, comp_scalar_type);
5094 if (comp_vectype == NULL_TREE)
5095 return NULL;
5097 type = TREE_TYPE (gimple_assign_lhs (last_stmt));
5098 if (types_compatible_p (type, comp_scalar_type)
5099 || ((TREE_CODE (then_clause) != INTEGER_CST
5100 || TREE_CODE (else_clause) != INTEGER_CST)
5101 && !INTEGRAL_TYPE_P (comp_scalar_type))
5102 || !INTEGRAL_TYPE_P (type))
5103 return NULL;
5105 if ((TREE_CODE (then_clause) != INTEGER_CST
5106 && !type_conversion_p (vinfo, then_clause, false,
5107 &orig_type0, &def_stmt0, &promotion))
5108 || (TREE_CODE (else_clause) != INTEGER_CST
5109 && !type_conversion_p (vinfo, else_clause, false,
5110 &orig_type1, &def_stmt1, &promotion)))
5111 return NULL;
5113 if (orig_type0 && orig_type1
5114 && !types_compatible_p (orig_type0, orig_type1))
5115 return NULL;
5117 if (orig_type0)
5119 if (!types_compatible_p (orig_type0, comp_scalar_type))
5120 return NULL;
5121 then_clause = gimple_assign_rhs1 (def_stmt0);
5122 itype = orig_type0;
5125 if (orig_type1)
5127 if (!types_compatible_p (orig_type1, comp_scalar_type))
5128 return NULL;
5129 else_clause = gimple_assign_rhs1 (def_stmt1);
5130 itype = orig_type1;
5134 HOST_WIDE_INT cmp_mode_size
5135 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
5137 scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
5138 if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
5139 return NULL;
5141 vectype = get_vectype_for_scalar_type (vinfo, type);
5142 if (vectype == NULL_TREE)
5143 return NULL;
5145 if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
5146 return NULL;
5148 if (itype == NULL_TREE)
5149 itype = build_nonstandard_integer_type (cmp_mode_size,
5150 TYPE_UNSIGNED (type));
5152 if (itype == NULL_TREE
5153 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
5154 return NULL;
5156 vecitype = get_vectype_for_scalar_type (vinfo, itype);
5157 if (vecitype == NULL_TREE)
5158 return NULL;
5160 if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
5161 return NULL;
5163 if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
5165 if ((TREE_CODE (then_clause) == INTEGER_CST
5166 && !int_fits_type_p (then_clause, itype))
5167 || (TREE_CODE (else_clause) == INTEGER_CST
5168 && !int_fits_type_p (else_clause, itype)))
5169 return NULL;
5172 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
5173 COND_EXPR, unshare_expr (cond_expr),
5174 fold_convert (itype, then_clause),
5175 fold_convert (itype, else_clause));
5176 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
5177 NOP_EXPR, gimple_assign_lhs (def_stmt));
5179 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecitype);
5180 *type_out = vectype;
5182 vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt);
5184 return pattern_stmt;
5188 /* Helper function of vect_recog_bool_pattern. Called recursively, return
5189 true if bool VAR can and should be optimized that way. Assume it shouldn't
5190 in case it's a result of a comparison which can be directly vectorized into
5191 a vector comparison. Fills in STMTS with all stmts visited during the
5192 walk. */
5194 static bool
5195 check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
5197 tree rhs1;
5198 enum tree_code rhs_code;
5200 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
5201 if (!def_stmt_info)
5202 return false;
5204 gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
5205 if (!def_stmt)
5206 return false;
5208 if (stmts.contains (def_stmt))
5209 return true;
5211 rhs1 = gimple_assign_rhs1 (def_stmt);
5212 rhs_code = gimple_assign_rhs_code (def_stmt);
5213 switch (rhs_code)
5215 case SSA_NAME:
5216 if (! check_bool_pattern (rhs1, vinfo, stmts))
5217 return false;
5218 break;
5220 CASE_CONVERT:
5221 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
5222 return false;
5223 if (! check_bool_pattern (rhs1, vinfo, stmts))
5224 return false;
5225 break;
5227 case BIT_NOT_EXPR:
5228 if (! check_bool_pattern (rhs1, vinfo, stmts))
5229 return false;
5230 break;
5232 case BIT_AND_EXPR:
5233 case BIT_IOR_EXPR:
5234 case BIT_XOR_EXPR:
5235 if (! check_bool_pattern (rhs1, vinfo, stmts)
5236 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
5237 return false;
5238 break;
5240 default:
5241 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
5243 tree vecitype, comp_vectype;
5245 /* If the comparison can throw, then is_gimple_condexpr will be
5246 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
5247 if (stmt_could_throw_p (cfun, def_stmt))
5248 return false;
5250 comp_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1));
5251 if (comp_vectype == NULL_TREE)
5252 return false;
5254 tree mask_type = get_mask_type_for_scalar_type (vinfo,
5255 TREE_TYPE (rhs1));
5256 if (mask_type
5257 && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
5258 return false;
5260 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
5262 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
5263 tree itype
5264 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
5265 vecitype = get_vectype_for_scalar_type (vinfo, itype);
5266 if (vecitype == NULL_TREE)
5267 return false;
5269 else
5270 vecitype = comp_vectype;
5271 if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
5272 return false;
5274 else
5275 return false;
5276 break;
5279 bool res = stmts.add (def_stmt);
5280 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
5281 gcc_assert (!res);
5283 return true;
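/* As an illustration only (hypothetical scalar names), check_bool_pattern
   would accept a chain such as:

     int x1, x2, y1, y2;
     bool a_b, b_b, c_b;
     int r_i;

     S1  a_b = x1 < y1;
     S2  b_b = x2 != y2;
     S3  c_b = a_b & b_b;
     S4  r_i = (int) c_b;

   collecting the defs S1, S2 and S3 in STMTS, but it answers false as soon
   as one of the comparisons can be vectorized directly into a vector
   comparison producing a mask, in which case the caller keeps the mask
   representation instead.  */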
5287 /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
5288 stmt (SSA_NAME_DEF_STMT of VAR) adding a cast to STMT_INFOs
5289 pattern sequence. */
5291 static tree
5292 adjust_bool_pattern_cast (vec_info *vinfo,
5293 tree type, tree var, stmt_vec_info stmt_info)
5295 gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
5296 NOP_EXPR, var);
5297 append_pattern_def_seq (vinfo, stmt_info, cast_stmt,
5298 get_vectype_for_scalar_type (vinfo, type));
5299 return gimple_assign_lhs (cast_stmt);
5302 /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
5303 VAR is an SSA_NAME that should be transformed from bool to a wider integer
5304 type, OUT_TYPE is the desired final integer type of the whole pattern.
5305 STMT_INFO is the info of the pattern root and is where pattern stmts should
5306 be associated with. DEFS is a map of pattern defs. */
5308 static void
5309 adjust_bool_pattern (vec_info *vinfo, tree var, tree out_type,
5310 stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
5312 gimple *stmt = SSA_NAME_DEF_STMT (var);
5313 enum tree_code rhs_code, def_rhs_code;
5314 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
5315 location_t loc;
5316 gimple *pattern_stmt, *def_stmt;
5317 tree trueval = NULL_TREE;
5319 rhs1 = gimple_assign_rhs1 (stmt);
5320 rhs2 = gimple_assign_rhs2 (stmt);
5321 rhs_code = gimple_assign_rhs_code (stmt);
5322 loc = gimple_location (stmt);
5323 switch (rhs_code)
5325 case SSA_NAME:
5326 CASE_CONVERT:
5327 irhs1 = *defs.get (rhs1);
5328 itype = TREE_TYPE (irhs1);
5329 pattern_stmt
5330 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
5331 SSA_NAME, irhs1);
5332 break;
5334 case BIT_NOT_EXPR:
5335 irhs1 = *defs.get (rhs1);
5336 itype = TREE_TYPE (irhs1);
5337 pattern_stmt
5338 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
5339 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
5340 break;
5342 case BIT_AND_EXPR:
5343 /* Try to optimize x = y & (a < b ? 1 : 0); into
5344 x = (a < b ? y : 0);
5346 E.g. for:
5347 bool a_b, b_b, c_b;
5348 TYPE d_T;
5350 S1 a_b = x1 CMP1 y1;
5351 S2 b_b = x2 CMP2 y2;
5352 S3 c_b = a_b & b_b;
5353 S4 d_T = (TYPE) c_b;
5355 we would normally emit:
5357 S1' a_T = x1 CMP1 y1 ? 1 : 0;
5358 S2' b_T = x2 CMP2 y2 ? 1 : 0;
5359 S3' c_T = a_T & b_T;
5360 S4' d_T = c_T;
5362 but we can save one stmt by using the
5363 result of one of the COND_EXPRs in the other COND_EXPR and leave
5364 BIT_AND_EXPR stmt out:
5366 S1' a_T = x1 CMP1 y1 ? 1 : 0;
5367 S3' c_T = x2 CMP2 y2 ? a_T : 0;
5368 S4' d_T = c_T;
5370 At least when VEC_COND_EXPR is implemented using masks
5371 cond ? 1 : 0 is as expensive as cond ? var : 0: in both cases it
5372 computes the comparison mask and ANDs it, in one case with an
5373 all-ones vector, in the other case with a vector register.
5374 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
5375 often more expensive. */
5376 def_stmt = SSA_NAME_DEF_STMT (rhs2);
5377 def_rhs_code = gimple_assign_rhs_code (def_stmt);
5378 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
5380 irhs1 = *defs.get (rhs1);
5381 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
5382 if (TYPE_PRECISION (TREE_TYPE (irhs1))
5383 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
5385 rhs_code = def_rhs_code;
5386 rhs1 = def_rhs1;
5387 rhs2 = gimple_assign_rhs2 (def_stmt);
5388 trueval = irhs1;
5389 goto do_compare;
5391 else
5392 irhs2 = *defs.get (rhs2);
5393 goto and_ior_xor;
5395 def_stmt = SSA_NAME_DEF_STMT (rhs1);
5396 def_rhs_code = gimple_assign_rhs_code (def_stmt);
5397 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
5399 irhs2 = *defs.get (rhs2);
5400 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
5401 if (TYPE_PRECISION (TREE_TYPE (irhs2))
5402 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
5404 rhs_code = def_rhs_code;
5405 rhs1 = def_rhs1;
5406 rhs2 = gimple_assign_rhs2 (def_stmt);
5407 trueval = irhs2;
5408 goto do_compare;
5410 else
5411 irhs1 = *defs.get (rhs1);
5412 goto and_ior_xor;
5414 /* FALLTHRU */
5415 case BIT_IOR_EXPR:
5416 case BIT_XOR_EXPR:
5417 irhs1 = *defs.get (rhs1);
5418 irhs2 = *defs.get (rhs2);
5419 and_ior_xor:
5420 if (TYPE_PRECISION (TREE_TYPE (irhs1))
5421 != TYPE_PRECISION (TREE_TYPE (irhs2)))
5423 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
5424 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
5425 int out_prec = TYPE_PRECISION (out_type);
5426 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
5427 irhs2 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs1), irhs2,
5428 stmt_info);
5429 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
5430 irhs1 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs2), irhs1,
5431 stmt_info);
5432 else
5434 irhs1 = adjust_bool_pattern_cast (vinfo,
5435 out_type, irhs1, stmt_info);
5436 irhs2 = adjust_bool_pattern_cast (vinfo,
5437 out_type, irhs2, stmt_info);
5440 itype = TREE_TYPE (irhs1);
5441 pattern_stmt
5442 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
5443 rhs_code, irhs1, irhs2);
5444 break;
5446 default:
5447 do_compare:
5448 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
5449 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
5450 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
5451 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
5452 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
5454 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
5455 itype
5456 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
5458 else
5459 itype = TREE_TYPE (rhs1);
5460 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
5461 if (trueval == NULL_TREE)
5462 trueval = build_int_cst (itype, 1);
5463 else
5464 gcc_checking_assert (useless_type_conversion_p (itype,
5465 TREE_TYPE (trueval)));
5466 pattern_stmt
5467 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
5468 COND_EXPR, cond_expr, trueval,
5469 build_int_cst (itype, 0));
5470 break;
5473 gimple_set_location (pattern_stmt, loc);
5474 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt,
5475 get_vectype_for_scalar_type (vinfo, itype));
5476 defs.put (var, gimple_assign_lhs (pattern_stmt));
5479 /* Comparison function to qsort a vector of gimple stmts after UID. */
5481 static int
5482 sort_after_uid (const void *p1, const void *p2)
5484 const gimple *stmt1 = *(const gimple * const *)p1;
5485 const gimple *stmt2 = *(const gimple * const *)p2;
5486 return gimple_uid (stmt1) - gimple_uid (stmt2);
5489 /* Create pattern stmts for all stmts participating in the bool pattern
5490 specified by BOOL_STMT_SET and its root STMT_INFO with the desired type
5491 OUT_TYPE. Return the def of the pattern root. */
5493 static tree
5494 adjust_bool_stmts (vec_info *vinfo, hash_set <gimple *> &bool_stmt_set,
5495 tree out_type, stmt_vec_info stmt_info)
5497 /* Gather original stmts in the bool pattern in their order of appearance
5498 in the IL. */
5499 auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
5500 for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
5501 i != bool_stmt_set.end (); ++i)
5502 bool_stmts.quick_push (*i);
5503 bool_stmts.qsort (sort_after_uid);
5505 /* Now process them in that order, producing pattern stmts. */
5506 hash_map <tree, tree> defs;
5507 for (unsigned i = 0; i < bool_stmts.length (); ++i)
5508 adjust_bool_pattern (vinfo, gimple_assign_lhs (bool_stmts[i]),
5509 out_type, stmt_info, defs);
5511 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
5512 gimple *pattern_stmt
5513 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
5514 return gimple_assign_lhs (pattern_stmt);
5517 /* Return the proper type for converting bool VAR into
5518 an integer value or NULL_TREE if no such type exists.
5519 The type is chosen so that the converted value has the
5520 same number of elements as VAR's vector type. */
5522 static tree
5523 integer_type_for_mask (tree var, vec_info *vinfo)
5525 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
5526 return NULL_TREE;
5528 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
5529 if (!def_stmt_info || !vect_use_mask_type_p (def_stmt_info))
5530 return NULL_TREE;
5532 return build_nonstandard_integer_type (def_stmt_info->mask_precision, 1);
5535 /* Function vect_recog_gcond_pattern
5537 Try to find a pattern like the following:
5539 if (a op b)
5541 where the operator 'op' is not !=, and convert it to an adjusted boolean pattern
5543 mask = a op b
5544 if (mask != 0)
5546 and set the mask type on MASK.
5548 Input:
5550 * STMT_VINFO: The stmt at the end from which the pattern
5551 search begins, i.e. the GIMPLE_COND
5552 to be transformed.
5554 Output:
5556 * TYPE_OUT: The type of the output of this pattern.
5558 * Return value: A new stmt that will be used to replace the pattern. */
5560 static gimple *
5561 vect_recog_gcond_pattern (vec_info *vinfo,
5562 stmt_vec_info stmt_vinfo, tree *type_out)
5564 /* Currently we only support this for loop vectorization, and only when the
5565 loop has multiple exits. */
5566 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5567 if (!loop_vinfo || !LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
5568 return NULL;
5570 gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
5571 gcond* cond = NULL;
5572 if (!(cond = dyn_cast <gcond *> (last_stmt)))
5573 return NULL;
5575 auto lhs = gimple_cond_lhs (cond);
5576 auto rhs = gimple_cond_rhs (cond);
5577 auto code = gimple_cond_code (cond);
5579 tree scalar_type = TREE_TYPE (lhs);
5580 if (VECTOR_TYPE_P (scalar_type))
5581 return NULL;
5583 if (code == NE_EXPR
5584 && zerop (rhs)
5585 && VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
5586 return NULL;
5588 tree vecitype = get_vectype_for_scalar_type (vinfo, scalar_type);
5589 if (vecitype == NULL_TREE)
5590 return NULL;
5592 tree vectype = truth_type_for (vecitype);
5594 tree new_lhs = vect_recog_temp_ssa_var (boolean_type_node, NULL);
5595 gimple *new_stmt = gimple_build_assign (new_lhs, code, lhs, rhs);
5596 append_pattern_def_seq (vinfo, stmt_vinfo, new_stmt, vectype, scalar_type);
5598 gimple *pattern_stmt
5599 = gimple_build_cond (NE_EXPR, new_lhs,
5600 build_int_cst (TREE_TYPE (new_lhs), 0),
5601 NULL_TREE, NULL_TREE);
5602 *type_out = vectype;
5603 vect_pattern_detected ("vect_recog_gcond_pattern", last_stmt);
5604 return pattern_stmt;
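/* For example (illustrative only, assuming an early-break loop), an exit
   condition of the form:

     if (a_5 > b_6)
       goto exit;

   is rewritten by vect_recog_gcond_pattern into:

     patt_mask_7 = a_5 > b_6;
     if (patt_mask_7 != 0)
       goto exit;

   so that the comparison gets a vector mask type and the gcond itself only
   tests that mask against zero.  */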
5607 /* Function vect_recog_bool_pattern
5609 Try to find a pattern like the following:
5611 bool a_b, b_b, c_b, d_b, e_b;
5612 TYPE f_T;
5613 loop:
5614 S1 a_b = x1 CMP1 y1;
5615 S2 b_b = x2 CMP2 y2;
5616 S3 c_b = a_b & b_b;
5617 S4 d_b = x3 CMP3 y3;
5618 S5 e_b = c_b | d_b;
5619 S6 f_T = (TYPE) e_b;
5621 where type 'TYPE' is an integral type. Or a similar pattern
5622 ending in
5624 S6 f_Y = e_b ? r_Y : s_Y;
5626 as results from if-conversion of a complex condition.
5628 Input:
5630 * STMT_VINFO: The stmt at the end from which the pattern
5631 search begins, i.e. cast of a bool to
5632 an integer type.
5634 Output:
5636 * TYPE_OUT: The type of the output of this pattern.
5638 * Return value: A new stmt that will be used to replace the pattern.
5640 Assuming the size of TYPE is the same as the size of all comparisons
5641 (otherwise some casts would be added where needed), for the above
5642 sequence we create related pattern stmts:
5643 S1' a_T = x1 CMP1 y1 ? 1 : 0;
5644 S3' c_T = x2 CMP2 y2 ? a_T : 0;
5645 S4' d_T = x3 CMP3 y3 ? 1 : 0;
5646 S5' e_T = c_T | d_T;
5647 S6' f_T = e_T;
5649 Instead of the above S3' we could emit:
5650 S2' b_T = x2 CMP2 y2 ? 1 : 0;
5651 S3' c_T = a_T | b_T;
5652 but the above is more efficient. */
5654 static gimple *
5655 vect_recog_bool_pattern (vec_info *vinfo,
5656 stmt_vec_info stmt_vinfo, tree *type_out)
5658 gimple *last_stmt = stmt_vinfo->stmt;
5659 enum tree_code rhs_code;
5660 tree var, lhs, rhs, vectype;
5661 gimple *pattern_stmt;
5663 if (!is_gimple_assign (last_stmt))
5664 return NULL;
5666 var = gimple_assign_rhs1 (last_stmt);
5667 lhs = gimple_assign_lhs (last_stmt);
5668 rhs_code = gimple_assign_rhs_code (last_stmt);
5670 if (rhs_code == VIEW_CONVERT_EXPR)
5671 var = TREE_OPERAND (var, 0);
5673 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
5674 return NULL;
5676 hash_set<gimple *> bool_stmts;
5678 if (CONVERT_EXPR_CODE_P (rhs_code)
5679 || rhs_code == VIEW_CONVERT_EXPR)
5681 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
5682 || VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
5683 return NULL;
5684 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
5686 if (check_bool_pattern (var, vinfo, bool_stmts))
5688 rhs = adjust_bool_stmts (vinfo, bool_stmts,
5689 TREE_TYPE (lhs), stmt_vinfo);
5690 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
5691 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
5692 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
5693 else
5694 pattern_stmt
5695 = gimple_build_assign (lhs, NOP_EXPR, rhs);
5697 else
5699 tree type = integer_type_for_mask (var, vinfo);
5700 tree cst0, cst1, tmp;
5702 if (!type)
5703 return NULL;
5705 /* We may use the cond directly with a narrowed type, avoiding
5706 multiple cond exprs followed by result packing, and instead
5707 perform a single cond with a packed mask. In case of
5708 widening it is better to do the cond first and then extract
5709 the results. */
5710 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
5711 type = TREE_TYPE (lhs);
5713 cst0 = build_int_cst (type, 0);
5714 cst1 = build_int_cst (type, 1);
5715 tmp = vect_recog_temp_ssa_var (type, NULL);
5716 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
5718 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
5720 tree new_vectype = get_vectype_for_scalar_type (vinfo, type);
5721 append_pattern_def_seq (vinfo, stmt_vinfo,
5722 pattern_stmt, new_vectype);
5724 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
5725 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
5729 *type_out = vectype;
5730 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
5732 return pattern_stmt;
5734 else if (rhs_code == COND_EXPR
5735 && TREE_CODE (var) == SSA_NAME)
5737 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
5738 if (vectype == NULL_TREE)
5739 return NULL;
5741 /* Build a scalar type for the boolean result that when
5742 vectorized matches the vector type of the result in
5743 size and number of elements. */
5744 unsigned prec
5745 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
5746 TYPE_VECTOR_SUBPARTS (vectype));
5748 tree type
5749 = build_nonstandard_integer_type (prec,
5750 TYPE_UNSIGNED (TREE_TYPE (var)));
5751 if (get_vectype_for_scalar_type (vinfo, type) == NULL_TREE)
5752 return NULL;
5754 if (check_bool_pattern (var, vinfo, bool_stmts))
5755 var = adjust_bool_stmts (vinfo, bool_stmts, type, stmt_vinfo);
5756 else if (integer_type_for_mask (var, vinfo))
5757 return NULL;
5759 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
5760 pattern_stmt
5761 = gimple_build_assign (lhs, COND_EXPR,
5762 build2 (NE_EXPR, boolean_type_node,
5763 var, build_int_cst (TREE_TYPE (var), 0)),
5764 gimple_assign_rhs2 (last_stmt),
5765 gimple_assign_rhs3 (last_stmt));
5766 *type_out = vectype;
5767 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
5769 return pattern_stmt;
5771 else if (rhs_code == SSA_NAME
5772 && STMT_VINFO_DATA_REF (stmt_vinfo))
5774 stmt_vec_info pattern_stmt_info;
5775 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
5776 if (!vectype || !VECTOR_MODE_P (TYPE_MODE (vectype)))
5777 return NULL;
5779 if (check_bool_pattern (var, vinfo, bool_stmts))
5780 rhs = adjust_bool_stmts (vinfo, bool_stmts,
5781 TREE_TYPE (vectype), stmt_vinfo);
5782 else
5784 tree type = integer_type_for_mask (var, vinfo);
5785 tree cst0, cst1, new_vectype;
5787 if (!type)
5788 return NULL;
5790 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
5791 type = TREE_TYPE (vectype);
5793 cst0 = build_int_cst (type, 0);
5794 cst1 = build_int_cst (type, 1);
5795 new_vectype = get_vectype_for_scalar_type (vinfo, type);
5797 rhs = vect_recog_temp_ssa_var (type, NULL);
5798 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
5799 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, new_vectype);
5802 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
5803 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
5805 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
5806 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
5807 append_pattern_def_seq (vinfo, stmt_vinfo, cast_stmt);
5808 rhs = rhs2;
5810 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
5811 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
5812 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
5813 *type_out = vectype;
5814 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
5816 return pattern_stmt;
5818 else
5819 return NULL;
5823 /* A helper for vect_recog_mask_conversion_pattern. Build
5824 conversion of MASK to a type suitable for masking VECTYPE.
5825 The built statement gets the required vectype and is appended
5826 to STMT_VINFO's pattern sequence.
5828 Return converted mask. */
5830 static tree
5831 build_mask_conversion (vec_info *vinfo,
5832 tree mask, tree vectype, stmt_vec_info stmt_vinfo)
5834 gimple *stmt;
5835 tree masktype, tmp;
5837 masktype = truth_type_for (vectype);
5838 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
5839 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
5840 append_pattern_def_seq (vinfo, stmt_vinfo,
5841 stmt, masktype, TREE_TYPE (vectype));
5843 return tmp;
5847 /* Function vect_recog_mask_conversion_pattern
5849 Try to find statements which require boolean type
5850 conversion. Additional conversion statements are
5851 added to handle such cases. For example:
5853 bool m_1, m_2, m_3;
5854 int i_4, i_5;
5855 double d_6, d_7;
5856 char c_1, c_2, c_3;
5858 S1 m_1 = i_4 > i_5;
5859 S2 m_2 = d_6 < d_7;
5860 S3 m_3 = m_1 & m_2;
5861 S4 c_1 = m_3 ? c_2 : c_3;
5863 Will be transformed into:
5865 S1 m_1 = i_4 > i_5;
5866 S2 m_2 = d_6 < d_7;
5867 S3'' m_2' = (_Bool[bitsize=32])m_2
5868 S3' m_3' = m_1 & m_2';
5869 S4'' m_3'' = (_Bool[bitsize=8])m_3'
5870 S4' c_1' = m_3'' ? c_2 : c_3; */
5872 static gimple *
5873 vect_recog_mask_conversion_pattern (vec_info *vinfo,
5874 stmt_vec_info stmt_vinfo, tree *type_out)
5876 gimple *last_stmt = stmt_vinfo->stmt;
5877 enum tree_code rhs_code;
5878 tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
5879 tree vectype1, vectype2;
5880 stmt_vec_info pattern_stmt_info;
5881 tree rhs1_op0 = NULL_TREE, rhs1_op1 = NULL_TREE;
5882 tree rhs1_op0_type = NULL_TREE, rhs1_op1_type = NULL_TREE;
5884 /* Check for MASK_LOAD and MASK_STORE as well as COND_OP calls requiring mask
5885 conversion. */
5886 if (is_gimple_call (last_stmt)
5887 && gimple_call_internal_p (last_stmt))
5889 gcall *pattern_stmt;
5891 internal_fn ifn = gimple_call_internal_fn (last_stmt);
5892 int mask_argno = internal_fn_mask_index (ifn);
5893 if (mask_argno < 0)
5894 return NULL;
5896 bool store_p = internal_store_fn_p (ifn);
5897 bool load_p = internal_load_fn_p (ifn);
5898 if (store_p)
5900 int rhs_index = internal_fn_stored_value_index (ifn);
5901 tree rhs = gimple_call_arg (last_stmt, rhs_index);
5902 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs));
5904 else
5906 lhs = gimple_call_lhs (last_stmt);
5907 if (!lhs)
5908 return NULL;
5909 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
5912 if (!vectype1)
5913 return NULL;
5915 tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
5916 tree mask_arg_type = integer_type_for_mask (mask_arg, vinfo);
5917 if (mask_arg_type)
5919 vectype2 = get_mask_type_for_scalar_type (vinfo, mask_arg_type);
5921 if (!vectype2
5922 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
5923 TYPE_VECTOR_SUBPARTS (vectype2)))
5924 return NULL;
5926 else if (store_p || load_p)
5927 return NULL;
5929 tmp = build_mask_conversion (vinfo, mask_arg, vectype1, stmt_vinfo);
5931 auto_vec<tree, 8> args;
5932 unsigned int nargs = gimple_call_num_args (last_stmt);
5933 args.safe_grow (nargs, true);
5934 for (unsigned int i = 0; i < nargs; ++i)
5935 args[i] = ((int) i == mask_argno
5936 ? tmp
5937 : gimple_call_arg (last_stmt, i));
5938 pattern_stmt = gimple_build_call_internal_vec (ifn, args);
5940 if (!store_p)
5942 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
5943 gimple_call_set_lhs (pattern_stmt, lhs);
5946 if (load_p || store_p)
5947 gimple_call_set_nothrow (pattern_stmt, true);
5949 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
5950 if (STMT_VINFO_DATA_REF (stmt_vinfo))
5951 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
5953 *type_out = vectype1;
5954 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
5956 return pattern_stmt;
5959 if (!is_gimple_assign (last_stmt))
5960 return NULL;
5962 gimple *pattern_stmt;
5963 lhs = gimple_assign_lhs (last_stmt);
5964 rhs1 = gimple_assign_rhs1 (last_stmt);
5965 rhs_code = gimple_assign_rhs_code (last_stmt);
5967 /* Check for cond expression requiring mask conversion. */
5968 if (rhs_code == COND_EXPR)
5970 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
5972 if (TREE_CODE (rhs1) == SSA_NAME)
5974 rhs1_type = integer_type_for_mask (rhs1, vinfo);
5975 if (!rhs1_type)
5976 return NULL;
5978 else if (COMPARISON_CLASS_P (rhs1))
5980 /* Check whether we're comparing scalar booleans and (if so)
5981 whether a better mask type exists than the mask associated
5982 with boolean-sized elements. This avoids unnecessary packs
5983 and unpacks if the booleans are set from comparisons of
5984 wider types. E.g. in:
5986 int x1, x2, x3, x4, y1, y2;
5988 bool b1 = (x1 == x2);
5989 bool b2 = (x3 == x4);
5990 ... = b1 == b2 ? y1 : y2;
5992 it is better for b1 and b2 to use the mask type associated
5993 with int elements rather than bool (byte) elements. */
5994 rhs1_op0 = TREE_OPERAND (rhs1, 0);
5995 rhs1_op1 = TREE_OPERAND (rhs1, 1);
5996 if (!rhs1_op0 || !rhs1_op1)
5997 return NULL;
5998 rhs1_op0_type = integer_type_for_mask (rhs1_op0, vinfo);
5999 rhs1_op1_type = integer_type_for_mask (rhs1_op1, vinfo);
6001 if (!rhs1_op0_type)
6002 rhs1_type = TREE_TYPE (rhs1_op0);
6003 else if (!rhs1_op1_type)
6004 rhs1_type = TREE_TYPE (rhs1_op1);
6005 else if (TYPE_PRECISION (rhs1_op0_type)
6006 != TYPE_PRECISION (rhs1_op1_type))
6008 int tmp0 = (int) TYPE_PRECISION (rhs1_op0_type)
6009 - (int) TYPE_PRECISION (TREE_TYPE (lhs));
6010 int tmp1 = (int) TYPE_PRECISION (rhs1_op1_type)
6011 - (int) TYPE_PRECISION (TREE_TYPE (lhs));
6012 if ((tmp0 > 0 && tmp1 > 0) || (tmp0 < 0 && tmp1 < 0))
6014 if (abs (tmp0) > abs (tmp1))
6015 rhs1_type = rhs1_op1_type;
6016 else
6017 rhs1_type = rhs1_op0_type;
6019 else
6020 rhs1_type = build_nonstandard_integer_type
6021 (TYPE_PRECISION (TREE_TYPE (lhs)), 1);
6023 else
6024 rhs1_type = rhs1_op0_type;
6026 else
6027 return NULL;
6029 vectype2 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
6031 if (!vectype1 || !vectype2)
6032 return NULL;
6034 /* Continue if a conversion is needed. Also continue if we have
6035 a comparison whose vector type would normally be different from
6036 VECTYPE2 when considered in isolation. In that case we'll
6037 replace the comparison with an SSA name (so that we can record
6038 its vector type) and behave as though the comparison was an SSA
6039 name from the outset. */
6040 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
6041 TYPE_VECTOR_SUBPARTS (vectype2))
6042 && !rhs1_op0_type
6043 && !rhs1_op1_type)
6044 return NULL;
6046 /* If rhs1 is invariant and we can promote it leave the COND_EXPR
6047 in place, we can handle it in vectorizable_condition. This avoids
6048 unnecessary promotion stmts and increased vectorization factor. */
6049 if (COMPARISON_CLASS_P (rhs1)
6050 && INTEGRAL_TYPE_P (rhs1_type)
6051 && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
6052 TYPE_VECTOR_SUBPARTS (vectype2)))
6054 enum vect_def_type dt;
6055 if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
6056 && dt == vect_external_def
6057 && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
6058 && (dt == vect_external_def
6059 || dt == vect_constant_def))
6061 tree wide_scalar_type = build_nonstandard_integer_type
6062 (vector_element_bits (vectype1), TYPE_UNSIGNED (rhs1_type));
6063 tree vectype3 = get_vectype_for_scalar_type (vinfo,
6064 wide_scalar_type);
6065 if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
6066 return NULL;
6070 /* If rhs1 is a comparison we need to move it into a
6071 separate statement. */
6072 if (TREE_CODE (rhs1) != SSA_NAME)
6074 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
6075 if (rhs1_op0_type
6076 && TYPE_PRECISION (rhs1_op0_type) != TYPE_PRECISION (rhs1_type))
6077 rhs1_op0 = build_mask_conversion (vinfo, rhs1_op0,
6078 vectype2, stmt_vinfo);
6079 if (rhs1_op1_type
6080 && TYPE_PRECISION (rhs1_op1_type) != TYPE_PRECISION (rhs1_type))
6081 rhs1_op1 = build_mask_conversion (vinfo, rhs1_op1,
6082 vectype2, stmt_vinfo);
6083 pattern_stmt = gimple_build_assign (tmp, TREE_CODE (rhs1),
6084 rhs1_op0, rhs1_op1);
6085 rhs1 = tmp;
6086 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vectype2,
6087 rhs1_type);
6090 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
6091 TYPE_VECTOR_SUBPARTS (vectype2)))
6092 tmp = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
6093 else
6094 tmp = rhs1;
6096 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
6097 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
6098 gimple_assign_rhs2 (last_stmt),
6099 gimple_assign_rhs3 (last_stmt));
6101 *type_out = vectype1;
6102 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
6104 return pattern_stmt;
6107 /* Now check for binary boolean operations requiring conversion for
6108 one of operands. */
6109 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
6110 return NULL;
6112 if (rhs_code != BIT_IOR_EXPR
6113 && rhs_code != BIT_XOR_EXPR
6114 && rhs_code != BIT_AND_EXPR
6115 && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
6116 return NULL;
6118 rhs2 = gimple_assign_rhs2 (last_stmt);
6120 rhs1_type = integer_type_for_mask (rhs1, vinfo);
6121 rhs2_type = integer_type_for_mask (rhs2, vinfo);
6123 if (!rhs1_type || !rhs2_type
6124 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
6125 return NULL;
6127 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
6129 vectype1 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
6130 if (!vectype1)
6131 return NULL;
6132 rhs2 = build_mask_conversion (vinfo, rhs2, vectype1, stmt_vinfo);
6134 else
6136 vectype1 = get_mask_type_for_scalar_type (vinfo, rhs2_type);
6137 if (!vectype1)
6138 return NULL;
6139 rhs1 = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
6142 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
6143 pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
6145 *type_out = vectype1;
6146 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
6148 return pattern_stmt;
6151 /* STMT_INFO is a load or store. If the load or store is conditional, return
6152 the boolean condition under which it occurs, otherwise return null. */
6154 static tree
6155 vect_get_load_store_mask (stmt_vec_info stmt_info)
6157 if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt))
6159 gcc_assert (gimple_assign_single_p (def_assign));
6160 return NULL_TREE;
6163 if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt))
6165 internal_fn ifn = gimple_call_internal_fn (def_call);
6166 int mask_index = internal_fn_mask_index (ifn);
6167 return gimple_call_arg (def_call, mask_index);
6170 gcc_unreachable ();
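/* For a conditional access the stmt is an internal call of roughly the form
   (illustrative operand names):

     _1 = .MASK_LOAD (ptr_2, align, mask_3);

   and vect_get_load_store_mask returns mask_3; an unconditional load or
   store is a plain single assignment and yields NULL_TREE.  */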
6173 /* Return MASK if MASK is suitable for masking an operation on vectors
6174 of type VECTYPE, otherwise convert it into such a form and return
6175 the result. Associate any conversion statements with STMT_INFO's
6176 pattern. */
6178 static tree
6179 vect_convert_mask_for_vectype (tree mask, tree vectype,
6180 stmt_vec_info stmt_info, vec_info *vinfo)
6182 tree mask_type = integer_type_for_mask (mask, vinfo);
6183 if (mask_type)
6185 tree mask_vectype = get_mask_type_for_scalar_type (vinfo, mask_type);
6186 if (mask_vectype
6187 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
6188 TYPE_VECTOR_SUBPARTS (mask_vectype)))
6189 mask = build_mask_conversion (vinfo, mask, vectype, stmt_info);
6191 return mask;
6194 /* Return the equivalent of:
6196 fold_convert (TYPE, VALUE)
6198 with the expectation that the operation will be vectorized.
6199 If new statements are needed, add them as pattern statements
6200 to STMT_INFO. */
6202 static tree
6203 vect_add_conversion_to_pattern (vec_info *vinfo,
6204 tree type, tree value, stmt_vec_info stmt_info)
6206 if (useless_type_conversion_p (type, TREE_TYPE (value)))
6207 return value;
6209 tree new_value = vect_recog_temp_ssa_var (type, NULL);
6210 gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
6211 append_pattern_def_seq (vinfo, stmt_info, conversion,
6212 get_vectype_for_scalar_type (vinfo, type));
6213 return new_value;
6216 /* Try to convert STMT_INFO into a call to a gather load or scatter store
6217 internal function. Return the final statement on success and set
6218 *TYPE_OUT to the vector type being loaded or stored.
6220 This function only handles gathers and scatters that were recognized
6221 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
6223 static gimple *
6224 vect_recog_gather_scatter_pattern (vec_info *vinfo,
6225 stmt_vec_info stmt_info, tree *type_out)
6227 /* Currently we only support this for loop vectorization. */
6228 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6229 if (!loop_vinfo)
6230 return NULL;
6232 /* Make sure that we're looking at a gather load or scatter store. */
6233 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
6234 if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6235 return NULL;
6237 /* Get the boolean that controls whether the load or store happens.
6238 This is null if the operation is unconditional. */
6239 tree mask = vect_get_load_store_mask (stmt_info);
6241 /* Make sure that the target supports an appropriate internal
6242 function for the gather/scatter operation. */
6243 gather_scatter_info gs_info;
6244 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info)
6245 || gs_info.ifn == IFN_LAST)
6246 return NULL;
6248 /* Convert the mask to the right form. */
6249 tree gs_vectype = get_vectype_for_scalar_type (loop_vinfo,
6250 gs_info.element_type);
6251 if (mask)
6252 mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
6253 loop_vinfo);
6254 else if (gs_info.ifn == IFN_MASK_SCATTER_STORE
6255 || gs_info.ifn == IFN_MASK_GATHER_LOAD
6256 || gs_info.ifn == IFN_MASK_LEN_SCATTER_STORE
6257 || gs_info.ifn == IFN_MASK_LEN_GATHER_LOAD)
6258 mask = build_int_cst (TREE_TYPE (truth_type_for (gs_vectype)), -1);
6260 /* Get the invariant base and non-invariant offset, converting the
6261 latter to the same width as the vector elements. */
6262 tree base = gs_info.base;
6263 tree offset_type = TREE_TYPE (gs_info.offset_vectype);
6264 tree offset = vect_add_conversion_to_pattern (vinfo, offset_type,
6265 gs_info.offset, stmt_info);
6267 /* Build the new pattern statement. */
6268 tree scale = size_int (gs_info.scale);
6269 gcall *pattern_stmt;
6270 if (DR_IS_READ (dr))
6272 tree zero = build_zero_cst (gs_info.element_type);
6273 if (mask != NULL)
6274 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base,
6275 offset, scale, zero, mask);
6276 else
6277 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
6278 offset, scale, zero);
6279 tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
6280 gimple_call_set_lhs (pattern_stmt, load_lhs);
6282 else
6284 tree rhs = vect_get_store_rhs (stmt_info);
6285 if (mask != NULL)
6286 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5,
6287 base, offset, scale, rhs,
6288 mask);
6289 else
6290 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4,
6291 base, offset, scale, rhs);
6293 gimple_call_set_nothrow (pattern_stmt, true);
6295 /* Copy across relevant vectorization info and associate DR with the
6296 new pattern statement instead of the original statement. */
6297 stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
6298 loop_vinfo->move_dr (pattern_stmt_info, stmt_info);
6300 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6301 *type_out = vectype;
6302 vect_pattern_detected ("gather/scatter pattern", stmt_info->stmt);
6304 return pattern_stmt;
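/* As a sketch (hypothetical source code), the kind of access this pattern
   targets is:

     void
     f (int *restrict dst, int *restrict src, int *restrict idx, int n)
     {
       for (int i = 0; i < n; ++i)
         dst[i] = src[idx[i]];
     }

   where the load from src[idx[i]] was already recognized as a gather
   (STMT_VINFO_GATHER_SCATTER_P); it is replaced by a call to the target's
   gather-load internal function taking the invariant base, the (possibly
   converted) offset, the scale and, for conditional accesses, a mask.  */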
6307 /* Return true if TYPE is a non-boolean integer type. These are the types
6308 that we want to consider for narrowing. */
6310 static bool
6311 vect_narrowable_type_p (tree type)
6313 return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
6316 /* Return true if the operation given by CODE can be truncated to N bits
6317 when only N bits of the output are needed. This is only true if bit N+1
6318 of the inputs has no effect on the low N bits of the result. */
6320 static bool
6321 vect_truncatable_operation_p (tree_code code)
6323 switch (code)
6325 case NEGATE_EXPR:
6326 case PLUS_EXPR:
6327 case MINUS_EXPR:
6328 case MULT_EXPR:
6329 case BIT_NOT_EXPR:
6330 case BIT_AND_EXPR:
6331 case BIT_IOR_EXPR:
6332 case BIT_XOR_EXPR:
6333 case COND_EXPR:
6334 return true;
6336 default:
6337 return false;
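/* A small worked example (illustrative values): truncation to 8 bits is
   safe for addition because, for all unsigned a and b,

     ((a + b) & 0xff) == (((a & 0xff) + (b & 0xff)) & 0xff)

   whereas it is not safe for division: with a = 256 and b = 2 we have
   ((a / b) & 0xff) == 128 but ((a & 0xff) / (b & 0xff)) == 0, i.e. bit 8
   of the input does affect the low 8 bits of the result.  */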
6341 /* Record that STMT_INFO could be changed from operating on TYPE to
6342 operating on a type with the precision and sign given by PRECISION
6343 and SIGN respectively. PRECISION is an arbitrary bit precision;
6344 it might not be a whole number of bytes. */
6346 static void
6347 vect_set_operation_type (stmt_vec_info stmt_info, tree type,
6348 unsigned int precision, signop sign)
6350 /* Round the precision up to a whole number of bytes. */
6351 precision = vect_element_precision (precision);
6352 if (precision < TYPE_PRECISION (type)
6353 && (!stmt_info->operation_precision
6354 || stmt_info->operation_precision > precision))
6356 stmt_info->operation_precision = precision;
6357 stmt_info->operation_sign = sign;
6361 /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
6362 non-boolean inputs, all of which have type TYPE. MIN_INPUT_PRECISION
6363 is an arbitrary bit precision; it might not be a whole number of bytes. */
6365 static void
6366 vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
6367 unsigned int min_input_precision)
6369 /* This operation in isolation only requires the inputs to have
6370 MIN_INPUT_PRECISION of precision. However, that doesn't mean
6371 that MIN_INPUT_PRECISION is a natural precision for the chain
6372 as a whole. E.g. consider something like:
6374 unsigned short *x, *y;
6375 *y = ((*x & 0xf0) >> 4) | (*y << 4);
6377 The right shift can be done on unsigned chars, and only requires the
6378 result of "*x & 0xf0" to be done on unsigned chars. But taking that
6379 approach would mean turning a natural chain of single-vector unsigned
6380 short operations into one that truncates "*x" and then extends
6381 "(*x & 0xf0) >> 4", with two vectors for each unsigned short
6382 operation and one vector for each unsigned char operation.
6383 This would be a significant pessimization.
6385 Instead only propagate the maximum of this precision and the precision
6386 required by the users of the result. This means that we don't pessimize
6387 the case above but continue to optimize things like:
6389 unsigned char *y;
6390 unsigned short *x;
6391 *y = ((*x & 0xf0) >> 4) | (*y << 4);
6393 Here we would truncate two vectors of *x to a single vector of
6394 unsigned chars and use single-vector unsigned char operations for
6395 everything else, rather than doing two unsigned short copies of
6396 "(*x & 0xf0) >> 4" and then truncating the result. */
6397 min_input_precision = MAX (min_input_precision,
6398 stmt_info->min_output_precision);
6400 if (min_input_precision < TYPE_PRECISION (type)
6401 && (!stmt_info->min_input_precision
6402 || stmt_info->min_input_precision > min_input_precision))
6403 stmt_info->min_input_precision = min_input_precision;
6406 /* Subroutine of vect_determine_min_output_precision. Return true if
6407 we can calculate a reduced number of output bits for STMT_INFO,
6408 whose result is LHS. */
6410 static bool
6411 vect_determine_min_output_precision_1 (vec_info *vinfo,
6412 stmt_vec_info stmt_info, tree lhs)
6414 /* Take the maximum precision required by users of the result. */
6415 unsigned int precision = 0;
6416 imm_use_iterator iter;
6417 use_operand_p use;
6418 FOR_EACH_IMM_USE_FAST (use, iter, lhs)
6420 gimple *use_stmt = USE_STMT (use);
6421 if (is_gimple_debug (use_stmt))
6422 continue;
6423 stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
6424 if (!use_stmt_info || !use_stmt_info->min_input_precision)
6425 return false;
6426 /* The input precision recorded for COND_EXPRs applies only to the
6427 "then" and "else" values. */
6428 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
6429 if (assign
6430 && gimple_assign_rhs_code (assign) == COND_EXPR
6431 && use->use != gimple_assign_rhs2_ptr (assign)
6432 && use->use != gimple_assign_rhs3_ptr (assign))
6433 return false;
6434 precision = MAX (precision, use_stmt_info->min_input_precision);
6437 if (dump_enabled_p ())
6438 dump_printf_loc (MSG_NOTE, vect_location,
6439 "only the low %d bits of %T are significant\n",
6440 precision, lhs);
6441 stmt_info->min_output_precision = precision;
6442 return true;
6445 /* Calculate min_output_precision for STMT_INFO. */
6447 static void
6448 vect_determine_min_output_precision (vec_info *vinfo, stmt_vec_info stmt_info)
6450 /* We're only interested in statements with a narrowable result. */
6451 tree lhs = gimple_get_lhs (stmt_info->stmt);
6452 if (!lhs
6453 || TREE_CODE (lhs) != SSA_NAME
6454 || !vect_narrowable_type_p (TREE_TYPE (lhs)))
6455 return;
6457 if (!vect_determine_min_output_precision_1 (vinfo, stmt_info, lhs))
6458 stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
6461 /* Use range information to decide whether STMT (described by STMT_INFO)
6462 could be done in a narrower type. This is effectively a forward
6463 propagation, since it uses context-independent information that applies
6464 to all users of an SSA name. */
6466 static void
6467 vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
6469 tree lhs = gimple_assign_lhs (stmt);
6470 if (!lhs || TREE_CODE (lhs) != SSA_NAME)
6471 return;
6473 tree type = TREE_TYPE (lhs);
6474 if (!vect_narrowable_type_p (type))
6475 return;
6477 /* First see whether we have any useful range information for the result. */
6478 unsigned int precision = TYPE_PRECISION (type);
6479 signop sign = TYPE_SIGN (type);
6480 wide_int min_value, max_value;
6481 if (!vect_get_range_info (lhs, &min_value, &max_value))
6482 return;
6484 tree_code code = gimple_assign_rhs_code (stmt);
6485 unsigned int nops = gimple_num_ops (stmt);
6487 if (!vect_truncatable_operation_p (code))
6489 /* Handle operations that can be computed in type T if all inputs
6490 and outputs can be represented in type T. Also handle left and
6491 right shifts, where (in addition) the maximum shift amount must
6492 be less than the number of bits in T. */
6493 bool is_shift;
6494 switch (code)
6496 case LSHIFT_EXPR:
6497 case RSHIFT_EXPR:
6498 is_shift = true;
6499 break;
6501 case ABS_EXPR:
6502 case MIN_EXPR:
6503 case MAX_EXPR:
6504 case TRUNC_DIV_EXPR:
6505 case CEIL_DIV_EXPR:
6506 case FLOOR_DIV_EXPR:
6507 case ROUND_DIV_EXPR:
6508 case EXACT_DIV_EXPR:
6509 /* Modulus is excluded because it is typically calculated by doing
6510 a division, for which minimum signed / -1 isn't representable in
6511 the original signed type. We could take the division range into
6512 account instead, if handling modulus ever becomes important. */
6513 is_shift = false;
6514 break;
6516 default:
6517 return;
6519 for (unsigned int i = 1; i < nops; ++i)
6521 tree op = gimple_op (stmt, i);
6522 wide_int op_min_value, op_max_value;
6523 if (TREE_CODE (op) == INTEGER_CST)
6525 unsigned int op_precision = TYPE_PRECISION (TREE_TYPE (op));
6526 op_min_value = op_max_value = wi::to_wide (op, op_precision);
6528 else if (TREE_CODE (op) == SSA_NAME)
6530 if (!vect_get_range_info (op, &op_min_value, &op_max_value))
6531 return;
6533 else
6534 return;
6536 if (is_shift && i == 2)
6538 /* There needs to be one more bit than the maximum shift amount.
6540 If the maximum shift amount is already 1 less than PRECISION
6541 then we can't narrow the shift further. Dealing with that
6542 case first ensures that we can safely use an unsigned range
6543 below.
6545 op_min_value isn't relevant, since shifts by negative amounts
6546 are UB. */
6547 if (wi::geu_p (op_max_value, precision - 1))
6548 return;
6549 unsigned int min_bits = op_max_value.to_uhwi () + 1;
6551 /* As explained below, we can convert a signed shift into an
6552 unsigned shift if the sign bit is always clear. At this
6553 point we've already processed the ranges of the output and
6554 the first input. */
6555 auto op_sign = sign;
6556 if (sign == SIGNED && !wi::neg_p (min_value))
6557 op_sign = UNSIGNED;
6558 op_min_value = wide_int::from (wi::min_value (min_bits, op_sign),
6559 precision, op_sign);
6560 op_max_value = wide_int::from (wi::max_value (min_bits, op_sign),
6561 precision, op_sign);
6563 min_value = wi::min (min_value, op_min_value, sign);
6564 max_value = wi::max (max_value, op_max_value, sign);
6568 /* Try to switch signed types for unsigned types if we can.
6569 This is better for two reasons. First, unsigned ops tend
6570 to be cheaper than signed ops. Second, it means that we can
6571 handle things like:
6573 signed char c;
6574 int res = (int) c & 0xff00; // range [0x0000, 0xff00]
6578 signed char c;
6579 unsigned short res_1 = (unsigned short) c & 0xff00;
6580 int res = (int) res_1;
6582 where the intermediate result res_1 has unsigned rather than
6583 signed type. */
6584 if (sign == SIGNED && !wi::neg_p (min_value))
6585 sign = UNSIGNED;
6587 /* See what precision is required for MIN_VALUE and MAX_VALUE. */
6588 unsigned int precision1 = wi::min_precision (min_value, sign);
6589 unsigned int precision2 = wi::min_precision (max_value, sign);
6590 unsigned int value_precision = MAX (precision1, precision2);
6591 if (value_precision >= precision)
6592 return;
6594 if (dump_enabled_p ())
6595 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
6596 " without loss of precision: %G",
6597 sign == SIGNED ? "signed" : "unsigned",
6598 value_precision, (gimple *) stmt);
6600 vect_set_operation_type (stmt_info, type, value_precision, sign);
6601 vect_set_min_input_precision (stmt_info, type, value_precision);
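/* For instance (illustrative range): if the result of an addition on "int"
   is known to lie in [0, 40000], the operation is truncatable, the sign can
   be switched to unsigned because the minimum is non-negative, and the
   required precision is 16 < 32, so the stmt is recorded as computable in
   16-bit unsigned arithmetic and only 16 bits are requested from its
   inputs.  */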
6604 /* Use information about the users of STMT's result to decide whether
6605 STMT (described by STMT_INFO) could be done in a narrower type.
6606 This is effectively a backward propagation. */
6608 static void
6609 vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
6611 tree_code code = gimple_assign_rhs_code (stmt);
6612 unsigned int opno = (code == COND_EXPR ? 2 : 1);
6613 tree type = TREE_TYPE (gimple_op (stmt, opno));
6614 if (!vect_narrowable_type_p (type))
6615 return;
6617 unsigned int precision = TYPE_PRECISION (type);
6618 unsigned int operation_precision, min_input_precision;
6619 switch (code)
6621 CASE_CONVERT:
6622 /* Only the bits that contribute to the output matter. Don't change
6623 the precision of the operation itself. */
6624 operation_precision = precision;
6625 min_input_precision = stmt_info->min_output_precision;
6626 break;
6628 case LSHIFT_EXPR:
6629 case RSHIFT_EXPR:
6631 tree shift = gimple_assign_rhs2 (stmt);
6632 if (TREE_CODE (shift) != INTEGER_CST
6633 || !wi::ltu_p (wi::to_widest (shift), precision))
6634 return;
6635 unsigned int const_shift = TREE_INT_CST_LOW (shift);
6636 if (code == LSHIFT_EXPR)
6638 /* Avoid creating an undefined shift.
6640 ??? We could instead use min_output_precision as-is and
6641 optimize out-of-range shifts to zero. However, only
6642 degenerate testcases shift away all their useful input data,
6643 and it isn't natural to drop input operations in the middle
6644 of vectorization. This sort of thing should really be
6645 handled before vectorization. */
6646 operation_precision = MAX (stmt_info->min_output_precision,
6647 const_shift + 1);
6648 /* We need CONST_SHIFT fewer bits of the input. */
6649 min_input_precision = (MAX (operation_precision, const_shift)
6650 - const_shift);
6652 else
6654 /* We need CONST_SHIFT extra bits to do the operation. */
6655 operation_precision = (stmt_info->min_output_precision
6656 + const_shift);
6657 min_input_precision = operation_precision;
6659 break;
6662 default:
6663 if (vect_truncatable_operation_p (code))
6665 /* Input bit N has no effect on output bits N-1 and lower. */
6666 operation_precision = stmt_info->min_output_precision;
6667 min_input_precision = operation_precision;
6668 break;
6670 return;
6673 if (operation_precision < precision)
6675 if (dump_enabled_p ())
6676 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
6677 " without affecting users: %G",
6678 TYPE_UNSIGNED (type) ? "unsigned" : "signed",
6679 operation_precision, (gimple *) stmt);
6680 vect_set_operation_type (stmt_info, type, operation_precision,
6681 TYPE_SIGN (type));
6683 vect_set_min_input_precision (stmt_info, type, min_input_precision);
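/* A short example (illustrative numbers): for a right shift "x >> 3" whose
   users only need 8 bits of the result, the shift itself must be done in
   8 + 3 = 11 bits, so operation_precision and min_input_precision are both
   11; vect_set_operation_type then rounds the operation precision up to a
   whole number of bytes (16).  */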
6686 /* Return true if the statement described by STMT_INFO sets a boolean
6687 SSA_NAME and if we know how to vectorize this kind of statement using
6688 vector mask types. */
6690 static bool
6691 possible_vector_mask_operation_p (stmt_vec_info stmt_info)
6693 tree lhs = gimple_get_lhs (stmt_info->stmt);
6694 tree_code code = ERROR_MARK;
6695 gassign *assign = NULL;
6696 gcond *cond = NULL;
6698 if ((assign = dyn_cast <gassign *> (stmt_info->stmt)))
6699 code = gimple_assign_rhs_code (assign);
6700 else if ((cond = dyn_cast <gcond *> (stmt_info->stmt)))
6702 lhs = gimple_cond_lhs (cond);
6703 code = gimple_cond_code (cond);
6706 if (!lhs
6707 || TREE_CODE (lhs) != SSA_NAME
6708 || !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
6709 return false;
6711 if (code != ERROR_MARK)
6713 switch (code)
6715 CASE_CONVERT:
6716 case SSA_NAME:
6717 case BIT_NOT_EXPR:
6718 case BIT_IOR_EXPR:
6719 case BIT_XOR_EXPR:
6720 case BIT_AND_EXPR:
6721 return true;
6723 default:
6724 return TREE_CODE_CLASS (code) == tcc_comparison;
6727 else if (is_a <gphi *> (stmt_info->stmt))
6728 return true;
6729 return false;
6732 /* If STMT_INFO sets a boolean SSA_NAME, see whether we should use
6733 a vector mask type instead of a normal vector type. Record the
6734 result in STMT_INFO->mask_precision. */
6736 static void
6737 vect_determine_mask_precision (vec_info *vinfo, stmt_vec_info stmt_info)
6739 if (!possible_vector_mask_operation_p (stmt_info))
6740 return;
6742 /* If at least one boolean input uses a vector mask type,
6743 pick the mask type with the narrowest elements.
6745 ??? This is the traditional behavior. It should always produce
6746 the smallest number of operations, but isn't necessarily the
6747 optimal choice. For example, if we have:
6749 a = b & c
6751 where:
6753 - the user of a wants it to have a mask type for 16-bit elements (M16)
6754 - b also uses M16
6755 - c uses a mask type for 8-bit elements (M8)
6757 then picking M8 gives:
6759 - 1 M16->M8 pack for b
6760 - 1 M8 AND for a
6761 - 2 M8->M16 unpacks for the user of a
6763 whereas picking M16 would have given:
6765 - 2 M8->M16 unpacks for c
6766 - 2 M16 ANDs for a
6768 The number of operations are equal, but M16 would have given
6769 a shorter dependency chain and allowed more ILP. */
6770 unsigned int precision = ~0U;
6771 gimple *stmt = STMT_VINFO_STMT (stmt_info);
6773 /* If the statement compares two values that shouldn't use vector masks,
6774 try comparing the values as normal scalars instead. */
6775 tree_code code = ERROR_MARK;
6776 tree op0_type;
6777 unsigned int nops = -1;
6778 unsigned int ops_start = 0;
6780 if (gassign *assign = dyn_cast <gassign *> (stmt))
6782 code = gimple_assign_rhs_code (assign);
6783 op0_type = TREE_TYPE (gimple_assign_rhs1 (assign));
6784 nops = gimple_num_ops (assign);
6785 ops_start = 1;
6787 else if (gcond *cond = dyn_cast <gcond *> (stmt))
6789 code = gimple_cond_code (cond);
6790 op0_type = TREE_TYPE (gimple_cond_lhs (cond));
6791 nops = 2;
6792 ops_start = 0;
6795 if (code != ERROR_MARK)
6797 for (unsigned int i = ops_start; i < nops; ++i)
6799 tree rhs = gimple_op (stmt, i);
6800 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs)))
6801 continue;
6803 stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
6804 if (!def_stmt_info)
6805 /* Don't let external or constant operands influence the choice.
6806 We can convert them to whichever vector type we pick. */
6807 continue;
6809 if (def_stmt_info->mask_precision)
6811 if (precision > def_stmt_info->mask_precision)
6812 precision = def_stmt_info->mask_precision;
6816 if (precision == ~0U
6817 && TREE_CODE_CLASS (code) == tcc_comparison)
6819 scalar_mode mode;
6820 tree vectype, mask_type;
6821 if (is_a <scalar_mode> (TYPE_MODE (op0_type), &mode)
6822 && (vectype = get_vectype_for_scalar_type (vinfo, op0_type))
6823 && (mask_type = get_mask_type_for_scalar_type (vinfo, op0_type))
6824 && expand_vec_cmp_expr_p (vectype, mask_type, code))
6825 precision = GET_MODE_BITSIZE (mode);
6828 else
6830 gphi *phi = as_a <gphi *> (stmt_info->stmt);
6831 for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
6833 tree rhs = gimple_phi_arg_def (phi, i);
6835 stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
6836 if (!def_stmt_info)
6837 /* Don't let external or constant operands influence the choice.
6838 We can convert them to whichever vector type we pick. */
6839 continue;
6841 if (def_stmt_info->mask_precision)
6843 if (precision > def_stmt_info->mask_precision)
6844 precision = def_stmt_info->mask_precision;
6849 if (dump_enabled_p ())
6851 if (precision == ~0U)
6852 dump_printf_loc (MSG_NOTE, vect_location,
6853 "using normal nonmask vectors for %G",
6854 stmt_info->stmt);
6855 else
6856 dump_printf_loc (MSG_NOTE, vect_location,
6857 "using boolean precision %d for %G",
6858 precision, stmt_info->stmt);
6861 stmt_info->mask_precision = precision;
6864 /* Handle vect_determine_precisions for STMT_INFO, given that we
6865 have already done so for the users of its result. */
6867 void
6868 vect_determine_stmt_precisions (vec_info *vinfo, stmt_vec_info stmt_info)
6870 vect_determine_min_output_precision (vinfo, stmt_info);
6871 if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
6873 vect_determine_precisions_from_range (stmt_info, stmt);
6874 vect_determine_precisions_from_users (stmt_info, stmt);
6878 /* Walk backwards through the vectorizable region to determine the
6879 values of these fields:
6881 - min_output_precision
6882 - min_input_precision
6883 - operation_precision
6884 - operation_sign. */
6886 void
6887 vect_determine_precisions (vec_info *vinfo)
6889 DUMP_VECT_SCOPE ("vect_determine_precisions");
6891 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
6893 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6894 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
6895 unsigned int nbbs = loop->num_nodes;
6897 for (unsigned int i = 0; i < nbbs; i++)
6899 basic_block bb = bbs[i];
6900 for (auto gsi = gsi_start_phis (bb);
6901 !gsi_end_p (gsi); gsi_next (&gsi))
6903 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
6904 if (stmt_info)
6905 vect_determine_mask_precision (vinfo, stmt_info);
6907 for (auto si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
6908 if (!is_gimple_debug (gsi_stmt (si)))
6909 vect_determine_mask_precision
6910 (vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
6912 for (unsigned int i = 0; i < nbbs; i++)
6914 basic_block bb = bbs[nbbs - i - 1];
6915 for (gimple_stmt_iterator si = gsi_last_bb (bb);
6916 !gsi_end_p (si); gsi_prev (&si))
6917 if (!is_gimple_debug (gsi_stmt (si)))
6918 vect_determine_stmt_precisions
6919 (vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
6920 for (auto gsi = gsi_start_phis (bb);
6921 !gsi_end_p (gsi); gsi_next (&gsi))
6923 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
6924 if (stmt_info)
6925 vect_determine_stmt_precisions (vinfo, stmt_info);
6929 else
6931 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
6932 for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
6934 basic_block bb = bb_vinfo->bbs[i];
6935 for (auto gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
6937 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
6938 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
6939 vect_determine_mask_precision (vinfo, stmt_info);
6941 for (auto gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
6943 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
6944 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
6945 vect_determine_mask_precision (vinfo, stmt_info);
6948 for (int i = bb_vinfo->bbs.length () - 1; i != -1; --i)
6950 for (gimple_stmt_iterator gsi = gsi_last_bb (bb_vinfo->bbs[i]);
6951 !gsi_end_p (gsi); gsi_prev (&gsi))
6953 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
6954 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
6955 vect_determine_stmt_precisions (vinfo, stmt_info);
6957 for (auto gsi = gsi_start_phis (bb_vinfo->bbs[i]);
6958 !gsi_end_p (gsi); gsi_next (&gsi))
6960 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
6961 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
6962 vect_determine_stmt_precisions (vinfo, stmt_info);
6968 typedef gimple *(*vect_recog_func_ptr) (vec_info *, stmt_vec_info, tree *);
6970 struct vect_recog_func
6972 vect_recog_func_ptr fn;
6973 const char *name;
6976 /* Note that ordering matters - the first pattern matching on a stmt is
6977 taken, which means the more complex ones usually need to precede the
6978 less complex ones (widen_sum only after dot_prod or sad, for example). */
6979 static vect_recog_func vect_vect_recog_func_ptrs[] = {
6980 { vect_recog_bitfield_ref_pattern, "bitfield_ref" },
6981 { vect_recog_bit_insert_pattern, "bit_insert" },
6982 { vect_recog_abd_pattern, "abd" },
6983 { vect_recog_over_widening_pattern, "over_widening" },
6984 /* Must come after over_widening, which narrows the shift as much as
6985 possible beforehand. */
6986 { vect_recog_average_pattern, "average" },
6987 { vect_recog_cond_expr_convert_pattern, "cond_expr_convert" },
6988 { vect_recog_mulhs_pattern, "mult_high" },
6989 { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
6990 { vect_recog_widen_mult_pattern, "widen_mult" },
6991 { vect_recog_dot_prod_pattern, "dot_prod" },
6992 { vect_recog_sad_pattern, "sad" },
6993 { vect_recog_widen_sum_pattern, "widen_sum" },
6994 { vect_recog_pow_pattern, "pow" },
6995 { vect_recog_popcount_clz_ctz_ffs_pattern, "popcount_clz_ctz_ffs" },
6996 { vect_recog_ctz_ffs_pattern, "ctz_ffs" },
6997 { vect_recog_widen_shift_pattern, "widen_shift" },
6998 { vect_recog_rotate_pattern, "rotate" },
6999 { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
7000 { vect_recog_divmod_pattern, "divmod" },
7001 { vect_recog_mult_pattern, "mult" },
7002 { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
7003 { vect_recog_gcond_pattern, "gcond" },
7004 { vect_recog_bool_pattern, "bool" },
7005 /* This must come before mask conversion, and includes the parts
7006 of mask conversion that are needed for gather and scatter
7007 internal functions. */
7008 { vect_recog_gather_scatter_pattern, "gather_scatter" },
7009 { vect_recog_mask_conversion_pattern, "mask_conversion" },
7010 { vect_recog_widen_plus_pattern, "widen_plus" },
7011 { vect_recog_widen_minus_pattern, "widen_minus" },
7012 { vect_recog_widen_abd_pattern, "widen_abd" },
7013 /* These must come after the double widening ones. */
7016 const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);
7018 /* Mark statements that are involved in a pattern. */
7020 void
7021 vect_mark_pattern_stmts (vec_info *vinfo,
7022 stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
7023 tree pattern_vectype)
7025 stmt_vec_info orig_stmt_info_saved = orig_stmt_info;
7026 gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
7028 gimple *orig_pattern_stmt = NULL;
7029 if (is_pattern_stmt_p (orig_stmt_info))
7031 /* We're replacing a statement in an existing pattern definition
7032 sequence. */
7033 orig_pattern_stmt = orig_stmt_info->stmt;
7034 if (dump_enabled_p ())
7035 dump_printf_loc (MSG_NOTE, vect_location,
7036 "replacing earlier pattern %G", orig_pattern_stmt);
7038 /* To keep the book-keeping simple, just swap the lhs of the
7039 old and new statements, so that the old one has a valid but
7040 unused lhs. */
7041 tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
7042 gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
7043 gimple_set_lhs (pattern_stmt, old_lhs);
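/* For example (hypothetical names): if the earlier pattern stmt was
   patt_a = x + y and the replacement computes patt_b = ..., the swap above
   leaves the old stmt defining patt_b (valid but unused) while the new
   pattern stmt defines patt_a, so existing uses of patt_a now resolve to
   the replacement.  */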
7045 if (dump_enabled_p ())
7046 dump_printf_loc (MSG_NOTE, vect_location, "with %G", pattern_stmt);
7048 /* Switch to the statement that ORIG replaces. */
7049 orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);
7051 /* We shouldn't be replacing the main pattern statement. */
7052 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
7053 != orig_pattern_stmt);
7056 if (def_seq)
7057 for (gimple_stmt_iterator si = gsi_start (def_seq);
7058 !gsi_end_p (si); gsi_next (&si))
7060 if (dump_enabled_p ())
7061 dump_printf_loc (MSG_NOTE, vect_location,
7062 "extra pattern stmt: %G", gsi_stmt (si));
7063 stmt_vec_info pattern_stmt_info
7064 = vect_init_pattern_stmt (vinfo, gsi_stmt (si),
7065 orig_stmt_info, pattern_vectype);
7066 /* Stmts in the def sequence are not vectorizable cycle or
7067 induction defs, instead they should all be vect_internal_def
7068 feeding the main pattern stmt which retains this def type. */
7069 STMT_VINFO_DEF_TYPE (pattern_stmt_info) = vect_internal_def;
7072 if (orig_pattern_stmt)
7074 vect_init_pattern_stmt (vinfo, pattern_stmt,
7075 orig_stmt_info, pattern_vectype);
7077 /* Insert all the new pattern statements before the original one. */
7078 gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
7079 gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
7080 orig_def_seq);
7081 gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
7082 gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);
7084 /* Remove the pattern statement that this new pattern replaces. */
7085 gsi_remove (&gsi, false);
7087 else
7088 vect_set_pattern_stmt (vinfo,
7089 pattern_stmt, orig_stmt_info, pattern_vectype);
7091 /* For any conditionals mark them as vect_condition_def. */
7092 if (is_a <gcond *> (pattern_stmt))
7093 STMT_VINFO_DEF_TYPE (STMT_VINFO_RELATED_STMT (orig_stmt_info)) = vect_condition_def;
7095 /* Transfer reduction path info to the pattern. */
7096 if (STMT_VINFO_REDUC_IDX (orig_stmt_info_saved) != -1)
7098 gimple_match_op op;
7099 if (!gimple_extract_op (orig_stmt_info_saved->stmt, &op))
7100 gcc_unreachable ();
7101 tree lookfor = op.ops[STMT_VINFO_REDUC_IDX (orig_stmt_info)];
7102 /* Search the pattern def sequence and the main pattern stmt. Note
7103 we may have inserted all into a containing pattern def sequence
7104 so the following is a bit awkward. */
7105 gimple_stmt_iterator si;
7106 gimple *s;
7107 if (def_seq)
7109 si = gsi_start (def_seq);
7110 s = gsi_stmt (si);
7111 gsi_next (&si);
7113 else
7115 si = gsi_none ();
7116 s = pattern_stmt;
7120 bool found = false;
7121 if (gimple_extract_op (s, &op))
7122 for (unsigned i = 0; i < op.num_ops; ++i)
7123 if (op.ops[i] == lookfor)
7125 STMT_VINFO_REDUC_IDX (vinfo->lookup_stmt (s)) = i;
7126 lookfor = gimple_get_lhs (s);
7127 found = true;
7128 break;
7130 if (s == pattern_stmt)
7132 if (!found && dump_enabled_p ())
7133 dump_printf_loc (MSG_NOTE, vect_location,
7134 "failed to update reduction index.\n");
7135 break;
7137 if (gsi_end_p (si))
7138 s = pattern_stmt;
7139 else
7141 s = gsi_stmt (si);
7142 if (s == pattern_stmt)
7143 /* Found the end inside a bigger pattern def seq. */
7144 si = gsi_none ();
7145 else
7146 gsi_next (&si);
7148 } while (1);
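/* The loop above threads STMT_VINFO_REDUC_IDX through the pattern def
   sequence and the main pattern statement, following the reduction operand
   from one statement's result to the next; if the operand is not found the
   failure is only noted in the dump file.  */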
7152 /* Function vect_pattern_recog_1
7154 Input:
7155 RECOG_FUNC: A pointer to a vect_recog_func entry whose function detects
7156 a certain computation pattern.
7157 STMT_INFO: A stmt from which the pattern search should start.
7159 If RECOG_FUNC's function successfully detects the pattern, it creates
7160 a sequence of statements that has the same functionality and can be
7161 used to replace STMT_INFO. It returns the last statement in the sequence
7162 and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
7163 The recognizer also sets *TYPE_OUT to the vector type of the final
7164 statement, having first checked that the target supports the new operation
7165 in that type.
7167 This function also does some bookkeeping, as explained in the documentation
7168 for vect_pattern_recog. */
7170 static void
7171 vect_pattern_recog_1 (vec_info *vinfo,
7172 vect_recog_func *recog_func, stmt_vec_info stmt_info)
7174 gimple *pattern_stmt;
7175 loop_vec_info loop_vinfo;
7176 tree pattern_vectype;
7178 /* If this statement has already been replaced with pattern statements,
7179 leave the original statement alone, since the first match wins.
7180 Instead try to match against the definition statements that feed
7181 the main pattern statement. */
7182 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
7184 gimple_stmt_iterator gsi;
7185 for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
7186 !gsi_end_p (gsi); gsi_next (&gsi))
7187 vect_pattern_recog_1 (vinfo, recog_func,
7188 vinfo->lookup_stmt (gsi_stmt (gsi)));
7189 return;
7192 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
7193 pattern_stmt = recog_func->fn (vinfo, stmt_info, &pattern_vectype);
7194 if (!pattern_stmt)
7196 /* Clear any half-formed pattern definition sequence. */
7197 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
7198 return;
7201 loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7203 /* Found a vectorizable pattern. */
7204 if (dump_enabled_p ())
7205 dump_printf_loc (MSG_NOTE, vect_location,
7206 "%s pattern recognized: %G",
7207 recog_func->name, pattern_stmt);
7209 /* Mark the stmts that are involved in the pattern. */
7210 vect_mark_pattern_stmts (vinfo, stmt_info, pattern_stmt, pattern_vectype);
7212 /* Patterns cannot be vectorized using SLP, because they change the order of
7213 computation. */
7214 if (loop_vinfo)
7216 unsigned ix, ix2;
7217 stmt_vec_info *elem_ptr;
7218 VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
7219 elem_ptr, *elem_ptr == stmt_info);
7224 /* Function vect_pattern_recog
7226 Input:
7227 VINFO - a vec_info for the loop or basic blocks in which we want to look
7228 for computation idioms.
7230 Output - for each computation idiom that is detected we create a new stmt
7231 that provides the same functionality and that can be vectorized. We
7232 also record some information in the stmt_vec_info of the relevant
7233 stmts, as explained below:
7235 At the entry to this function we have the following stmts, with the
7236 following initial value in the STMT_VINFO fields:
7238 stmt in_pattern_p related_stmt vec_stmt
7239 S1: a_i = .... - - -
7240 S2: a_2 = ..use(a_i).. - - -
7241 S3: a_1 = ..use(a_2).. - - -
7242 S4: a_0 = ..use(a_1).. - - -
7243 S5: ... = ..use(a_0).. - - -
7245 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
7246 represented by a single stmt. We then:
7247 - create a new stmt S6 equivalent to the pattern (the stmt is not
7248 inserted into the code)
7249 - fill in the STMT_VINFO fields as follows:
7251 in_pattern_p related_stmt vec_stmt
7252 S1: a_i = .... - - -
7253 S2: a_2 = ..use(a_i).. - - -
7254 S3: a_1 = ..use(a_2).. - - -
7255 S4: a_0 = ..use(a_1).. true S6 -
7256 '---> S6: a_new = .... - S4 -
7257 S5: ... = ..use(a_0).. - - -
7259 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
7260 to each other through the RELATED_STMT field).
7262 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
7263 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
7264 remain irrelevant unless used by stmts other than S4.
7266 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
7267 (because they are marked as irrelevant). It will vectorize S6, and record
7268 a pointer to the new vector stmt VS6 from S6 (as usual).
7269 S4 will be skipped, and S5 will be vectorized as usual:
7271 in_pattern_p related_stmt vec_stmt
7272 S1: a_i = .... - - -
7273 S2: a_2 = ..use(a_i).. - - -
7274 S3: a_1 = ..use(a_2).. - - -
7275 > VS6: va_new = .... - - -
7276 S4: a_0 = ..use(a_1).. true S6 VS6
7277 '---> S6: a_new = .... - S4 VS6
7278 > VS5: ... = ..vuse(va_new).. - - -
7279 S5: ... = ..use(a_0).. - - -
7281 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
7282 elsewhere), and we'll end up with:
7284 VS6: va_new = ....
7285 VS5: ... = ..vuse(va_new)..
7287 In case of more than one pattern statement, e.g., widen-mult with
7288 intermediate type:
7290 S1 a_t = ;
7291 S2 a_T = (TYPE) a_t;
7292 '--> S3: a_it = (interm_type) a_t;
7293 S4 prod_T = a_T * CONST;
7294 '--> S5: prod_T' = a_it w* CONST;
7296 there may be other users of a_T outside the pattern. In that case S2 will
7297 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
7298 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
7299 be recorded in S3. */
7301 void
7302 vect_pattern_recog (vec_info *vinfo)
7304 class loop *loop;
7305 basic_block *bbs;
7306 unsigned int nbbs;
7307 gimple_stmt_iterator si;
7308 unsigned int i, j;
7310 vect_determine_precisions (vinfo);
7312 DUMP_VECT_SCOPE ("vect_pattern_recog");
7314 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
7316 loop = LOOP_VINFO_LOOP (loop_vinfo);
7317 bbs = LOOP_VINFO_BBS (loop_vinfo);
7318 nbbs = loop->num_nodes;
7320 /* Scan through the loop stmts, applying the pattern recognition
7321 functions starting at each stmt visited: */
7322 for (i = 0; i < nbbs; i++)
7324 basic_block bb = bbs[i];
7325 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
7327 if (is_gimple_debug (gsi_stmt (si)))
7328 continue;
7329 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
7330 /* Scan over all generic vect_recog_xxx_pattern functions. */
7331 for (j = 0; j < NUM_PATTERNS; j++)
7332 vect_pattern_recog_1 (vinfo, &vect_vect_recog_func_ptrs[j],
7333 stmt_info);
7337 else
7339 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
7340 for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
7341 for (gimple_stmt_iterator gsi = gsi_start_bb (bb_vinfo->bbs[i]);
7342 !gsi_end_p (gsi); gsi_next (&gsi))
7344 stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (gsi_stmt (gsi));
7345 if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info))
7346 continue;
7348 /* Scan over all generic vect_recog_xxx_pattern functions. */
7349 for (j = 0; j < NUM_PATTERNS; j++)
7350 vect_pattern_recog_1 (vinfo,
7351 &vect_vect_recog_func_ptrs[j], stmt_info);
7355 /* After this no more add_stmt calls are allowed. */
7356 vinfo->stmt_vec_info_ro = true;
7359 /* Build a GIMPLE_ASSIGN or GIMPLE_CALL with the tree_code or internal_fn
7360 contained in CH, respectively. */
7361 gimple *
7362 vect_gimple_build (tree lhs, code_helper ch, tree op0, tree op1)
7364 gcc_assert (op0 != NULL_TREE);
7365 if (ch.is_tree_code ())
7366 return gimple_build_assign (lhs, (tree_code) ch, op0, op1);
7368 gcc_assert (ch.is_internal_fn ());
7369 gimple* stmt = gimple_build_call_internal (as_internal_fn ((combined_fn) ch),
7370 op1 == NULL_TREE ? 1 : 2,
7371 op0, op1);
7372 gimple_call_set_lhs (stmt, lhs);
7373 return stmt;
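/* Illustrative usage (hypothetical operands, not taken from this file):
   vect_gimple_build (lhs, PLUS_EXPR, a, b) builds the GIMPLE_ASSIGN
   lhs = a + b, whereas vect_gimple_build (lhs, as_combined_fn (IFN_SQRT),
   a, NULL_TREE) builds the single-argument GIMPLE_CALL lhs = .SQRT (a).  */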