tree-optimization/114485 - neg induction with partial vectors
gcc/tree-vect-patterns.cc
1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2024 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "gimple-iterator.h"
29 #include "gimple-fold.h"
30 #include "ssa.h"
31 #include "expmed.h"
32 #include "optabs-tree.h"
33 #include "insn-config.h"
34 #include "recog.h" /* FIXME: for insn_data */
35 #include "fold-const.h"
36 #include "stor-layout.h"
37 #include "tree-eh.h"
38 #include "gimplify.h"
39 #include "gimple-iterator.h"
40 #include "gimple-fold.h"
41 #include "gimplify-me.h"
42 #include "cfgloop.h"
43 #include "tree-vectorizer.h"
44 #include "dumpfile.h"
45 #include "builtins.h"
46 #include "internal-fn.h"
47 #include "case-cfn-macros.h"
48 #include "fold-const-call.h"
49 #include "attribs.h"
50 #include "cgraph.h"
51 #include "omp-simd-clone.h"
52 #include "predict.h"
53 #include "tree-vector-builder.h"
54 #include "vec-perm-indices.h"
55 #include "gimple-range.h"
58 /* TODO: Note the vectorizer still builds COND_EXPRs with GENERIC compares
59 in the first operand. Disentangling this is future work; the
60 IL is properly transferred to VEC_COND_EXPRs with separate compares. */
63 /* Return true if we have a useful VR_RANGE range for VAR, storing it
64 in *MIN_VALUE and *MAX_VALUE if so. Note the range in the dump files. */
66 bool
67 vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
69 value_range vr;
70 tree vr_min, vr_max;
71 get_range_query (cfun)->range_of_expr (vr, var);
72 if (vr.undefined_p ())
73 vr.set_varying (TREE_TYPE (var));
74 value_range_kind vr_type = get_legacy_range (vr, vr_min, vr_max);
75 *min_value = wi::to_wide (vr_min);
76 *max_value = wi::to_wide (vr_max);
77 wide_int nonzero = get_nonzero_bits (var);
78 signop sgn = TYPE_SIGN (TREE_TYPE (var));
79 if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
80 nonzero, sgn) == VR_RANGE)
82 if (dump_enabled_p ())
84 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
85 dump_printf (MSG_NOTE, " has range [");
86 dump_hex (MSG_NOTE, *min_value);
87 dump_printf (MSG_NOTE, ", ");
88 dump_hex (MSG_NOTE, *max_value);
89 dump_printf (MSG_NOTE, "]\n");
91 return true;
93 else
95 if (dump_enabled_p ())
97 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
98 dump_printf (MSG_NOTE, " has no range info\n");
100 return false;
104 /* Report that we've found an instance of pattern PATTERN in
105 statement STMT. */
107 static void
108 vect_pattern_detected (const char *name, gimple *stmt)
110 if (dump_enabled_p ())
111 dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt);
114 /* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
115 return the pattern statement's stmt_vec_info. Set its vector type to
116 VECTYPE if it doesn't have one already. */
118 static stmt_vec_info
119 vect_init_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
120 stmt_vec_info orig_stmt_info, tree vectype)
122 stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
123 if (pattern_stmt_info == NULL)
124 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
125 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));
127 pattern_stmt_info->pattern_stmt_p = true;
128 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
129 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
130 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
131 STMT_VINFO_TYPE (pattern_stmt_info) = STMT_VINFO_TYPE (orig_stmt_info);
132 if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
134 gcc_assert (!vectype
135 || is_a <gcond *> (pattern_stmt)
136 || (VECTOR_BOOLEAN_TYPE_P (vectype)
137 == vect_use_mask_type_p (orig_stmt_info)));
138 STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
139 pattern_stmt_info->mask_precision = orig_stmt_info->mask_precision;
141 return pattern_stmt_info;
144 /* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
145 Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
146 have one already. */
148 static void
149 vect_set_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
150 stmt_vec_info orig_stmt_info, tree vectype)
152 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
153 STMT_VINFO_RELATED_STMT (orig_stmt_info)
154 = vect_init_pattern_stmt (vinfo, pattern_stmt, orig_stmt_info, vectype);
157 /* Add NEW_STMT to STMT_INFO's pattern definition statements. If VECTYPE
158 is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
159 be different from the vector type of the final pattern statement.
160 If VECTYPE is a mask type, SCALAR_TYPE_FOR_MASK is the scalar type
161 from which it was derived. */
163 static inline void
164 append_pattern_def_seq (vec_info *vinfo,
165 stmt_vec_info stmt_info, gimple *new_stmt,
166 tree vectype = NULL_TREE,
167 tree scalar_type_for_mask = NULL_TREE)
169 gcc_assert (!scalar_type_for_mask
170 == (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype)));
171 if (vectype)
173 stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
174 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
175 if (scalar_type_for_mask)
176 new_stmt_info->mask_precision
177 = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (scalar_type_for_mask));
179 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
180 new_stmt);
183 /* The caller wants to perform new operations on vect_external variable
184 VAR, so that the result of the operations would also be vect_external.
185 Return the edge on which the operations can be performed, if one exists.
186 Return null if the operations should instead be treated as part of
187 the pattern that needs them. */
189 static edge
190 vect_get_external_def_edge (vec_info *vinfo, tree var)
192 edge e = NULL;
193 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
195 e = loop_preheader_edge (loop_vinfo->loop);
196 if (!SSA_NAME_IS_DEFAULT_DEF (var))
198 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
199 if (bb == NULL
200 || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
201 e = NULL;
204 return e;
207 /* Return true if the target supports a vector version of CODE,
208 where CODE is known to map to a direct optab with the given SUBTYPE.
209 ITYPE specifies the type of (some of) the scalar inputs and OTYPE
210 specifies the type of the scalar result.
212 If CODE allows the inputs and outputs to have different types
213 (such as for WIDEN_SUM_EXPR), it is the input mode rather
214 than the output mode that determines the appropriate target pattern.
215 Operand 0 of the target pattern then specifies the mode that the output
216 must have.
218 When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
219 Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
220 is nonnull. */
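/* For illustration only (not part of the original sources): later in this
   file the SAD pattern checks target support roughly like

     tree half_vectype;
     if (!vect_supportable_direct_optab_p (vinfo, sum_type, SAD_EXPR,
					   half_type, type_out, &half_vectype))
       return NULL;

   after which *TYPE_OUT is the vector form of SUM_TYPE and HALF_VECTYPE the
   vector form of HALF_TYPE.  */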
222 static bool
223 vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code,
224 tree itype, tree *vecotype_out,
225 tree *vecitype_out = NULL,
226 enum optab_subtype subtype = optab_default)
228 tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
229 if (!vecitype)
230 return false;
232 tree vecotype = get_vectype_for_scalar_type (vinfo, otype);
233 if (!vecotype)
234 return false;
236 optab optab = optab_for_tree_code (code, vecitype, subtype);
237 if (!optab)
238 return false;
240 insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
241 if (icode == CODE_FOR_nothing
242 || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
243 return false;
245 *vecotype_out = vecotype;
246 if (vecitype_out)
247 *vecitype_out = vecitype;
248 return true;
251 /* Round bit precision PRECISION up to a full element. */
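/* Worked example (illustrative): a precision of 12 rounds up to 16, 17
   rounds up to 32, and anything smaller than BITS_PER_UNIT rounds up to a
   full byte.  */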
253 static unsigned int
254 vect_element_precision (unsigned int precision)
256 precision = 1 << ceil_log2 (precision);
257 return MAX (precision, BITS_PER_UNIT);
260 /* If OP is defined by a statement that's being considered for vectorization,
261 return information about that statement, otherwise return NULL. */
263 static stmt_vec_info
264 vect_get_internal_def (vec_info *vinfo, tree op)
266 stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
267 if (def_stmt_info
268 && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
269 return def_stmt_info;
270 return NULL;
273 /* Check whether NAME, an ssa-name used in STMT_VINFO,
274 is a result of a type promotion, such that:
275 DEF_STMT: NAME = NOP (name0)
276 If CHECK_SIGN is TRUE, check that either both types are signed or both are
277 unsigned. */
279 static bool
280 type_conversion_p (vec_info *vinfo, tree name, bool check_sign,
281 tree *orig_type, gimple **def_stmt, bool *promotion)
283 tree type = TREE_TYPE (name);
284 tree oprnd0;
285 enum vect_def_type dt;
287 stmt_vec_info def_stmt_info;
288 if (!vect_is_simple_use (name, vinfo, &dt, &def_stmt_info, def_stmt))
289 return false;
291 if (dt != vect_internal_def
292 && dt != vect_external_def && dt != vect_constant_def)
293 return false;
295 if (!*def_stmt)
296 return false;
298 if (!is_gimple_assign (*def_stmt))
299 return false;
301 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
302 return false;
304 oprnd0 = gimple_assign_rhs1 (*def_stmt);
306 *orig_type = TREE_TYPE (oprnd0);
307 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
308 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
309 return false;
311 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
312 *promotion = true;
313 else
314 *promotion = false;
316 if (!vect_is_simple_use (oprnd0, vinfo, &dt))
317 return false;
319 return true;
322 /* Holds information about an input operand after some sign changes
323 and type promotions have been peeled away. */
324 class vect_unpromoted_value {
325 public:
326 vect_unpromoted_value ();
328 void set_op (tree, vect_def_type, stmt_vec_info = NULL);
330 /* The value obtained after peeling away zero or more casts. */
331 tree op;
333 /* The type of OP. */
334 tree type;
336 /* The definition type of OP. */
337 vect_def_type dt;
339 /* If OP is the result of peeling at least one cast, and if the cast
340 of OP itself is a vectorizable statement, CASTER identifies that
341 statement, otherwise it is null. */
342 stmt_vec_info caster;
345 inline vect_unpromoted_value::vect_unpromoted_value ()
346 : op (NULL_TREE),
347 type (NULL_TREE),
348 dt (vect_uninitialized_def),
349 caster (NULL)
353 /* Set the operand to OP_IN, its definition type to DT_IN, and the
354 statement that casts it to CASTER_IN. */
356 inline void
357 vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
358 stmt_vec_info caster_in)
360 op = op_in;
361 type = TREE_TYPE (op);
362 dt = dt_in;
363 caster = caster_in;
366 /* If OP is a vectorizable SSA name, strip a sequence of integer conversions
367 to reach some vectorizable inner operand OP', continuing as long as it
368 is possible to convert OP' back to OP using a possible sign change
369 followed by a possible promotion P. Return this OP', or null if OP is
370 not a vectorizable SSA name. If there is a promotion P, describe its
371 input in UNPROM, otherwise describe OP' in UNPROM. If SINGLE_USE_P
372 is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
373 have more than one user.
375 A successful return means that it is possible to go from OP' to OP
376 via UNPROM. The cast from OP' to UNPROM is at most a sign change,
377 whereas the cast from UNPROM to OP might be a promotion, a sign
378 change, or a nop.
380 E.g. say we have:
382 signed short *ptr = ...;
383 signed short C = *ptr;
384 unsigned short B = (unsigned short) C; // sign change
385 signed int A = (signed int) B; // unsigned promotion
386 ...possible other uses of A...
387 unsigned int OP = (unsigned int) A; // sign change
389 In this case it's possible to go directly from C to OP using:
391 OP = (unsigned int) (unsigned short) C;
392 +------------+ +--------------+
393 promotion sign change
395 so OP' would be C. The input to the promotion is B, so UNPROM
396 would describe B. */
398 static tree
399 vect_look_through_possible_promotion (vec_info *vinfo, tree op,
400 vect_unpromoted_value *unprom,
401 bool *single_use_p = NULL)
403 tree op_type = TREE_TYPE (op);
404 if (!INTEGRAL_TYPE_P (op_type))
405 return NULL_TREE;
407 tree res = NULL_TREE;
408 unsigned int orig_precision = TYPE_PRECISION (op_type);
409 unsigned int min_precision = orig_precision;
410 stmt_vec_info caster = NULL;
411 while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
413 /* See whether OP is simple enough to vectorize. */
414 stmt_vec_info def_stmt_info;
415 gimple *def_stmt;
416 vect_def_type dt;
417 if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
418 break;
420 /* If OP is the input of a demotion, skip over it to see whether
421 OP is itself the result of a promotion. If so, the combined
422 effect of the promotion and the demotion might fit the required
423 pattern, otherwise neither operation fits.
425 This copes with cases such as the result of an arithmetic
426 operation being truncated before being stored, and where that
427 arithmetic operation has been recognized as an over-widened one. */
428 if (TYPE_PRECISION (op_type) <= min_precision)
430 /* Use OP as the UNPROM described above if we haven't yet
431 found a promotion, or if using the new input preserves the
432 sign of the previous promotion. */
433 if (!res
434 || TYPE_PRECISION (unprom->type) == orig_precision
435 || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type))
437 unprom->set_op (op, dt, caster);
438 min_precision = TYPE_PRECISION (op_type);
440 /* Stop if we've already seen a promotion and if this
441 conversion does more than change the sign. */
442 else if (TYPE_PRECISION (op_type)
443 != TYPE_PRECISION (unprom->type))
444 break;
446 /* The sequence now extends to OP. */
447 res = op;
450 /* See whether OP is defined by a cast. Record it as CASTER if
451 the cast is potentially vectorizable. */
452 if (!def_stmt)
453 break;
454 caster = def_stmt_info;
456 /* Ignore pattern statements, since we don't link uses for them. */
457 if (caster
458 && single_use_p
459 && !STMT_VINFO_RELATED_STMT (caster)
460 && !has_single_use (res))
461 *single_use_p = false;
463 gassign *assign = dyn_cast <gassign *> (def_stmt);
464 if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
465 break;
467 /* Continue with the input to the cast. */
468 op = gimple_assign_rhs1 (def_stmt);
469 op_type = TREE_TYPE (op);
471 return res;
474 /* OP is an integer operand to an operation that returns TYPE, and we
475 want to treat the operation as a widening one. So far we can treat
476 it as widening from *COMMON_TYPE.
478 Return true if OP is suitable for such a widening operation,
479 either widening from *COMMON_TYPE or from some supertype of it.
480 Update *COMMON_TYPE to the supertype in the latter case.
482 SHIFT_P is true if OP is a shift amount. */
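/* Illustrative numbers: with a 32-bit TYPE, a plain constant operand of 200
   needs only 8 bits as an unsigned value and is acceptable, whereas a shift
   amount must not exceed half of TYPE's precision (16 here).  */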
484 static bool
485 vect_joust_widened_integer (tree type, bool shift_p, tree op,
486 tree *common_type)
488 /* Calculate the minimum precision required by OP, without changing
489 the sign of either operand. */
490 unsigned int precision;
491 if (shift_p)
493 if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
494 return false;
495 precision = TREE_INT_CST_LOW (op);
497 else
499 precision = wi::min_precision (wi::to_widest (op),
500 TYPE_SIGN (*common_type));
501 if (precision * 2 > TYPE_PRECISION (type))
502 return false;
505 /* If OP requires a wider type, switch to that type. The checks
506 above ensure that this is still narrower than the result. */
507 precision = vect_element_precision (precision);
508 if (TYPE_PRECISION (*common_type) < precision)
509 *common_type = build_nonstandard_integer_type
510 (precision, TYPE_UNSIGNED (*common_type));
511 return true;
514 /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
515 is narrower than TYPE, storing the supertype in *COMMON_TYPE if so. */
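/* Illustrative example: if TYPE is a 32-bit int and the two inputs are
   unsigned char and signed char, the signs are mismatched and neither input
   type can hold the other, so *COMMON_TYPE becomes a 16-bit signed type,
   which still leaves room for the widening result.  */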
517 static bool
518 vect_joust_widened_type (tree type, tree new_type, tree *common_type)
520 if (types_compatible_p (*common_type, new_type))
521 return true;
523 /* See if *COMMON_TYPE can hold all values of NEW_TYPE. */
524 if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
525 && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
526 return true;
528 /* See if NEW_TYPE can hold all values of *COMMON_TYPE. */
529 if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
530 && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
532 *common_type = new_type;
533 return true;
536 /* We have mismatched signs, with the signed type being
537 no wider than the unsigned type. In this case we need
538 a wider signed type. */
539 unsigned int precision = MAX (TYPE_PRECISION (*common_type),
540 TYPE_PRECISION (new_type));
541 precision *= 2;
543 if (precision * 2 > TYPE_PRECISION (type))
544 return false;
546 *common_type = build_nonstandard_integer_type (precision, false);
547 return true;
550 /* Check whether STMT_INFO can be viewed as a tree of integer operations
551 in which each node either performs CODE or WIDENED_CODE, and where
552 each leaf operand is narrower than the result of STMT_INFO. MAX_NOPS
553 specifies the maximum number of leaf operands. SHIFT_P says whether
554 CODE and WIDENED_CODE are some sort of shift.
556 If STMT_INFO is such a tree, return the number of leaf operands
557 and describe them in UNPROM[0] onwards. Also set *COMMON_TYPE
558 to a type that (a) is narrower than the result of STMT_INFO and
559 (b) can hold all leaf operand values.
561 If SUBTYPE is nonnull, allow the signs of the operands
562 to differ but not their precision. SUBTYPE is updated to reflect
563 this.
565 Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
566 exists. */
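/* Illustrative example (names invented): with CODE == PLUS_EXPR, a 32-bit
   result and a large enough MAX_NOPS, the tree

     int t1 = (int) a + (int) b;   // a, b, c are unsigned char
     int t2 = t1 + (int) c;

   rooted at t2 has the three leaf operands a, b and c, so the function
   would return 3 with *COMMON_TYPE set to the 8-bit unsigned type.  */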
568 static unsigned int
569 vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code,
570 code_helper widened_code, bool shift_p,
571 unsigned int max_nops,
572 vect_unpromoted_value *unprom, tree *common_type,
573 enum optab_subtype *subtype = NULL)
575 /* Check for an integer operation with the right code. */
576 gimple* stmt = stmt_info->stmt;
577 if (!(is_gimple_assign (stmt) || is_gimple_call (stmt)))
578 return 0;
580 code_helper rhs_code;
581 if (is_gimple_assign (stmt))
582 rhs_code = gimple_assign_rhs_code (stmt);
583 else if (is_gimple_call (stmt))
584 rhs_code = gimple_call_combined_fn (stmt);
585 else
586 return 0;
588 if (rhs_code != code
589 && rhs_code != widened_code)
590 return 0;
592 tree lhs = gimple_get_lhs (stmt);
593 tree type = TREE_TYPE (lhs);
594 if (!INTEGRAL_TYPE_P (type))
595 return 0;
597 /* Assume that both operands will be leaf operands. */
598 max_nops -= 2;
600 /* Check the operands. */
601 unsigned int next_op = 0;
602 for (unsigned int i = 0; i < 2; ++i)
604 vect_unpromoted_value *this_unprom = &unprom[next_op];
605 unsigned int nops = 1;
606 tree op = gimple_arg (stmt, i);
607 if (i == 1 && TREE_CODE (op) == INTEGER_CST)
609 /* We already have a common type from earlier operands.
610 Update it to account for OP. */
611 this_unprom->set_op (op, vect_constant_def);
612 if (!vect_joust_widened_integer (type, shift_p, op, common_type))
613 return 0;
615 else
617 /* Only allow shifts by constants. */
618 if (shift_p && i == 1)
619 return 0;
621 if (rhs_code != code)
623 /* If rhs_code is widened_code, don't look through further
624 possible promotions, there is a promotion already embedded
625 in the WIDEN_*_EXPR. */
626 if (TREE_CODE (op) != SSA_NAME
627 || !INTEGRAL_TYPE_P (TREE_TYPE (op)))
628 return 0;
630 stmt_vec_info def_stmt_info;
631 gimple *def_stmt;
632 vect_def_type dt;
633 if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info,
634 &def_stmt))
635 return 0;
636 this_unprom->set_op (op, dt, NULL);
638 else if (!vect_look_through_possible_promotion (vinfo, op,
639 this_unprom))
640 return 0;
642 if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
644 /* The operand isn't widened. If STMT_INFO has the code
645 for an unwidened operation, recursively check whether
646 this operand is a node of the tree. */
647 if (rhs_code != code
648 || max_nops == 0
649 || this_unprom->dt != vect_internal_def)
650 return 0;
652 /* Give back the leaf slot allocated above now that we're
653 not treating this as a leaf operand. */
654 max_nops += 1;
656 /* Recursively process the definition of the operand. */
657 stmt_vec_info def_stmt_info
658 = vinfo->lookup_def (this_unprom->op);
659 nops = vect_widened_op_tree (vinfo, def_stmt_info, code,
660 widened_code, shift_p, max_nops,
661 this_unprom, common_type,
662 subtype);
663 if (nops == 0)
664 return 0;
666 max_nops -= nops;
668 else
670 /* Make sure that the operand is narrower than the result. */
671 if (TYPE_PRECISION (this_unprom->type) * 2
672 > TYPE_PRECISION (type))
673 return 0;
675 /* Update COMMON_TYPE for the new operand. */
676 if (i == 0)
677 *common_type = this_unprom->type;
678 else if (!vect_joust_widened_type (type, this_unprom->type,
679 common_type))
681 if (subtype)
683 /* See if we can sign extend the smaller type. */
684 if (TYPE_PRECISION (this_unprom->type)
685 > TYPE_PRECISION (*common_type))
686 *common_type = this_unprom->type;
687 *subtype = optab_vector_mixed_sign;
689 else
690 return 0;
694 next_op += nops;
696 return next_op;
699 /* Return a new temporary SSA name of TYPE for a pattern, defined by STMT. If STMT
700 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
702 static tree
703 vect_recog_temp_ssa_var (tree type, gimple *stmt = NULL)
705 return make_temp_ssa_name (type, stmt, "patt");
708 /* STMT2_INFO describes a type conversion that could be split into STMT1
709 followed by a version of STMT2_INFO that takes NEW_RHS as its first
710 input. Try to do this using pattern statements, returning true on
711 success. */
713 static bool
714 vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
715 gimple *stmt1, tree vectype)
717 if (is_pattern_stmt_p (stmt2_info))
719 /* STMT2_INFO is part of a pattern. Get the statement to which
720 the pattern is attached. */
721 stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
722 vect_init_pattern_stmt (vinfo, stmt1, orig_stmt2_info, vectype);
724 if (dump_enabled_p ())
725 dump_printf_loc (MSG_NOTE, vect_location,
726 "Splitting pattern statement: %G", stmt2_info->stmt);
728 /* Since STMT2_INFO is a pattern statement, we can change it
729 in-situ without worrying about changing the code for the
730 containing block. */
731 gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);
733 if (dump_enabled_p ())
735 dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
736 dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
737 stmt2_info->stmt);
740 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
741 if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
742 /* STMT2_INFO is the actual pattern statement. Add STMT1
743 to the end of the definition sequence. */
744 gimple_seq_add_stmt_without_update (def_seq, stmt1);
745 else
747 /* STMT2_INFO belongs to the definition sequence. Insert STMT1
748 before it. */
749 gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
750 gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
752 return true;
754 else
756 /* STMT2_INFO doesn't yet have a pattern. Try to create a
757 two-statement pattern now. */
758 gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
759 tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
760 tree lhs_vectype = get_vectype_for_scalar_type (vinfo, lhs_type);
761 if (!lhs_vectype)
762 return false;
764 if (dump_enabled_p ())
765 dump_printf_loc (MSG_NOTE, vect_location,
766 "Splitting statement: %G", stmt2_info->stmt);
768 /* Add STMT1 as a singleton pattern definition sequence. */
769 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
770 vect_init_pattern_stmt (vinfo, stmt1, stmt2_info, vectype);
771 gimple_seq_add_stmt_without_update (def_seq, stmt1);
773 /* Build the second of the two pattern statements. */
774 tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
775 gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
776 vect_set_pattern_stmt (vinfo, new_stmt2, stmt2_info, lhs_vectype);
778 if (dump_enabled_p ())
780 dump_printf_loc (MSG_NOTE, vect_location,
781 "into pattern statements: %G", stmt1);
782 dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
783 (gimple *) new_stmt2);
786 return true;
790 /* Look for the following pattern
791 X = x[i]
792 Y = y[i]
793 DIFF = X - Y
794 DAD = ABS_EXPR<DIFF>
796 ABS_STMT should point to a statement of code ABS_EXPR or ABSU_EXPR.
797 HALF_TYPE and UNPROM will be set should the statement be found to
798 be a widened operation.
799 DIFF_STMT will be set to the MINUS_EXPR
800 statement that precedes the ABS_STMT unless vect_widened_op_tree
801 succeeds. */
803 static bool
804 vect_recog_absolute_difference (vec_info *vinfo, gassign *abs_stmt,
805 tree *half_type,
806 vect_unpromoted_value unprom[2],
807 gassign **diff_stmt)
809 if (!abs_stmt)
810 return false;
812 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
813 inside the loop (in case we are analyzing an outer-loop). */
814 enum tree_code code = gimple_assign_rhs_code (abs_stmt);
815 if (code != ABS_EXPR && code != ABSU_EXPR)
816 return false;
818 tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
819 if (!abs_oprnd)
820 return false;
821 tree abs_type = TREE_TYPE (abs_oprnd);
822 if (!ANY_INTEGRAL_TYPE_P (abs_type)
823 || TYPE_OVERFLOW_WRAPS (abs_type)
824 || TYPE_UNSIGNED (abs_type))
825 return false;
827 /* Peel off conversions from the ABS input. This can involve sign
828 changes (e.g. from an unsigned subtraction to a signed ABS input)
829 or signed promotion, but it can't include unsigned promotion.
830 (Note that ABS of an unsigned promotion should have been folded
831 away before now anyway.) */
832 vect_unpromoted_value unprom_diff;
833 abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
834 &unprom_diff);
835 if (!abs_oprnd)
836 return false;
837 if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
838 && TYPE_UNSIGNED (unprom_diff.type))
839 return false;
841 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
842 stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
843 if (!diff_stmt_vinfo)
844 return false;
846 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
847 inside the loop (in case we are analyzing an outer-loop). */
848 if (vect_widened_op_tree (vinfo, diff_stmt_vinfo,
849 MINUS_EXPR, IFN_VEC_WIDEN_MINUS,
850 false, 2, unprom, half_type))
851 return true;
853 /* Failed to find a widen operation so we check for a regular MINUS_EXPR. */
854 gassign *diff = dyn_cast <gassign *> (STMT_VINFO_STMT (diff_stmt_vinfo));
855 if (diff_stmt && diff
856 && gimple_assign_rhs_code (diff) == MINUS_EXPR
857 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (abs_oprnd)))
859 *diff_stmt = diff;
860 *half_type = NULL_TREE;
861 return true;
864 return false;
867 /* Convert UNPROM to TYPE and return the result, adding new statements
868 to STMT_INFO's pattern definition statements if no better way is
869 available. VECTYPE is the vector form of TYPE.
871 If SUBTYPE then convert the type based on the subtype. */
873 static tree
874 vect_convert_input (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
875 vect_unpromoted_value *unprom, tree vectype,
876 enum optab_subtype subtype = optab_default)
878 /* Update the type if the signs differ. */
879 if (subtype == optab_vector_mixed_sign)
881 gcc_assert (!TYPE_UNSIGNED (type));
882 if (TYPE_UNSIGNED (TREE_TYPE (unprom->op)))
884 type = unsigned_type_for (type);
885 vectype = unsigned_type_for (vectype);
889 /* Check for a no-op conversion. */
890 if (types_compatible_p (type, TREE_TYPE (unprom->op)))
891 return unprom->op;
893 /* Allow the caller to create constant vect_unpromoted_values. */
894 if (TREE_CODE (unprom->op) == INTEGER_CST)
895 return wide_int_to_tree (type, wi::to_widest (unprom->op));
897 tree input = unprom->op;
898 if (unprom->caster)
900 tree lhs = gimple_get_lhs (unprom->caster->stmt);
901 tree lhs_type = TREE_TYPE (lhs);
903 /* If the result of the existing cast is the right width, use it
904 instead of the source of the cast. */
905 if (TYPE_PRECISION (lhs_type) == TYPE_PRECISION (type))
906 input = lhs;
907 /* If the precision we want is between the source and result
908 precisions of the existing cast, try splitting the cast into
909 two and tapping into a mid-way point. */
910 else if (TYPE_PRECISION (lhs_type) > TYPE_PRECISION (type)
911 && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type))
913 /* In order to preserve the semantics of the original cast,
914 give the mid-way point the same signedness as the input value.
916 It would be possible to use a signed type here instead if
917 TYPE is signed and UNPROM->TYPE is unsigned, but that would
918 make the sign of the midtype sensitive to the order in
919 which we process the statements, since the signedness of
920 TYPE is the signedness required by just one of possibly
921 many users. Also, unsigned promotions are usually as cheap
922 as or cheaper than signed ones, so it's better to keep an
923 unsigned promotion. */
924 tree midtype = build_nonstandard_integer_type
925 (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type));
926 tree vec_midtype = get_vectype_for_scalar_type (vinfo, midtype);
927 if (vec_midtype)
929 input = vect_recog_temp_ssa_var (midtype, NULL);
930 gassign *new_stmt = gimple_build_assign (input, NOP_EXPR,
931 unprom->op);
932 if (!vect_split_statement (vinfo, unprom->caster, input, new_stmt,
933 vec_midtype))
934 append_pattern_def_seq (vinfo, stmt_info,
935 new_stmt, vec_midtype);
939 /* See if we can reuse an existing result. */
940 if (types_compatible_p (type, TREE_TYPE (input)))
941 return input;
944 /* We need a new conversion statement. */
945 tree new_op = vect_recog_temp_ssa_var (type, NULL);
946 gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, input);
948 /* If OP is an external value, see if we can insert the new statement
949 on an incoming edge. */
950 if (input == unprom->op && unprom->dt == vect_external_def)
951 if (edge e = vect_get_external_def_edge (vinfo, input))
953 basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
954 gcc_assert (!new_bb);
955 return new_op;
958 /* As a (common) last resort, add the statement to the pattern itself. */
959 append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype);
960 return new_op;
963 /* Invoke vect_convert_input for N elements of UNPROM and store the
964 result in the corresponding elements of RESULT.
966 If SUBTYPE then convert the type based on the subtype. */
968 static void
969 vect_convert_inputs (vec_info *vinfo, stmt_vec_info stmt_info, unsigned int n,
970 tree *result, tree type, vect_unpromoted_value *unprom,
971 tree vectype, enum optab_subtype subtype = optab_default)
973 for (unsigned int i = 0; i < n; ++i)
975 unsigned int j;
976 for (j = 0; j < i; ++j)
977 if (unprom[j].op == unprom[i].op)
978 break;
980 if (j < i)
981 result[i] = result[j];
982 else
983 result[i] = vect_convert_input (vinfo, stmt_info,
984 type, &unprom[i], vectype, subtype);
988 /* The caller has created a (possibly empty) sequence of pattern definition
989 statements followed by a single statement PATTERN_STMT. Cast the result
990 of this final statement to TYPE. If a new statement is needed, add
991 PATTERN_STMT to the end of STMT_INFO's pattern definition statements
992 and return the new statement, otherwise return PATTERN_STMT as-is.
993 VECITYPE is the vector form of PATTERN_STMT's result type. */
995 static gimple *
996 vect_convert_output (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
997 gimple *pattern_stmt, tree vecitype)
999 tree lhs = gimple_get_lhs (pattern_stmt);
1000 if (!types_compatible_p (type, TREE_TYPE (lhs)))
1002 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vecitype);
1003 tree cast_var = vect_recog_temp_ssa_var (type, NULL);
1004 pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
1006 return pattern_stmt;
1009 /* Return true if STMT_VINFO describes a reduction for which reassociation
1010 is allowed. If STMT_INFO is part of a group, assume that it's part of
1011 a reduction chain and optimistically assume that all statements
1012 except the last allow reassociation.
1013 Also require it to have code CODE and to be a reduction
1014 in the outermost loop. When returning true, store the operands in
1015 *OP0_OUT and *OP1_OUT. */
1017 static bool
1018 vect_reassociating_reduction_p (vec_info *vinfo,
1019 stmt_vec_info stmt_info, tree_code code,
1020 tree *op0_out, tree *op1_out)
1022 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
1023 if (!loop_info)
1024 return false;
1026 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
1027 if (!assign || gimple_assign_rhs_code (assign) != code)
1028 return false;
1030 /* We don't allow changing the order of the computation in the inner-loop
1031 when doing outer-loop vectorization. */
1032 class loop *loop = LOOP_VINFO_LOOP (loop_info);
1033 if (loop && nested_in_vect_loop_p (loop, stmt_info))
1034 return false;
1036 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
1038 if (needs_fold_left_reduction_p (TREE_TYPE (gimple_assign_lhs (assign)),
1039 code))
1040 return false;
1042 else if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) == NULL)
1043 return false;
1045 *op0_out = gimple_assign_rhs1 (assign);
1046 *op1_out = gimple_assign_rhs2 (assign);
1047 if (commutative_tree_code (code) && STMT_VINFO_REDUC_IDX (stmt_info) == 0)
1048 std::swap (*op0_out, *op1_out);
1049 return true;
1052 /* match.pd function to match
1053 (cond (cmp@3 a b) (convert@1 c) (convert@2 d))
1054 with conditions:
1055 1) @1, @2, c, d, a, b all have integral type.
1056 2) Both @1 and @2 have a single use.
1057 3) a and c have the same precision.
1058 4) c and @1 have different precision.
1059 5) c and d have the same type, or they may differ in sign when convert is
1060 a truncation.
1062 Record a, c, d and @3. */
1064 extern bool gimple_cond_expr_convert_p (tree, tree*, tree (*)(tree));
1066 /* Function vect_recog_cond_expr_convert
1068 Try to find the following pattern:
1070 TYPE_AB A,B;
1071 TYPE_CD C,D;
1072 TYPE_E E;
1073 TYPE_E op_true = (TYPE_E) A;
1074 TYPE_E op_false = (TYPE_E) B;
1076 E = C cmp D ? op_true : op_false;
1078 where
1079 TYPE_PRECISION (TYPE_E) != TYPE_PRECISION (TYPE_CD);
1080 TYPE_PRECISION (TYPE_AB) == TYPE_PRECISION (TYPE_CD);
1081 single_use of op_true and op_false.
1082 TYPE_AB could differ in sign when (TYPE_E) A is a truncation.
1084 Input:
1086 * STMT_VINFO: The stmt from which the pattern search begins.
1087 here it starts with E = c cmp D ? op_true : op_false;
1089 Output:
1091 TYPE1 E' = C cmp D ? A : B;
1092 TYPE3 E = (TYPE3) E';
1094 There may be an extra nop_convert for A or B to handle different signedness.
1096 * TYPE_OUT: The vector type of the output of this pattern.
1098 * Return value: A new stmt that will be used to replace the sequence of
1099 stmts that constitute the pattern. In this case it will be:
1100 E = (TYPE3)E';
1101 E' = C cmp D ? A : B; is recorded in pattern definition statements; */
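/* Illustrative scalar source for the shape being matched (names invented):
   with 32-bit int C, D, A, B and a 64-bit long result,

     long E = C < D ? (long) A : (long) B;

   is rewritten so that the selection happens in the narrower 32-bit type
   and only the selected value is widened:

     int E_ = C < D ? A : B;
     long E = (long) E_;  */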
1103 static gimple *
1104 vect_recog_cond_expr_convert_pattern (vec_info *vinfo,
1105 stmt_vec_info stmt_vinfo, tree *type_out)
1107 gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
1108 tree lhs, match[4], temp, type, new_lhs, op2;
1109 gimple *cond_stmt;
1110 gimple *pattern_stmt;
1112 if (!last_stmt)
1113 return NULL;
1115 lhs = gimple_assign_lhs (last_stmt);
1117 /* Find E = C cmp D ? (TYPE3) A : (TYPE3) B;
1118 TYPE_PRECISION (A) == TYPE_PRECISION (C). */
1119 if (!gimple_cond_expr_convert_p (lhs, &match[0], NULL))
1120 return NULL;
1122 vect_pattern_detected ("vect_recog_cond_expr_convert_pattern", last_stmt);
1124 op2 = match[2];
1125 type = TREE_TYPE (match[1]);
1126 if (TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (match[2])))
1128 op2 = vect_recog_temp_ssa_var (type, NULL);
1129 gimple* nop_stmt = gimple_build_assign (op2, NOP_EXPR, match[2]);
1130 append_pattern_def_seq (vinfo, stmt_vinfo, nop_stmt,
1131 get_vectype_for_scalar_type (vinfo, type));
1134 temp = vect_recog_temp_ssa_var (type, NULL);
1135 cond_stmt = gimple_build_assign (temp, build3 (COND_EXPR, type, match[3],
1136 match[1], op2));
1137 append_pattern_def_seq (vinfo, stmt_vinfo, cond_stmt,
1138 get_vectype_for_scalar_type (vinfo, type));
1139 new_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
1140 pattern_stmt = gimple_build_assign (new_lhs, NOP_EXPR, temp);
1141 *type_out = STMT_VINFO_VECTYPE (stmt_vinfo);
1143 if (dump_enabled_p ())
1144 dump_printf_loc (MSG_NOTE, vect_location,
1145 "created pattern stmt: %G", pattern_stmt);
1146 return pattern_stmt;
1149 /* Function vect_recog_dot_prod_pattern
1151 Try to find the following pattern:
1153 type1a x_t;
1154 type1b y_t;
1155 TYPE1 prod;
1156 TYPE2 sum = init;
1157 loop:
1158 sum_0 = phi <init, sum_1>
1159 S1 x_t = ...
1160 S2 y_t = ...
1161 S3 x_T = (TYPE1) x_t;
1162 S4 y_T = (TYPE1) y_t;
1163 S5 prod = x_T * y_T;
1164 [S6 prod = (TYPE2) prod; #optional]
1165 S7 sum_1 = prod + sum_0;
1167 where 'TYPE1' is exactly double the size of 'type1a' and 'type1b'. The
1168 sign of 'TYPE1' must match the sign of either 'type1a' or 'type1b', but
1169 'type1a' and 'type1b' may themselves differ in sign.
1171 Input:
1173 * STMT_VINFO: The stmt from which the pattern search begins. In the
1174 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
1175 will be detected.
1177 Output:
1179 * TYPE_OUT: The type of the output of this pattern.
1181 * Return value: A new stmt that will be used to replace the sequence of
1182 stmts that constitute the pattern. In this case it will be:
1183 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
1185 Note: The dot-prod idiom is a widening reduction pattern that is
1186 vectorized without preserving all the intermediate results. It
1187 produces only N/2 (widened) results (by summing up pairs of
1188 intermediate results) rather than all N results. Therefore, we
1189 cannot allow this pattern when we want to get all the results and in
1190 the correct order (as is the case when this computation is in an
1191 inner-loop nested in an outer-loop that is being vectorized). */
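/* Illustrative C source for the shape described above (names invented),
   assuming the target provides a suitable dot-product optab:

     short x[N], y[N];
     int sum = 0;
     for (int i = 0; i < N; ++i)
       sum += x[i] * y[i];   // x[i], y[i] promoted to int, multiplied,
			     // and accumulated into the reduction SUM.  */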
1193 static gimple *
1194 vect_recog_dot_prod_pattern (vec_info *vinfo,
1195 stmt_vec_info stmt_vinfo, tree *type_out)
1197 tree oprnd0, oprnd1;
1198 gimple *last_stmt = stmt_vinfo->stmt;
1199 tree type, half_type;
1200 gimple *pattern_stmt;
1201 tree var;
1203 /* Look for the following pattern
1204 DX = (TYPE1) X;
1205 DY = (TYPE1) Y;
1206 DPROD = DX * DY;
1207 DDPROD = (TYPE2) DPROD;
1208 sum_1 = DDPROD + sum_0;
1209 In which
1210 - DX is double the size of X
1211 - DY is double the size of Y
1212 - DX, DY, DPROD all have the same type but the signs of
1213 X, Y and DPROD can differ.
1214 - sum is the same size as DPROD or bigger
1215 - sum has been recognized as a reduction variable.
1217 This is equivalent to:
1218 DPROD = X w* Y; #widen mult
1219 sum_1 = DPROD w+ sum_0; #widen summation
1220 or
1221 DPROD = X w* Y; #widen mult
1222 sum_1 = DPROD + sum_0; #summation */
1225 /* Starting from LAST_STMT, follow the defs of its uses in search
1226 of the above pattern. */
1228 if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1229 &oprnd0, &oprnd1))
1230 return NULL;
1232 type = TREE_TYPE (gimple_get_lhs (last_stmt));
1234 vect_unpromoted_value unprom_mult;
1235 oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);
1237 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1238 we know that oprnd1 is the reduction variable (defined by a loop-header
1239 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1240 Left to check that oprnd0 is defined by a (widen_)mult_expr */
1241 if (!oprnd0)
1242 return NULL;
1244 stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
1245 if (!mult_vinfo)
1246 return NULL;
1248 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
1249 inside the loop (in case we are analyzing an outer-loop). */
1250 vect_unpromoted_value unprom0[2];
1251 enum optab_subtype subtype = optab_vector;
1252 if (!vect_widened_op_tree (vinfo, mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
1253 false, 2, unprom0, &half_type, &subtype))
1254 return NULL;
1256 /* If there are two widening operations, make sure they agree on the sign
1257 of the extension. The result of an optab_vector_mixed_sign operation
1258 is signed; otherwise, the result has the same sign as the operands. */
1259 if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
1260 && (subtype == optab_vector_mixed_sign
1261 ? TYPE_UNSIGNED (unprom_mult.type)
1262 : TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type)))
1263 return NULL;
1265 vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);
1267 /* If the inputs have mixed signs, canonicalize on using the signed
1268 input type for analysis. This also helps when emulating mixed-sign
1269 operations using signed operations. */
1270 if (subtype == optab_vector_mixed_sign)
1271 half_type = signed_type_for (half_type);
1273 tree half_vectype;
1274 if (!vect_supportable_direct_optab_p (vinfo, type, DOT_PROD_EXPR, half_type,
1275 type_out, &half_vectype, subtype))
1277 /* We can emulate a mixed-sign dot-product using a sequence of
1278 signed dot-products; see vect_emulate_mixed_dot_prod for details. */
1279 if (subtype != optab_vector_mixed_sign
1280 || !vect_supportable_direct_optab_p (vinfo, signed_type_for (type),
1281 DOT_PROD_EXPR, half_type,
1282 type_out, &half_vectype,
1283 optab_vector))
1284 return NULL;
1286 *type_out = signed_or_unsigned_type_for (TYPE_UNSIGNED (type),
1287 *type_out);
1290 /* Get the inputs in the appropriate types. */
1291 tree mult_oprnd[2];
1292 vect_convert_inputs (vinfo, stmt_vinfo, 2, mult_oprnd, half_type,
1293 unprom0, half_vectype, subtype);
1295 var = vect_recog_temp_ssa_var (type, NULL);
1296 pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
1297 mult_oprnd[0], mult_oprnd[1], oprnd1);
1299 return pattern_stmt;
1303 /* Function vect_recog_sad_pattern
1305 Try to find the following Sum of Absolute Difference (SAD) pattern:
1307 type x_t, y_t;
1308 signed TYPE1 diff, abs_diff;
1309 TYPE2 sum = init;
1310 loop:
1311 sum_0 = phi <init, sum_1>
1312 S1 x_t = ...
1313 S2 y_t = ...
1314 S3 x_T = (TYPE1) x_t;
1315 S4 y_T = (TYPE1) y_t;
1316 S5 diff = x_T - y_T;
1317 S6 abs_diff = ABS_EXPR <diff>;
1318 [S7 abs_diff = (TYPE2) abs_diff; #optional]
1319 S8 sum_1 = abs_diff + sum_0;
1321 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
1322 same size as 'TYPE1' or bigger. This is a special case of a reduction
1323 computation.
1325 Input:
1327 * STMT_VINFO: The stmt from which the pattern search begins. In the
1328 example, when this function is called with S8, the pattern
1329 {S3,S4,S5,S6,S7,S8} will be detected.
1331 Output:
1333 * TYPE_OUT: The type of the output of this pattern.
1335 * Return value: A new stmt that will be used to replace the sequence of
1336 stmts that constitute the pattern. In this case it will be:
1337 SAD_EXPR <x_t, y_t, sum_0> */
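/* Illustrative C source for the shape described above (names invented),
   assuming abs is the standard <stdlib.h> function and the target provides
   a SAD optab for these types:

     unsigned char x[N], y[N];
     int sum = 0;
     for (int i = 0; i < N; ++i)
       sum += abs (x[i] - y[i]);   // difference and ABS computed in int.  */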
1340 static gimple *
1341 vect_recog_sad_pattern (vec_info *vinfo,
1342 stmt_vec_info stmt_vinfo, tree *type_out)
1344 gimple *last_stmt = stmt_vinfo->stmt;
1345 tree half_type;
1347 /* Look for the following pattern
1348 DX = (TYPE1) X;
1349 DY = (TYPE1) Y;
1350 DDIFF = DX - DY;
1351 DAD = ABS_EXPR <DDIFF>;
1352 DAD = (TYPE2) DAD;
1353 sum_1 = DAD + sum_0;
1354 In which
1355 - DX is at least double the size of X
1356 - DY is at least double the size of Y
1357 - DX, DY, DDIFF, DAD all have the same type
1358 - sum is the same size as DAD or bigger
1359 - sum has been recognized as a reduction variable.
1361 This is equivalent to:
1362 DDIFF = X w- Y; #widen sub
1363 DAD = ABS_EXPR <DDIFF>;
1364 sum_1 = DAD w+ sum_0; #widen summation
1365 or
1366 DDIFF = X w- Y; #widen sub
1367 DAD = ABS_EXPR <DDIFF>;
1368 sum_1 = DAD + sum_0; #summation */
1371 /* Starting from LAST_STMT, follow the defs of its uses in search
1372 of the above pattern. */
1374 tree plus_oprnd0, plus_oprnd1;
1375 if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1376 &plus_oprnd0, &plus_oprnd1))
1377 return NULL;
1379 tree sum_type = TREE_TYPE (gimple_get_lhs (last_stmt));
1381 /* Any non-truncating sequence of conversions is OK here, since
1382 with a successful match, the result of the ABS(U) is known to fit
1383 within the nonnegative range of the result type. (It cannot be the
1384 negative of the minimum signed value due to the range of the widening
1385 MINUS_EXPR.) */
1386 vect_unpromoted_value unprom_abs;
1387 plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
1388 &unprom_abs);
1390 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1391 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
1392 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
1393 Then check that plus_oprnd0 is defined by an abs_expr. */
1395 if (!plus_oprnd0)
1396 return NULL;
1398 stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
1399 if (!abs_stmt_vinfo)
1400 return NULL;
1402 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
1403 inside the loop (in case we are analyzing an outer-loop). */
1404 gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
1405 vect_unpromoted_value unprom[2];
1407 if (!abs_stmt)
1409 gcall *abd_stmt = dyn_cast <gcall *> (abs_stmt_vinfo->stmt);
1410 if (!abd_stmt
1411 || !gimple_call_internal_p (abd_stmt)
1412 || gimple_call_num_args (abd_stmt) != 2)
1413 return NULL;
1415 tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
1416 tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
1418 if (gimple_call_internal_fn (abd_stmt) == IFN_ABD)
1420 if (!vect_look_through_possible_promotion (vinfo, abd_oprnd0,
1421 &unprom[0])
1422 || !vect_look_through_possible_promotion (vinfo, abd_oprnd1,
1423 &unprom[1]))
1424 return NULL;
1426 else if (gimple_call_internal_fn (abd_stmt) == IFN_VEC_WIDEN_ABD)
1428 unprom[0].op = abd_oprnd0;
1429 unprom[0].type = TREE_TYPE (abd_oprnd0);
1430 unprom[1].op = abd_oprnd1;
1431 unprom[1].type = TREE_TYPE (abd_oprnd1);
1433 else
1434 return NULL;
1436 half_type = unprom[0].type;
1438 else if (!vect_recog_absolute_difference (vinfo, abs_stmt, &half_type,
1439 unprom, NULL))
1440 return NULL;
1442 vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
1444 tree half_vectype;
1445 if (!vect_supportable_direct_optab_p (vinfo, sum_type, SAD_EXPR, half_type,
1446 type_out, &half_vectype))
1447 return NULL;
1449 /* Get the inputs to the SAD_EXPR in the appropriate types. */
1450 tree sad_oprnd[2];
1451 vect_convert_inputs (vinfo, stmt_vinfo, 2, sad_oprnd, half_type,
1452 unprom, half_vectype);
1454 tree var = vect_recog_temp_ssa_var (sum_type, NULL);
1455 gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
1456 sad_oprnd[1], plus_oprnd1);
1458 return pattern_stmt;
1461 /* Function vect_recog_abd_pattern
1463 Try to find the following ABsolute Difference (ABD) or
1464 widening ABD (WIDEN_ABD) pattern:
1466 TYPE1 x;
1467 TYPE2 y;
1468 TYPE3 x_cast = (TYPE3) x; // widening or no-op
1469 TYPE3 y_cast = (TYPE3) y; // widening or no-op
1470 TYPE3 diff = x_cast - y_cast;
1471 TYPE4 diff_cast = (TYPE4) diff; // widening or no-op
1472 TYPE5 abs = ABS(U)_EXPR <diff_cast>;
1474 WIDEN_ABD exists to optimize the case where TYPE4 is at least
1475 twice as wide as TYPE3.
1477 Input:
1479 * STMT_VINFO: The stmt from which the pattern search begins
1481 Output:
1483 * TYPE_OUT: The type of the output of this pattern
1485 * Return value: A new stmt that will be used to replace the sequence of
1486 stmts that constitute the pattern, principally:
1487 out = IFN_ABD (x, y)
1488 out = IFN_WIDEN_ABD (x, y) */
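/* Illustrative C source of a statement this pattern is aimed at (names
   invented), assuming the target implements IFN_ABD for the mode and abs is
   the standard <stdlib.h> function:

     unsigned char x[N], y[N], out[N];
     for (int i = 0; i < N; ++i)
       out[i] = abs (x[i] - y[i]);   // absolute difference, no reduction.  */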
1491 static gimple *
1492 vect_recog_abd_pattern (vec_info *vinfo,
1493 stmt_vec_info stmt_vinfo, tree *type_out)
1495 gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
1496 if (!last_stmt)
1497 return NULL;
1499 tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
1501 vect_unpromoted_value unprom[2];
1502 gassign *diff_stmt;
1503 tree half_type;
1504 if (!vect_recog_absolute_difference (vinfo, last_stmt, &half_type,
1505 unprom, &diff_stmt))
1506 return NULL;
1508 tree abd_in_type, abd_out_type;
1510 if (half_type)
1512 abd_in_type = half_type;
1513 abd_out_type = abd_in_type;
1515 else
1517 unprom[0].op = gimple_assign_rhs1 (diff_stmt);
1518 unprom[1].op = gimple_assign_rhs2 (diff_stmt);
1519 abd_in_type = signed_type_for (out_type);
1520 abd_out_type = abd_in_type;
1523 tree vectype_in = get_vectype_for_scalar_type (vinfo, abd_in_type);
1524 if (!vectype_in)
1525 return NULL;
1527 internal_fn ifn = IFN_ABD;
1528 tree vectype_out = vectype_in;
1530 if (TYPE_PRECISION (out_type) >= TYPE_PRECISION (abd_in_type) * 2
1531 && stmt_vinfo->min_output_precision >= TYPE_PRECISION (abd_in_type) * 2)
1533 tree mid_type
1534 = build_nonstandard_integer_type (TYPE_PRECISION (abd_in_type) * 2,
1535 TYPE_UNSIGNED (abd_in_type));
1536 tree mid_vectype = get_vectype_for_scalar_type (vinfo, mid_type);
1538 code_helper dummy_code;
1539 int dummy_int;
1540 auto_vec<tree> dummy_vec;
1541 if (mid_vectype
1542 && supportable_widening_operation (vinfo, IFN_VEC_WIDEN_ABD,
1543 stmt_vinfo, mid_vectype,
1544 vectype_in,
1545 &dummy_code, &dummy_code,
1546 &dummy_int, &dummy_vec))
1548 ifn = IFN_VEC_WIDEN_ABD;
1549 abd_out_type = mid_type;
1550 vectype_out = mid_vectype;
1554 if (ifn == IFN_ABD
1555 && !direct_internal_fn_supported_p (ifn, vectype_in,
1556 OPTIMIZE_FOR_SPEED))
1557 return NULL;
1559 vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
1561 tree abd_oprnds[2];
1562 vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
1563 abd_in_type, unprom, vectype_in);
1565 *type_out = get_vectype_for_scalar_type (vinfo, out_type);
1567 tree abd_result = vect_recog_temp_ssa_var (abd_out_type, NULL);
1568 gcall *abd_stmt = gimple_build_call_internal (ifn, 2,
1569 abd_oprnds[0], abd_oprnds[1]);
1570 gimple_call_set_lhs (abd_stmt, abd_result);
1571 gimple_set_location (abd_stmt, gimple_location (last_stmt));
1573 gimple *stmt = abd_stmt;
1574 if (TYPE_PRECISION (abd_in_type) == TYPE_PRECISION (abd_out_type)
1575 && TYPE_PRECISION (abd_out_type) < TYPE_PRECISION (out_type)
1576 && !TYPE_UNSIGNED (abd_out_type))
1578 tree unsign = unsigned_type_for (abd_out_type);
1579 stmt = vect_convert_output (vinfo, stmt_vinfo, unsign, stmt, vectype_out);
1580 vectype_out = get_vectype_for_scalar_type (vinfo, unsign);
1583 return vect_convert_output (vinfo, stmt_vinfo, out_type, stmt, vectype_out);
1586 /* Recognize an operation that performs ORIG_CODE on widened inputs,
1587 so that it can be treated as though it had the form:
1589 A_TYPE a;
1590 B_TYPE b;
1591 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1592 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1593 | RES_TYPE a_extend = (RES_TYPE) a_cast; // promotion from HALF_TYPE
1594 | RES_TYPE b_extend = (RES_TYPE) b_cast; // promotion from HALF_TYPE
1595 | RES_TYPE res = a_extend ORIG_CODE b_extend;
1597 Try to replace the pattern with:
1599 A_TYPE a;
1600 B_TYPE b;
1601 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1602 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1603 | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
1604 | RES_TYPE res = (RES_TYPE) ext; // possible no-op
1606 where EXT_TYPE is wider than HALF_TYPE but has the same signedness.
1608 SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts. NAME is the
1609 name of the pattern being matched, for dump purposes. */
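/* Illustrative example (names invented): with ORIG_CODE == MULT_EXPR this
   catches sources such as

     short a[N], b[N];
     int c[N];
     for (int i = 0; i < N; ++i)
       c[i] = a[i] * b[i];   // multiplication of int-promoted shorts

   and replaces the int multiplication of promoted shorts by a single
   widening multiply, followed by a possible no-op conversion to the result
   type.  */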
1611 static gimple *
1612 vect_recog_widen_op_pattern (vec_info *vinfo,
1613 stmt_vec_info last_stmt_info, tree *type_out,
1614 tree_code orig_code, code_helper wide_code,
1615 bool shift_p, const char *name)
1617 gimple *last_stmt = last_stmt_info->stmt;
1619 vect_unpromoted_value unprom[2];
1620 tree half_type;
1621 if (!vect_widened_op_tree (vinfo, last_stmt_info, orig_code, orig_code,
1622 shift_p, 2, unprom, &half_type))
1624 return NULL;
1626 /* Pattern detected. */
1627 vect_pattern_detected (name, last_stmt);
1629 tree type = TREE_TYPE (gimple_get_lhs (last_stmt));
1630 tree itype = type;
1631 if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
1632 || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
1633 itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
1634 TYPE_UNSIGNED (half_type));
1636 /* Check target support */
1637 tree vectype = get_vectype_for_scalar_type (vinfo, half_type);
1638 tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
1639 tree ctype = itype;
1640 tree vecctype = vecitype;
1641 if (orig_code == MINUS_EXPR
1642 && TYPE_UNSIGNED (itype)
1643 && TYPE_PRECISION (type) > TYPE_PRECISION (itype))
1645 /* Subtraction is special, even if half_type is unsigned and no matter
1646 whether type is signed or unsigned, if type is wider than itype,
1647 we need to sign-extend from the widening operation result to the
1648 result type.
1649 Consider half_type unsigned char, operand 1 0xfe, operand 2 0xff,
1650 itype unsigned short and type either int or unsigned int.
1651 Widened (unsigned short) 0xfe - (unsigned short) 0xff is
1652 (unsigned short) 0xffff, but for type int we want the result -1
1653 and for type unsigned int 0xffffffff rather than 0xffff. */
1654 ctype = build_nonstandard_integer_type (TYPE_PRECISION (itype), 0);
1655 vecctype = get_vectype_for_scalar_type (vinfo, ctype);
1658 code_helper dummy_code;
1659 int dummy_int;
1660 auto_vec<tree> dummy_vec;
1661 if (!vectype
1662 || !vecitype
1663 || !vecctype
1664 || !supportable_widening_operation (vinfo, wide_code, last_stmt_info,
1665 vecitype, vectype,
1666 &dummy_code, &dummy_code,
1667 &dummy_int, &dummy_vec))
1668 return NULL;
1670 *type_out = get_vectype_for_scalar_type (vinfo, type);
1671 if (!*type_out)
1672 return NULL;
1674 tree oprnd[2];
1675 vect_convert_inputs (vinfo, last_stmt_info,
1676 2, oprnd, half_type, unprom, vectype);
1678 tree var = vect_recog_temp_ssa_var (itype, NULL);
1679 gimple *pattern_stmt = vect_gimple_build (var, wide_code, oprnd[0], oprnd[1]);
1681 if (vecctype != vecitype)
1682 pattern_stmt = vect_convert_output (vinfo, last_stmt_info, ctype,
1683 pattern_stmt, vecitype);
1685 return vect_convert_output (vinfo, last_stmt_info,
1686 type, pattern_stmt, vecctype);
1689 /* Try to detect multiplication on widened inputs, converting MULT_EXPR
1690 to WIDEN_MULT_EXPR. See vect_recog_widen_op_pattern for details. */
1692 static gimple *
1693 vect_recog_widen_mult_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1694 tree *type_out)
1696 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1697 MULT_EXPR, WIDEN_MULT_EXPR, false,
1698 "vect_recog_widen_mult_pattern");
1701 /* Try to detect addition on widened inputs, converting PLUS_EXPR
1702 to IFN_VEC_WIDEN_PLUS. See vect_recog_widen_op_pattern for details. */
1704 static gimple *
1705 vect_recog_widen_plus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1706 tree *type_out)
1708 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1709 PLUS_EXPR, IFN_VEC_WIDEN_PLUS,
1710 false, "vect_recog_widen_plus_pattern");
1713 /* Try to detect subtraction on widened inputs, converting MINUS_EXPR
1714 to IFN_VEC_WIDEN_MINUS. See vect_recog_widen_op_pattern for details. */
1715 static gimple *
1716 vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1717 tree *type_out)
1719 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1720 MINUS_EXPR, IFN_VEC_WIDEN_MINUS,
1721 false, "vect_recog_widen_minus_pattern");
1724 /* Try to detect abd on widened inputs, converting IFN_ABD
1725 to IFN_VEC_WIDEN_ABD. */
1726 static gimple *
1727 vect_recog_widen_abd_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1728 tree *type_out)
1730 gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
1731 if (!last_stmt || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (last_stmt)))
1732 return NULL;
1734 tree last_rhs = gimple_assign_rhs1 (last_stmt);
1736 tree in_type = TREE_TYPE (last_rhs);
1737 tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
1738 if (!INTEGRAL_TYPE_P (in_type)
1739 || !INTEGRAL_TYPE_P (out_type)
1740 || TYPE_PRECISION (in_type) * 2 != TYPE_PRECISION (out_type)
1741 || !TYPE_UNSIGNED (in_type))
1742 return NULL;
1744 vect_unpromoted_value unprom;
1745 tree op = vect_look_through_possible_promotion (vinfo, last_rhs, &unprom);
1746 if (!op || TYPE_PRECISION (TREE_TYPE (op)) != TYPE_PRECISION (in_type))
1747 return NULL;
1749 stmt_vec_info abd_pattern_vinfo = vect_get_internal_def (vinfo, op);
1750 if (!abd_pattern_vinfo)
1751 return NULL;
1753 abd_pattern_vinfo = vect_stmt_to_vectorize (abd_pattern_vinfo);
1754 gcall *abd_stmt = dyn_cast <gcall *> (STMT_VINFO_STMT (abd_pattern_vinfo));
1755 if (!abd_stmt
1756 || !gimple_call_internal_p (abd_stmt)
1757 || gimple_call_internal_fn (abd_stmt) != IFN_ABD)
1758 return NULL;
1760 tree vectype_in = get_vectype_for_scalar_type (vinfo, in_type);
1761 tree vectype_out = get_vectype_for_scalar_type (vinfo, out_type);
1763 code_helper dummy_code;
1764 int dummy_int;
1765 auto_vec<tree> dummy_vec;
1766 if (!supportable_widening_operation (vinfo, IFN_VEC_WIDEN_ABD, stmt_vinfo,
1767 vectype_out, vectype_in,
1768 &dummy_code, &dummy_code,
1769 &dummy_int, &dummy_vec))
1770 return NULL;
1772 vect_pattern_detected ("vect_recog_widen_abd_pattern", last_stmt);
1774 *type_out = vectype_out;
1776 tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
1777 tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
1778 tree widen_abd_result = vect_recog_temp_ssa_var (out_type, NULL);
1779 gcall *widen_abd_stmt = gimple_build_call_internal (IFN_VEC_WIDEN_ABD, 2,
1780 abd_oprnd0, abd_oprnd1);
1781 gimple_call_set_lhs (widen_abd_stmt, widen_abd_result);
1782 gimple_set_location (widen_abd_stmt, gimple_location (last_stmt));
1783 return widen_abd_stmt;
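/* Conceptually (scalar types shown only for illustration), this rewrites

     unsigned char d = .ABD (a, b);
     unsigned short w = (unsigned short) d;

   into the single widening operation

     unsigned short w = .VEC_WIDEN_ABD (a, b);  */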
1786 /* Function vect_recog_ctz_ffs_pattern
1788 Try to find the following pattern:
1790 TYPE1 A;
1791 TYPE1 B;
1793        B = __builtin_ctz{,l,ll} (A);
1795      or
1797        B = __builtin_ffs{,l,ll} (A);
1799 Input:
1801 * STMT_VINFO: The stmt from which the pattern search begins.
1802 here it starts with B = __builtin_* (A);
1804 Output:
1806 * TYPE_OUT: The vector type of the output of this pattern.
1808 * Return value: A new stmt that will be used to replace the sequence of
1809 stmts that constitute the pattern, using clz or popcount builtins. */
1811 static gimple *
1812 vect_recog_ctz_ffs_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1813 tree *type_out)
1815 gimple *call_stmt = stmt_vinfo->stmt;
1816 gimple *pattern_stmt;
1817 tree rhs_oprnd, rhs_type, lhs_oprnd, lhs_type, vec_type, vec_rhs_type;
1818 tree new_var;
1819 internal_fn ifn = IFN_LAST, ifnnew = IFN_LAST;
1820 bool defined_at_zero = true, defined_at_zero_new = false;
1821 int val = 0, val_new = 0, val_cmp = 0;
1822 int prec;
1823 int sub = 0, add = 0;
1824 location_t loc;
1826 if (!is_gimple_call (call_stmt))
1827 return NULL;
1829 if (gimple_call_num_args (call_stmt) != 1
1830 && gimple_call_num_args (call_stmt) != 2)
1831 return NULL;
1833 rhs_oprnd = gimple_call_arg (call_stmt, 0);
1834 rhs_type = TREE_TYPE (rhs_oprnd);
1835 lhs_oprnd = gimple_call_lhs (call_stmt);
1836 if (!lhs_oprnd)
1837 return NULL;
1838 lhs_type = TREE_TYPE (lhs_oprnd);
1839 if (!INTEGRAL_TYPE_P (lhs_type)
1840 || !INTEGRAL_TYPE_P (rhs_type)
1841 || !type_has_mode_precision_p (rhs_type)
1842 || TREE_CODE (rhs_oprnd) != SSA_NAME)
1843 return NULL;
1845 switch (gimple_call_combined_fn (call_stmt))
1847 CASE_CFN_CTZ:
1848 ifn = IFN_CTZ;
1849 if (!gimple_call_internal_p (call_stmt)
1850 || gimple_call_num_args (call_stmt) != 2)
1851 defined_at_zero = false;
1852 else
1853 val = tree_to_shwi (gimple_call_arg (call_stmt, 1));
1854 break;
1855 CASE_CFN_FFS:
1856 ifn = IFN_FFS;
1857 break;
1858 default:
1859 return NULL;
1862 prec = TYPE_PRECISION (rhs_type);
1863 loc = gimple_location (call_stmt);
1865 vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);
1866 if (!vec_type)
1867 return NULL;
1869 vec_rhs_type = get_vectype_for_scalar_type (vinfo, rhs_type);
1870 if (!vec_rhs_type)
1871 return NULL;
1873 /* Do it only if the backend doesn't have ctz<vector_mode>2 or
1874 ffs<vector_mode>2 pattern but does have clz<vector_mode>2 or
1875 popcount<vector_mode>2. */
1876 if (!vec_type
1877 || direct_internal_fn_supported_p (ifn, vec_rhs_type,
1878 OPTIMIZE_FOR_SPEED))
1879 return NULL;
1881 if (ifn == IFN_FFS
1882 && direct_internal_fn_supported_p (IFN_CTZ, vec_rhs_type,
1883 OPTIMIZE_FOR_SPEED))
1885 ifnnew = IFN_CTZ;
1886 defined_at_zero_new
1887 = CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (rhs_type),
1888 val_new) == 2;
1890 else if (direct_internal_fn_supported_p (IFN_CLZ, vec_rhs_type,
1891 OPTIMIZE_FOR_SPEED))
1893 ifnnew = IFN_CLZ;
1894 defined_at_zero_new
1895 = CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (rhs_type),
1896 val_new) == 2;
1898 if ((ifnnew == IFN_LAST
1899 || (defined_at_zero && !defined_at_zero_new))
1900 && direct_internal_fn_supported_p (IFN_POPCOUNT, vec_rhs_type,
1901 OPTIMIZE_FOR_SPEED))
1903 ifnnew = IFN_POPCOUNT;
1904 defined_at_zero_new = true;
1905 val_new = prec;
1907 if (ifnnew == IFN_LAST)
1908 return NULL;
1910   vect_pattern_detected ("vect_recog_ctz_ffs_pattern", call_stmt);
1912 val_cmp = val_new;
1913 if ((ifnnew == IFN_CLZ
1914 && defined_at_zero
1915 && defined_at_zero_new
1916 && val == prec
1917 && val_new == prec)
1918 || (ifnnew == IFN_POPCOUNT && ifn == IFN_CTZ))
1920 /* .CTZ (X) = PREC - .CLZ ((X - 1) & ~X)
1921 .CTZ (X) = .POPCOUNT ((X - 1) & ~X). */
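      /* E.g. for X = 8 (0b1000) and PREC = 8: (X - 1) & ~X = 0b0111, whose
         .POPCOUNT is 3 and whose .CLZ is 5, so both PREC - .CLZ and
         .POPCOUNT yield .CTZ (8) = 3.  */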
1922 if (ifnnew == IFN_CLZ)
1923 sub = prec;
1924 val_cmp = prec;
1926 if (!TYPE_UNSIGNED (rhs_type))
1928 rhs_type = unsigned_type_for (rhs_type);
1929 vec_rhs_type = get_vectype_for_scalar_type (vinfo, rhs_type);
1930 new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
1931 pattern_stmt = gimple_build_assign (new_var, NOP_EXPR, rhs_oprnd);
1932 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt,
1933 vec_rhs_type);
1934 rhs_oprnd = new_var;
1937 tree m1 = vect_recog_temp_ssa_var (rhs_type, NULL);
1938 pattern_stmt = gimple_build_assign (m1, PLUS_EXPR, rhs_oprnd,
1939 build_int_cst (rhs_type, -1));
1940 gimple_set_location (pattern_stmt, loc);
1941 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1943 new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
1944 pattern_stmt = gimple_build_assign (new_var, BIT_NOT_EXPR, rhs_oprnd);
1945 gimple_set_location (pattern_stmt, loc);
1946 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1947 rhs_oprnd = new_var;
1949 new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
1950 pattern_stmt = gimple_build_assign (new_var, BIT_AND_EXPR,
1951 m1, rhs_oprnd);
1952 gimple_set_location (pattern_stmt, loc);
1953 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1954 rhs_oprnd = new_var;
1956 else if (ifnnew == IFN_CLZ)
1958 /* .CTZ (X) = (PREC - 1) - .CLZ (X & -X)
1959 .FFS (X) = PREC - .CLZ (X & -X). */
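      /* E.g. for X = 12 (0b1100) and PREC = 8: X & -X = 0b0100, whose .CLZ
         is 5, so (PREC - 1) - 5 = 2 = .CTZ (12) and PREC - 5 = 3
         = .FFS (12).  */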
1960 sub = prec - (ifn == IFN_CTZ);
1961 val_cmp = sub - val_new;
1963 tree neg = vect_recog_temp_ssa_var (rhs_type, NULL);
1964 pattern_stmt = gimple_build_assign (neg, NEGATE_EXPR, rhs_oprnd);
1965 gimple_set_location (pattern_stmt, loc);
1966 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1968 new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
1969 pattern_stmt = gimple_build_assign (new_var, BIT_AND_EXPR,
1970 rhs_oprnd, neg);
1971 gimple_set_location (pattern_stmt, loc);
1972 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1973 rhs_oprnd = new_var;
1975 else if (ifnnew == IFN_POPCOUNT)
1977 /* .CTZ (X) = PREC - .POPCOUNT (X | -X)
1978 .FFS (X) = (PREC + 1) - .POPCOUNT (X | -X). */
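      /* E.g. for X = 12 (0b1100) and PREC = 8: X | -X = 0b11111100, whose
         .POPCOUNT is 6, so PREC - 6 = 2 = .CTZ (12) and (PREC + 1) - 6 = 3
         = .FFS (12).  */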
1979 sub = prec + (ifn == IFN_FFS);
1980 val_cmp = sub;
1982 tree neg = vect_recog_temp_ssa_var (rhs_type, NULL);
1983 pattern_stmt = gimple_build_assign (neg, NEGATE_EXPR, rhs_oprnd);
1984 gimple_set_location (pattern_stmt, loc);
1985 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1987 new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
1988 pattern_stmt = gimple_build_assign (new_var, BIT_IOR_EXPR,
1989 rhs_oprnd, neg);
1990 gimple_set_location (pattern_stmt, loc);
1991 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1992 rhs_oprnd = new_var;
1994 else if (ifnnew == IFN_CTZ)
1996 /* .FFS (X) = .CTZ (X) + 1. */
1997 add = 1;
1998 val_cmp++;
2001 /* Create B = .IFNNEW (A). */
2002 new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2003 if ((ifnnew == IFN_CLZ || ifnnew == IFN_CTZ) && defined_at_zero_new)
2004 pattern_stmt
2005 = gimple_build_call_internal (ifnnew, 2, rhs_oprnd,
2006 build_int_cst (integer_type_node,
2007 val_new));
2008 else
2009 pattern_stmt = gimple_build_call_internal (ifnnew, 1, rhs_oprnd);
2010 gimple_call_set_lhs (pattern_stmt, new_var);
2011 gimple_set_location (pattern_stmt, loc);
2012 *type_out = vec_type;
2014 if (sub)
2016 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
2017 tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2018 pattern_stmt = gimple_build_assign (ret_var, MINUS_EXPR,
2019 build_int_cst (lhs_type, sub),
2020 new_var);
2021 gimple_set_location (pattern_stmt, loc);
2022 new_var = ret_var;
2024 else if (add)
2026 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
2027 tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2028 pattern_stmt = gimple_build_assign (ret_var, PLUS_EXPR, new_var,
2029 build_int_cst (lhs_type, add));
2030 gimple_set_location (pattern_stmt, loc);
2031 new_var = ret_var;
2034 if (defined_at_zero
2035 && (!defined_at_zero_new || val != val_cmp))
2037 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
2038 tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2039 rhs_oprnd = gimple_call_arg (call_stmt, 0);
2040 rhs_type = TREE_TYPE (rhs_oprnd);
2041 tree cmp = build2_loc (loc, NE_EXPR, boolean_type_node,
2042 rhs_oprnd, build_zero_cst (rhs_type));
2043 pattern_stmt = gimple_build_assign (ret_var, COND_EXPR, cmp,
2044 new_var,
2045 build_int_cst (lhs_type, val));
2048 if (dump_enabled_p ())
2049 dump_printf_loc (MSG_NOTE, vect_location,
2050 "created pattern stmt: %G", pattern_stmt);
2052 return pattern_stmt;
2055 /* Function vect_recog_popcount_clz_ctz_ffs_pattern
2057 Try to find the following pattern:
2059 UTYPE1 A;
2060 TYPE1 B;
2061 UTYPE2 temp_in;
2062 TYPE3 temp_out;
2063 temp_in = (UTYPE2)A;
2065 temp_out = __builtin_popcount{,l,ll} (temp_in);
2066 B = (TYPE1) temp_out;
2068    UTYPE2 may or may not be equal to TYPE3.
2069    i.e. UTYPE2 is equal to TYPE3 for __builtin_popcount
2070    i.e. UTYPE2 is not equal to TYPE3 for __builtin_popcountll
2072 Input:
2074 * STMT_VINFO: The stmt from which the pattern search begins.
2075 here it starts with B = (TYPE1) temp_out;
2077 Output:
2079 * TYPE_OUT: The vector type of the output of this pattern.
2081 * Return value: A new stmt that will be used to replace the sequence of
2082 stmts that constitute the pattern. In this case it will be:
2083 B = .POPCOUNT (A);
2085 Similarly for clz, ctz and ffs.
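   For example, with a 32-bit unsigned A and a 32-bit B (types here are
   illustrative only),

     B = __builtin_popcountll (A);

   implicitly zero-extends A to 64 bits; the extension adds no set bits, so
   the whole sequence can be replaced by B = .POPCOUNT (A) computed in the
   32-bit type.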
2088 static gimple *
2089 vect_recog_popcount_clz_ctz_ffs_pattern (vec_info *vinfo,
2090 stmt_vec_info stmt_vinfo,
2091 tree *type_out)
2093 gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
2094 gimple *call_stmt, *pattern_stmt;
2095 tree rhs_oprnd, rhs_origin, lhs_oprnd, lhs_type, vec_type, new_var;
2096 internal_fn ifn = IFN_LAST;
2097 int addend = 0;
2099 /* Find B = (TYPE1) temp_out. */
2100 if (!last_stmt)
2101 return NULL;
2102 tree_code code = gimple_assign_rhs_code (last_stmt);
2103 if (!CONVERT_EXPR_CODE_P (code))
2104 return NULL;
2106 lhs_oprnd = gimple_assign_lhs (last_stmt);
2107 lhs_type = TREE_TYPE (lhs_oprnd);
2108 if (!INTEGRAL_TYPE_P (lhs_type))
2109 return NULL;
2111 rhs_oprnd = gimple_assign_rhs1 (last_stmt);
2112 if (TREE_CODE (rhs_oprnd) != SSA_NAME
2113 || !has_single_use (rhs_oprnd))
2114 return NULL;
2115 call_stmt = SSA_NAME_DEF_STMT (rhs_oprnd);
2117 /* Find temp_out = __builtin_popcount{,l,ll} (temp_in); */
2118 if (!is_gimple_call (call_stmt))
2119 return NULL;
2120 switch (gimple_call_combined_fn (call_stmt))
2122 int val;
2123 CASE_CFN_POPCOUNT:
2124 ifn = IFN_POPCOUNT;
2125 break;
2126 CASE_CFN_CLZ:
2127 ifn = IFN_CLZ;
2128 /* Punt if call result is unsigned and defined value at zero
2129 is negative, as the negative value doesn't extend correctly. */
2130 if (TYPE_UNSIGNED (TREE_TYPE (rhs_oprnd))
2131 && gimple_call_internal_p (call_stmt)
2132 && CLZ_DEFINED_VALUE_AT_ZERO
2133 (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd)), val) == 2
2134 && val < 0)
2135 return NULL;
2136 break;
2137 CASE_CFN_CTZ:
2138 ifn = IFN_CTZ;
2139 /* Punt if call result is unsigned and defined value at zero
2140 is negative, as the negative value doesn't extend correctly. */
2141 if (TYPE_UNSIGNED (TREE_TYPE (rhs_oprnd))
2142 && gimple_call_internal_p (call_stmt)
2143 && CTZ_DEFINED_VALUE_AT_ZERO
2144 (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd)), val) == 2
2145 && val < 0)
2146 return NULL;
2147 break;
2148 CASE_CFN_FFS:
2149 ifn = IFN_FFS;
2150 break;
2151 default:
2152 return NULL;
2155 if (gimple_call_num_args (call_stmt) != 1
2156 && gimple_call_num_args (call_stmt) != 2)
2157 return NULL;
2159 rhs_oprnd = gimple_call_arg (call_stmt, 0);
2160 vect_unpromoted_value unprom_diff;
2161 rhs_origin
2162 = vect_look_through_possible_promotion (vinfo, rhs_oprnd, &unprom_diff);
2164 if (!rhs_origin)
2165 return NULL;
2167 /* Input and output of .POPCOUNT should be same-precision integer. */
2168 if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (lhs_type))
2169 return NULL;
2171 /* Also A should be unsigned or same precision as temp_in, otherwise
2172 different builtins/internal functions have different behaviors. */
2173 if (TYPE_PRECISION (unprom_diff.type)
2174 != TYPE_PRECISION (TREE_TYPE (rhs_oprnd)))
2175 switch (ifn)
2177 case IFN_POPCOUNT:
2178 /* For popcount require zero extension, which doesn't add any
2179 further bits to the count. */
2180 if (!TYPE_UNSIGNED (unprom_diff.type))
2181 return NULL;
2182 break;
2183 case IFN_CLZ:
2184 /* clzll (x) == clz (x) + 32 for unsigned x != 0, so ok
2185 if it is undefined at zero or if it matches also for the
2186 defined value there. */
2187 if (!TYPE_UNSIGNED (unprom_diff.type))
2188 return NULL;
2189 if (!type_has_mode_precision_p (lhs_type)
2190 || !type_has_mode_precision_p (TREE_TYPE (rhs_oprnd)))
2191 return NULL;
2192 addend = (TYPE_PRECISION (TREE_TYPE (rhs_oprnd))
2193 - TYPE_PRECISION (lhs_type));
2194 if (gimple_call_internal_p (call_stmt)
2195 && gimple_call_num_args (call_stmt) == 2)
2197 int val1, val2;
2198 val1 = tree_to_shwi (gimple_call_arg (call_stmt, 1));
2199 int d2
2200 = CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
2201 val2);
2202 if (d2 != 2 || val1 != val2 + addend)
2203 return NULL;
2205 break;
2206 case IFN_CTZ:
2207 /* ctzll (x) == ctz (x) for unsigned or signed x != 0, so ok
2208 if it is undefined at zero or if it matches also for the
2209 defined value there. */
2210 if (gimple_call_internal_p (call_stmt)
2211 && gimple_call_num_args (call_stmt) == 2)
2213 int val1, val2;
2214 val1 = tree_to_shwi (gimple_call_arg (call_stmt, 1));
2215 int d2
2216 = CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
2217 val2);
2218 if (d2 != 2 || val1 != val2)
2219 return NULL;
2221 break;
2222 case IFN_FFS:
2223 /* ffsll (x) == ffs (x) for unsigned or signed x. */
2224 break;
2225 default:
2226 gcc_unreachable ();
2229 vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);
2230 /* Do it only if the backend has popcount<vector_mode>2 etc. pattern. */
2231 if (!vec_type)
2232 return NULL;
2234 bool supported
2235 = direct_internal_fn_supported_p (ifn, vec_type, OPTIMIZE_FOR_SPEED);
2236 if (!supported)
2237 switch (ifn)
2239 case IFN_POPCOUNT:
2240 case IFN_CLZ:
2241 return NULL;
2242 case IFN_FFS:
2243 /* vect_recog_ctz_ffs_pattern can implement ffs using ctz. */
2244 if (direct_internal_fn_supported_p (IFN_CTZ, vec_type,
2245 OPTIMIZE_FOR_SPEED))
2246 break;
2247 /* FALLTHRU */
2248 case IFN_CTZ:
2249 /* vect_recog_ctz_ffs_pattern can implement ffs or ctz using
2250 clz or popcount. */
2251 if (direct_internal_fn_supported_p (IFN_CLZ, vec_type,
2252 OPTIMIZE_FOR_SPEED))
2253 break;
2254 if (direct_internal_fn_supported_p (IFN_POPCOUNT, vec_type,
2255 OPTIMIZE_FOR_SPEED))
2256 break;
2257 return NULL;
2258 default:
2259 gcc_unreachable ();
2262   vect_pattern_detected ("vect_recog_popcount_clz_ctz_ffs_pattern",
2263 call_stmt);
2265 /* Create B = .POPCOUNT (A). */
2266 new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2267 tree arg2 = NULL_TREE;
2268 int val;
2269 if (ifn == IFN_CLZ
2270 && CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
2271 val) == 2)
2272 arg2 = build_int_cst (integer_type_node, val);
2273 else if (ifn == IFN_CTZ
2274 && CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
2275 val) == 2)
2276 arg2 = build_int_cst (integer_type_node, val);
2277 if (arg2)
2278 pattern_stmt = gimple_build_call_internal (ifn, 2, unprom_diff.op, arg2);
2279 else
2280 pattern_stmt = gimple_build_call_internal (ifn, 1, unprom_diff.op);
2281 gimple_call_set_lhs (pattern_stmt, new_var);
2282 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
2283 *type_out = vec_type;
2285 if (dump_enabled_p ())
2286 dump_printf_loc (MSG_NOTE, vect_location,
2287 "created pattern stmt: %G", pattern_stmt);
2289 if (addend)
2291 gcc_assert (supported);
2292 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
2293 tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2294 pattern_stmt = gimple_build_assign (ret_var, PLUS_EXPR, new_var,
2295 build_int_cst (lhs_type, addend));
2297 else if (!supported)
2299 stmt_vec_info new_stmt_info = vinfo->add_stmt (pattern_stmt);
2300 STMT_VINFO_VECTYPE (new_stmt_info) = vec_type;
2301 pattern_stmt
2302 = vect_recog_ctz_ffs_pattern (vinfo, new_stmt_info, type_out);
2303 if (pattern_stmt == NULL)
2304 return NULL;
2305 if (gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (new_stmt_info))
2307 gimple_seq *pseq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
2308 gimple_seq_add_seq_without_update (pseq, seq);
2311 return pattern_stmt;
2314 /* Function vect_recog_pow_pattern
2316 Try to find the following pattern:
2318 x = POW (y, N);
2320 with POW being one of pow, powf, powi, powif and N being
2321 either 2 or 0.5.
2323 Input:
2325 * STMT_VINFO: The stmt from which the pattern search begins.
2327 Output:
2329 * TYPE_OUT: The type of the output of this pattern.
2331 * Return value: A new stmt that will be used to replace the sequence of
2332 stmts that constitute the pattern. In this case it will be:
2333         x = x * x
2334       or
2335         x = sqrt (x)
2338 static gimple *
2339 vect_recog_pow_pattern (vec_info *vinfo,
2340 stmt_vec_info stmt_vinfo, tree *type_out)
2342 gimple *last_stmt = stmt_vinfo->stmt;
2343 tree base, exp;
2344 gimple *stmt;
2345 tree var;
2347 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
2348 return NULL;
2350 switch (gimple_call_combined_fn (last_stmt))
2352 CASE_CFN_POW:
2353 CASE_CFN_POWI:
2354 break;
2356 default:
2357 return NULL;
2360 base = gimple_call_arg (last_stmt, 0);
2361 exp = gimple_call_arg (last_stmt, 1);
2362 if (TREE_CODE (exp) != REAL_CST
2363 && TREE_CODE (exp) != INTEGER_CST)
2365 if (flag_unsafe_math_optimizations
2366 && TREE_CODE (base) == REAL_CST
2367 && gimple_call_builtin_p (last_stmt, BUILT_IN_NORMAL))
2369 combined_fn log_cfn;
2370 built_in_function exp_bfn;
2371 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
2373 case BUILT_IN_POW:
2374 log_cfn = CFN_BUILT_IN_LOG;
2375 exp_bfn = BUILT_IN_EXP;
2376 break;
2377 case BUILT_IN_POWF:
2378 log_cfn = CFN_BUILT_IN_LOGF;
2379 exp_bfn = BUILT_IN_EXPF;
2380 break;
2381 case BUILT_IN_POWL:
2382 log_cfn = CFN_BUILT_IN_LOGL;
2383 exp_bfn = BUILT_IN_EXPL;
2384 break;
2385 default:
2386 return NULL;
2388 tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
2389 tree exp_decl = builtin_decl_implicit (exp_bfn);
2390 /* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd
2391 does that, but if C is a power of 2, we want to use
2392 exp2 (log2 (C) * x) in the non-vectorized version, but for
2393 vectorization we don't have vectorized exp2. */
2394 if (logc
2395 && TREE_CODE (logc) == REAL_CST
2396 && exp_decl
2397 && lookup_attribute ("omp declare simd",
2398 DECL_ATTRIBUTES (exp_decl)))
2400 cgraph_node *node = cgraph_node::get_create (exp_decl);
2401 if (node->simd_clones == NULL)
2403 if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
2404 || node->definition)
2405 return NULL;
2406 expand_simd_clones (node);
2407 if (node->simd_clones == NULL)
2408 return NULL;
2410 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
2411 if (!*type_out)
2412 return NULL;
2413 tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
2414 gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
2415 append_pattern_def_seq (vinfo, stmt_vinfo, g);
2416 tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
2417 g = gimple_build_call (exp_decl, 1, def);
2418 gimple_call_set_lhs (g, res);
2419 return g;
2423 return NULL;
2426 /* We now have a pow or powi builtin function call with a constant
2427 exponent. */
2429 /* Catch squaring. */
2430 if ((tree_fits_shwi_p (exp)
2431 && tree_to_shwi (exp) == 2)
2432 || (TREE_CODE (exp) == REAL_CST
2433 && real_equal (&TREE_REAL_CST (exp), &dconst2)))
2435 if (!vect_supportable_direct_optab_p (vinfo, TREE_TYPE (base), MULT_EXPR,
2436 TREE_TYPE (base), type_out))
2437 return NULL;
2439 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
2440 stmt = gimple_build_assign (var, MULT_EXPR, base, base);
2441 return stmt;
2444 /* Catch square root. */
2445 if (TREE_CODE (exp) == REAL_CST
2446 && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
2448 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
2449 if (*type_out
2450 && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
2451 OPTIMIZE_FOR_SPEED))
2453 gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
2454 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
2455 gimple_call_set_lhs (stmt, var);
2456 gimple_call_set_nothrow (stmt, true);
2457 return stmt;
2461 return NULL;
2465 /* Function vect_recog_widen_sum_pattern
2467 Try to find the following pattern:
2469 type x_t;
2470 TYPE x_T, sum = init;
2471 loop:
2472 sum_0 = phi <init, sum_1>
2473 S1 x_t = *p;
2474 S2 x_T = (TYPE) x_t;
2475 S3 sum_1 = x_T + sum_0;
2477    where type 'TYPE' is at least double the size of type 'type', i.e. we're
2478 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
2479 a special case of a reduction computation.
2481 Input:
2483 * STMT_VINFO: The stmt from which the pattern search begins. In the example,
2484 when this function is called with S3, the pattern {S2,S3} will be detected.
2486 Output:
2488 * TYPE_OUT: The type of the output of this pattern.
2490 * Return value: A new stmt that will be used to replace the sequence of
2491 stmts that constitute the pattern. In this case it will be:
2492 WIDEN_SUM <x_t, sum_0>
2494 Note: The widening-sum idiom is a widening reduction pattern that is
2495 vectorized without preserving all the intermediate results. It
2496 produces only N/2 (widened) results (by summing up pairs of
2497 intermediate results) rather than all N results. Therefore, we
2498 cannot allow this pattern when we want to get all the results and in
2499 the correct order (as is the case when this computation is in an
2500    inner-loop nested in an outer-loop that is being vectorized). */
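/* A typical instance (element types are illustrative only):

     signed char x[N];
     int sum = 0;
     for (int i = 0; i < N; i++)
       sum += x[i];

   Here S2/S3 are replaced by sum_1 = WIDEN_SUM <x_t, sum_0>.  */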
2502 static gimple *
2503 vect_recog_widen_sum_pattern (vec_info *vinfo,
2504 stmt_vec_info stmt_vinfo, tree *type_out)
2506 gimple *last_stmt = stmt_vinfo->stmt;
2507 tree oprnd0, oprnd1;
2508 tree type;
2509 gimple *pattern_stmt;
2510 tree var;
2512 /* Look for the following pattern
2513 DX = (TYPE) X;
2514 sum_1 = DX + sum_0;
2515 In which DX is at least double the size of X, and sum_1 has been
2516 recognized as a reduction variable.
2519 /* Starting from LAST_STMT, follow the defs of its uses in search
2520 of the above pattern. */
2522 if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
2523 &oprnd0, &oprnd1)
2524 || TREE_CODE (oprnd0) != SSA_NAME
2525 || !vinfo->lookup_def (oprnd0))
2526 return NULL;
2528 type = TREE_TYPE (gimple_get_lhs (last_stmt));
2530 /* So far so good. Since last_stmt was detected as a (summation) reduction,
2531 we know that oprnd1 is the reduction variable (defined by a loop-header
2532 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
2533 Left to check that oprnd0 is defined by a cast from type 'type' to type
2534 'TYPE'. */
2536 vect_unpromoted_value unprom0;
2537 if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
2538 || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
2539 return NULL;
2541 vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
2543 if (!vect_supportable_direct_optab_p (vinfo, type, WIDEN_SUM_EXPR,
2544 unprom0.type, type_out))
2545 return NULL;
2547 var = vect_recog_temp_ssa_var (type, NULL);
2548 pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);
2550 return pattern_stmt;
2553 /* Function vect_recog_bitfield_ref_pattern
2555 Try to find the following pattern:
2557 bf_value = BIT_FIELD_REF (container, bitsize, bitpos);
2558    result = (type_out) bf_value;
2560  or
2562    if (BIT_FIELD_REF (container, bitsize, bitpos) `cmp` <constant>)
2564  where type_out is a non-bitfield type, that is to say, its precision matches
2565 2^(TYPE_SIZE(type_out) - (TYPE_UNSIGNED (type_out) ? 1 : 2)).
2567 Input:
2569 * STMT_VINFO: The stmt from which the pattern search begins.
2570 here it starts with:
2571    result = (type_out) bf_value;
2573  or
2575    if (BIT_FIELD_REF (container, bitsize, bitpos) `cmp` <constant>)
2577 Output:
2579 * TYPE_OUT: The vector type of the output of this pattern.
2581 * Return value: A new stmt that will be used to replace the sequence of
2582 stmts that constitute the pattern. If the precision of type_out is bigger
2583 than the precision type of _1 we perform the widening before the shifting,
2584 since the new precision will be large enough to shift the value and moving
2585 widening operations up the statement chain enables the generation of
2586 widening loads. If we are widening and the operation after the pattern is
2587 an addition then we mask first and shift later, to enable the generation of
2588 shifting adds. In the case of narrowing we will always mask first, shift
2589 last and then perform a narrowing operation. This will enable the
2590 generation of narrowing shifts.
2592 Widening with mask first, shift later:
2593 container = (type_out) container;
2594 masked = container & (((1 << bitsize) - 1) << bitpos);
2595 result = masked >> bitpos;
2597 Widening with shift first, mask last:
2598 container = (type_out) container;
2599 shifted = container >> bitpos;
2600 result = shifted & ((1 << bitsize) - 1);
2602 Narrowing:
2603 masked = container & (((1 << bitsize) - 1) << bitpos);
2604 result = masked >> bitpos;
2605 result = (type_out) result;
2607 If the bitfield is signed and it's wider than type_out, we need to
2608 keep the result sign-extended:
2609 container = (type) container;
2610 masked = container << (prec - bitsize - bitpos);
2611 result = (type_out) (masked >> (prec - bitsize));
2613 Here type is the signed variant of the wider of type_out and the type
2614 of container.
2616 The shifting is always optional depending on whether bitpos != 0.
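   For instance, on a little-endian target, extracting an unsigned 6-bit
   field at bitpos 3 from a 32-bit container with the mask-first variant
   uses
     masked = container & 0x1f8;   // ((1 << 6) - 1) << 3
     result = masked >> 3;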
2618  When the original bitfield was inside a gcond then a new gcond is also
2619  generated with the new `result` as the operand to the comparison.
2623 static gimple *
2624 vect_recog_bitfield_ref_pattern (vec_info *vinfo, stmt_vec_info stmt_info,
2625 tree *type_out)
2627 gimple *bf_stmt = NULL;
2628 tree lhs = NULL_TREE;
2629 tree ret_type = NULL_TREE;
2630 gimple *stmt = STMT_VINFO_STMT (stmt_info);
2631 if (gcond *cond_stmt = dyn_cast <gcond *> (stmt))
2633 tree op = gimple_cond_lhs (cond_stmt);
2634 if (TREE_CODE (op) != SSA_NAME)
2635 return NULL;
2636 bf_stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (op));
2637 if (TREE_CODE (gimple_cond_rhs (cond_stmt)) != INTEGER_CST)
2638 return NULL;
2640 else if (is_gimple_assign (stmt)
2641 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt))
2642 && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
2644 gimple *second_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
2645 bf_stmt = dyn_cast <gassign *> (second_stmt);
2646 lhs = gimple_assign_lhs (stmt);
2647 ret_type = TREE_TYPE (lhs);
2650 if (!bf_stmt
2651 || gimple_assign_rhs_code (bf_stmt) != BIT_FIELD_REF)
2652 return NULL;
2654 tree bf_ref = gimple_assign_rhs1 (bf_stmt);
2655 tree container = TREE_OPERAND (bf_ref, 0);
2656 ret_type = ret_type ? ret_type : TREE_TYPE (container);
2658 if (!bit_field_offset (bf_ref).is_constant ()
2659 || !bit_field_size (bf_ref).is_constant ()
2660 || !tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (container))))
2661 return NULL;
2663 if (!INTEGRAL_TYPE_P (TREE_TYPE (bf_ref))
2664 || !INTEGRAL_TYPE_P (TREE_TYPE (container))
2665 || TYPE_MODE (TREE_TYPE (container)) == E_BLKmode)
2666 return NULL;
2668 gimple *use_stmt, *pattern_stmt;
2669 use_operand_p use_p;
2670 bool shift_first = true;
2671 tree container_type = TREE_TYPE (container);
2672 tree vectype = get_vectype_for_scalar_type (vinfo, container_type);
2674 /* Calculate shift_n before the adjustments for widening loads, otherwise
2675 the container may change and we have to consider offset change for
2676 widening loads on big endianness. The shift_n calculated here can be
2677 independent of widening. */
2678 unsigned HOST_WIDE_INT shift_n = bit_field_offset (bf_ref).to_constant ();
2679 unsigned HOST_WIDE_INT mask_width = bit_field_size (bf_ref).to_constant ();
2680 unsigned HOST_WIDE_INT prec = tree_to_uhwi (TYPE_SIZE (container_type));
2681 if (BYTES_BIG_ENDIAN)
2682 shift_n = prec - shift_n - mask_width;
2684 bool ref_sext = (!TYPE_UNSIGNED (TREE_TYPE (bf_ref)) &&
2685 TYPE_PRECISION (ret_type) > mask_width);
2686 bool load_widen = (TYPE_PRECISION (TREE_TYPE (container)) <
2687 TYPE_PRECISION (ret_type));
2689 /* We move the conversion earlier if the loaded type is smaller than the
2690 return type to enable the use of widening loads. And if we need a
2691 sign extension, we need to convert the loaded value early to a signed
2692 type as well. */
2693 if (ref_sext || load_widen)
2695 tree type = load_widen ? ret_type : container_type;
2696 if (ref_sext)
2697 type = gimple_signed_type (type);
2698 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type),
2699 NOP_EXPR, container);
2700 container = gimple_get_lhs (pattern_stmt);
2701 container_type = TREE_TYPE (container);
2702 prec = tree_to_uhwi (TYPE_SIZE (container_type));
2703 vectype = get_vectype_for_scalar_type (vinfo, container_type);
2704 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
2706 else if (!useless_type_conversion_p (TREE_TYPE (container), ret_type))
2707 /* If we are doing the conversion last then also delay the shift as we may
2708 be able to combine the shift and conversion in certain cases. */
2709 shift_first = false;
2711 /* If the only use of the result of this BIT_FIELD_REF + CONVERT is a
2712 PLUS_EXPR then do the shift last as some targets can combine the shift and
2713 add into a single instruction. */
2714 if (lhs && single_imm_use (lhs, &use_p, &use_stmt))
2716 if (gimple_code (use_stmt) == GIMPLE_ASSIGN
2717 && gimple_assign_rhs_code (use_stmt) == PLUS_EXPR)
2718 shift_first = false;
2721 /* If we don't have to shift we only generate the mask, so just fix the
2722 code-path to shift_first. */
2723 if (shift_n == 0)
2724 shift_first = true;
2726 tree result;
2727 if (shift_first && !ref_sext)
2729 tree shifted = container;
2730 if (shift_n)
2732 pattern_stmt
2733 = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2734 RSHIFT_EXPR, container,
2735 build_int_cst (sizetype, shift_n));
2736 shifted = gimple_assign_lhs (pattern_stmt);
2737 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
2740 tree mask = wide_int_to_tree (container_type,
2741 wi::mask (mask_width, false, prec));
2743 pattern_stmt
2744 = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2745 BIT_AND_EXPR, shifted, mask);
2746 result = gimple_assign_lhs (pattern_stmt);
2748 else
2750 tree temp = vect_recog_temp_ssa_var (container_type);
2751 if (!ref_sext)
2753 tree mask = wide_int_to_tree (container_type,
2754 wi::shifted_mask (shift_n,
2755 mask_width,
2756 false, prec));
2757 pattern_stmt = gimple_build_assign (temp, BIT_AND_EXPR,
2758 container, mask);
2760 else
2762 HOST_WIDE_INT shl = prec - shift_n - mask_width;
2763 shift_n += shl;
2764 pattern_stmt = gimple_build_assign (temp, LSHIFT_EXPR,
2765 container,
2766 build_int_cst (sizetype,
2767 shl));
2770 tree masked = gimple_assign_lhs (pattern_stmt);
2771 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
2772 pattern_stmt
2773 = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2774 RSHIFT_EXPR, masked,
2775 build_int_cst (sizetype, shift_n));
2776 result = gimple_assign_lhs (pattern_stmt);
2779 if (!useless_type_conversion_p (TREE_TYPE (result), ret_type))
2781 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
2782 pattern_stmt
2783 = gimple_build_assign (vect_recog_temp_ssa_var (ret_type),
2784 NOP_EXPR, result);
2787 if (!lhs)
2789 if (!vectype)
2790 return NULL;
2792 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
2793 vectype = truth_type_for (vectype);
2795 /* FIXME: This part extracts the boolean value out of the bitfield in the
2796 same way as vect_recog_gcond_pattern does. However because
2797 patterns cannot match the same root twice, when we handle and
2798 lower the bitfield in the gcond, vect_recog_gcond_pattern can't
2799 apply anymore. We should really fix it so that we don't need to
2800 duplicate transformations like these. */
2801 tree new_lhs = vect_recog_temp_ssa_var (boolean_type_node, NULL);
2802 gcond *cond_stmt = dyn_cast <gcond *> (stmt_info->stmt);
2803 tree cond_cst = gimple_cond_rhs (cond_stmt);
2804 gimple *new_stmt
2805 = gimple_build_assign (new_lhs, gimple_cond_code (cond_stmt),
2806 gimple_get_lhs (pattern_stmt),
2807 fold_convert (container_type, cond_cst));
2808 append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype, container_type);
2809 pattern_stmt
2810 = gimple_build_cond (NE_EXPR, new_lhs,
2811 build_zero_cst (TREE_TYPE (new_lhs)),
2812 NULL_TREE, NULL_TREE);
2815 *type_out = STMT_VINFO_VECTYPE (stmt_info);
2816 vect_pattern_detected ("bitfield_ref pattern", stmt_info->stmt);
2818 return pattern_stmt;
2821 /* Function vect_recog_bit_insert_pattern
2823 Try to find the following pattern:
2825 written = BIT_INSERT_EXPR (container, value, bitpos);
2827 Input:
2829 * STMT_VINFO: The stmt we want to replace.
2831 Output:
2833 * TYPE_OUT: The vector type of the output of this pattern.
2835 * Return value: A new stmt that will be used to replace the sequence of
2836 stmts that constitute the pattern. In this case it will be:
2837 value = (container_type) value; // Make sure
2838 shifted = value << bitpos; // Shift value into place
2839 masked = shifted & (mask << bitpos); // Mask off the non-relevant bits in
2840 // the 'to-write value'.
2841        cleared = container & ~(mask << bitpos); // Clear, in the container,
2842                                                 // the bits that we are
2843                                                 // about to overwrite.
2844 written = cleared | masked; // Write bits.
2847 where mask = ((1 << TYPE_PRECISION (value)) - 1), a mask to keep the number of
2848 bits corresponding to the real size of the bitfield value we are writing to.
2849 The shifting is always optional depending on whether bitpos != 0.
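   For instance, on a little-endian target, inserting a 4-bit value at
   bitpos 8 of a 32-bit container expands (with mask = 0xf) to
     shifted = (container_type) value << 8;
     masked  = shifted & 0xf00;
     cleared = container & ~0xf00;
     written = cleared | masked;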
2853 static gimple *
2854 vect_recog_bit_insert_pattern (vec_info *vinfo, stmt_vec_info stmt_info,
2855 tree *type_out)
2857 gassign *bf_stmt = dyn_cast <gassign *> (stmt_info->stmt);
2858 if (!bf_stmt || gimple_assign_rhs_code (bf_stmt) != BIT_INSERT_EXPR)
2859 return NULL;
2861 tree container = gimple_assign_rhs1 (bf_stmt);
2862 tree value = gimple_assign_rhs2 (bf_stmt);
2863 tree shift = gimple_assign_rhs3 (bf_stmt);
2865 tree bf_type = TREE_TYPE (value);
2866 tree container_type = TREE_TYPE (container);
2868 if (!INTEGRAL_TYPE_P (container_type)
2869 || !tree_fits_uhwi_p (TYPE_SIZE (container_type)))
2870 return NULL;
2872 gimple *pattern_stmt;
2874 vect_unpromoted_value unprom;
2875 unprom.set_op (value, vect_internal_def);
2876 value = vect_convert_input (vinfo, stmt_info, container_type, &unprom,
2877 get_vectype_for_scalar_type (vinfo,
2878 container_type));
2880 unsigned HOST_WIDE_INT mask_width = TYPE_PRECISION (bf_type);
2881 unsigned HOST_WIDE_INT prec = tree_to_uhwi (TYPE_SIZE (container_type));
2882 unsigned HOST_WIDE_INT shift_n = tree_to_uhwi (shift);
2883 if (BYTES_BIG_ENDIAN)
2885 shift_n = prec - shift_n - mask_width;
2886 shift = build_int_cst (TREE_TYPE (shift), shift_n);
2889 if (!useless_type_conversion_p (TREE_TYPE (value), container_type))
2891 pattern_stmt =
2892 gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2893 NOP_EXPR, value);
2894 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
2895 value = gimple_get_lhs (pattern_stmt);
2898 /* Shift VALUE into place. */
2899 tree shifted = value;
2900 if (shift_n)
2902 gimple_seq stmts = NULL;
2903 shifted
2904 = gimple_build (&stmts, LSHIFT_EXPR, container_type, value, shift);
2905 if (!gimple_seq_empty_p (stmts))
2906 append_pattern_def_seq (vinfo, stmt_info,
2907 gimple_seq_first_stmt (stmts));
2910 tree mask_t
2911 = wide_int_to_tree (container_type,
2912 wi::shifted_mask (shift_n, mask_width, false, prec));
2914 /* Clear bits we don't want to write back from SHIFTED. */
2915 gimple_seq stmts = NULL;
2916 tree masked = gimple_build (&stmts, BIT_AND_EXPR, container_type, shifted,
2917 mask_t);
2918 if (!gimple_seq_empty_p (stmts))
2920 pattern_stmt = gimple_seq_first_stmt (stmts);
2921 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
2924 /* Mask off the bits in the container that we are to write to. */
2925 mask_t = wide_int_to_tree (container_type,
2926 wi::shifted_mask (shift_n, mask_width, true, prec));
2927 tree cleared = vect_recog_temp_ssa_var (container_type);
2928 pattern_stmt = gimple_build_assign (cleared, BIT_AND_EXPR, container, mask_t);
2929 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
2931 /* Write MASKED into CLEARED. */
2932 pattern_stmt
2933 = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2934 BIT_IOR_EXPR, cleared, masked);
2936 *type_out = STMT_VINFO_VECTYPE (stmt_info);
2937 vect_pattern_detected ("bit_insert pattern", stmt_info->stmt);
2939 return pattern_stmt;
2943 /* Recognize cases in which an operation is performed in one type WTYPE
2944 but could be done more efficiently in a narrower type NTYPE. For example,
2945 if we have:
2947 ATYPE a; // narrower than NTYPE
2948 BTYPE b; // narrower than NTYPE
2949 WTYPE aw = (WTYPE) a;
2950 WTYPE bw = (WTYPE) b;
2951 WTYPE res = aw + bw; // only uses of aw and bw
2953 then it would be more efficient to do:
2955 NTYPE an = (NTYPE) a;
2956 NTYPE bn = (NTYPE) b;
2957 NTYPE resn = an + bn;
2958 WTYPE res = (WTYPE) resn;
2960 Other situations include things like:
2962 ATYPE a; // NTYPE or narrower
2963 WTYPE aw = (WTYPE) a;
2964 WTYPE res = aw + b;
2966 when only "(NTYPE) res" is significant. In that case it's more efficient
2967 to truncate "b" and do the operation on NTYPE instead:
2969 NTYPE an = (NTYPE) a;
2970 NTYPE bn = (NTYPE) b; // truncation
2971 NTYPE resn = an + bn;
2972 WTYPE res = (WTYPE) resn;
2974 All users of "res" should then use "resn" instead, making the final
2975 statement dead (not marked as relevant). The final statement is still
2976 needed to maintain the type correctness of the IR.
2978 vect_determine_precisions has already determined the minimum
2979    precision of the operation and the minimum precision required
2980 by users of the result. */
2982 static gimple *
2983 vect_recog_over_widening_pattern (vec_info *vinfo,
2984 stmt_vec_info last_stmt_info, tree *type_out)
2986 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
2987 if (!last_stmt)
2988 return NULL;
2990 /* See whether we have found that this operation can be done on a
2991 narrower type without changing its semantics. */
2992 unsigned int new_precision = last_stmt_info->operation_precision;
2993 if (!new_precision)
2994 return NULL;
2996 tree lhs = gimple_assign_lhs (last_stmt);
2997 tree type = TREE_TYPE (lhs);
2998 tree_code code = gimple_assign_rhs_code (last_stmt);
3000 /* Punt for reductions where we don't handle the type conversions. */
3001 if (STMT_VINFO_DEF_TYPE (last_stmt_info) == vect_reduction_def)
3002 return NULL;
3004 /* Keep the first operand of a COND_EXPR as-is: only the other two
3005 operands are interesting. */
3006 unsigned int first_op = (code == COND_EXPR ? 2 : 1);
3008 /* Check the operands. */
3009 unsigned int nops = gimple_num_ops (last_stmt) - first_op;
3010 auto_vec <vect_unpromoted_value, 3> unprom (nops);
3011 unprom.quick_grow_cleared (nops);
3012 unsigned int min_precision = 0;
3013 bool single_use_p = false;
3014 for (unsigned int i = 0; i < nops; ++i)
3016 tree op = gimple_op (last_stmt, first_op + i);
3017 if (TREE_CODE (op) == INTEGER_CST)
3018 unprom[i].set_op (op, vect_constant_def);
3019 else if (TREE_CODE (op) == SSA_NAME)
3021 bool op_single_use_p = true;
3022 if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
3023 &op_single_use_p))
3024 return NULL;
3025 /* If:
3027 (1) N bits of the result are needed;
3028 (2) all inputs are widened from M<N bits; and
3029 (3) one operand OP is a single-use SSA name
3031 we can shift the M->N widening from OP to the output
3032 without changing the number or type of extensions involved.
3033 This then reduces the number of copies of STMT_INFO.
3035 If instead of (3) more than one operand is a single-use SSA name,
3036 shifting the extension to the output is even more of a win.
3038 If instead:
3040 (1) N bits of the result are needed;
3041 (2) one operand OP2 is widened from M2<N bits;
3042 (3) another operand OP1 is widened from M1<M2 bits; and
3043 (4) both OP1 and OP2 are single-use
3045 the choice is between:
3047 (a) truncating OP2 to M1, doing the operation on M1,
3048 and then widening the result to N
3050 (b) widening OP1 to M2, doing the operation on M2, and then
3051 widening the result to N
3053 Both shift the M2->N widening of the inputs to the output.
3054 (a) additionally shifts the M1->M2 widening to the output;
3055 it requires fewer copies of STMT_INFO but requires an extra
3056 M2->M1 truncation.
3058 Which is better will depend on the complexity and cost of
3059 STMT_INFO, which is hard to predict at this stage. However,
3060 a clear tie-breaker in favor of (b) is the fact that the
3061 truncation in (a) increases the length of the operation chain.
3063 If instead of (4) only one of OP1 or OP2 is single-use,
3064 (b) is still a win over doing the operation in N bits:
3065 it still shifts the M2->N widening on the single-use operand
3066 to the output and reduces the number of STMT_INFO copies.
3068 If neither operand is single-use then operating on fewer than
3069 N bits might lead to more extensions overall. Whether it does
3070 or not depends on global information about the vectorization
3071 region, and whether that's a good trade-off would again
3072 depend on the complexity and cost of the statements involved,
3073 as well as things like register pressure that are not normally
3074 modelled at this stage. We therefore ignore these cases
3075 and just optimize the clear single-use wins above.
3077 Thus we take the maximum precision of the unpromoted operands
3078 and record whether any operand is single-use. */
3079 if (unprom[i].dt == vect_internal_def)
3081 min_precision = MAX (min_precision,
3082 TYPE_PRECISION (unprom[i].type));
3083 single_use_p |= op_single_use_p;
3086 else
3087 return NULL;
3090 /* Although the operation could be done in operation_precision, we have
3091 to balance that against introducing extra truncations or extensions.
3092 Calculate the minimum precision that can be handled efficiently.
3094 The loop above determined that the operation could be handled
3095 efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
3096 extension from the inputs to the output without introducing more
3097 instructions, and would reduce the number of instructions required
3098 for STMT_INFO itself.
3100 vect_determine_precisions has also determined that the result only
3101 needs min_output_precision bits. Truncating by a factor of N times
3102 requires a tree of N - 1 instructions, so if TYPE is N times wider
3103 than min_output_precision, doing the operation in TYPE and truncating
3104 the result requires N + (N - 1) = 2N - 1 instructions per output vector.
3105 In contrast:
3107 - truncating the input to a unary operation and doing the operation
3108 in the new type requires at most N - 1 + 1 = N instructions per
3109 output vector
3111 - doing the same for a binary operation requires at most
3112 (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
3114 Both unary and binary operations require fewer instructions than
3115 this if the operands were extended from a suitable truncated form.
3116 Thus there is usually nothing to lose by doing operations in
3117 min_output_precision bits, but there can be something to gain. */
3118 if (!single_use_p)
3119 min_precision = last_stmt_info->min_output_precision;
3120 else
3121 min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
3123 /* Apply the minimum efficient precision we just calculated. */
3124 if (new_precision < min_precision)
3125 new_precision = min_precision;
3126 new_precision = vect_element_precision (new_precision);
3127 if (new_precision >= TYPE_PRECISION (type))
3128 return NULL;
3130 vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);
3132 *type_out = get_vectype_for_scalar_type (vinfo, type);
3133 if (!*type_out)
3134 return NULL;
3136 /* We've found a viable pattern. Get the new type of the operation. */
3137 bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
3138 tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
3140 /* If we're truncating an operation, we need to make sure that we
3141 don't introduce new undefined overflow. The codes tested here are
3142 a subset of those accepted by vect_truncatable_operation_p. */
3143 tree op_type = new_type;
3144 if (TYPE_OVERFLOW_UNDEFINED (new_type)
3145 && (code == PLUS_EXPR || code == MINUS_EXPR || code == MULT_EXPR))
3146 op_type = build_nonstandard_integer_type (new_precision, true);
3148 /* We specifically don't check here whether the target supports the
3149 new operation, since it might be something that a later pattern
3150 wants to rewrite anyway. If targets have a minimum element size
3151 for some optabs, we should pattern-match smaller ops to larger ops
3152 where beneficial. */
3153 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
3154 tree op_vectype = get_vectype_for_scalar_type (vinfo, op_type);
3155 if (!new_vectype || !op_vectype)
3156 return NULL;
3158 if (dump_enabled_p ())
3159 dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
3160 type, new_type);
3162 /* Calculate the rhs operands for an operation on OP_TYPE. */
3163 tree ops[3] = {};
3164 for (unsigned int i = 1; i < first_op; ++i)
3165 ops[i - 1] = gimple_op (last_stmt, i);
3166 vect_convert_inputs (vinfo, last_stmt_info, nops, &ops[first_op - 1],
3167 op_type, &unprom[0], op_vectype);
3169 /* Use the operation to produce a result of type OP_TYPE. */
3170 tree new_var = vect_recog_temp_ssa_var (op_type, NULL);
3171 gimple *pattern_stmt = gimple_build_assign (new_var, code,
3172 ops[0], ops[1], ops[2]);
3173 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
3175 if (dump_enabled_p ())
3176 dump_printf_loc (MSG_NOTE, vect_location,
3177 "created pattern stmt: %G", pattern_stmt);
3179 /* Convert back to the original signedness, if OP_TYPE is different
3180 from NEW_TYPE. */
3181 if (op_type != new_type)
3182 pattern_stmt = vect_convert_output (vinfo, last_stmt_info, new_type,
3183 pattern_stmt, op_vectype);
3185 /* Promote the result to the original type. */
3186 pattern_stmt = vect_convert_output (vinfo, last_stmt_info, type,
3187 pattern_stmt, new_vectype);
3189 return pattern_stmt;
3192 /* Recognize the following patterns:
3194 ATYPE a; // narrower than TYPE
3195 BTYPE b; // narrower than TYPE
3197 1) Multiply high with scaling
3198 TYPE res = ((TYPE) a * (TYPE) b) >> c;
3199 Here, c is bitsize (TYPE) / 2 - 1.
3201 2) ... or also with rounding
3202      TYPE res = ((((TYPE) a * (TYPE) b) >> d) + 1) >> 1;
3203 Here, d is bitsize (TYPE) / 2 - 2.
3205 3) Normal multiply high
3206 TYPE res = ((TYPE) a * (TYPE) b) >> e;
3207 Here, e is bitsize (TYPE) / 2.
3209 where only the bottom half of res is used. */
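/* For 16-bit a and b and TYPE = int32_t (an illustrative choice only), the
   recognized shift amounts are c = 15 for 1), d = 14 for 2) and e = 16
   for 3), mapping to .MULHS, .MULHRS and .MULH respectively.  */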
3211 static gimple *
3212 vect_recog_mulhs_pattern (vec_info *vinfo,
3213 stmt_vec_info last_stmt_info, tree *type_out)
3215 /* Check for a right shift. */
3216 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
3217 if (!last_stmt
3218 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR)
3219 return NULL;
3221 /* Check that the shift result is wider than the users of the
3222 result need (i.e. that narrowing would be a natural choice). */
3223 tree lhs_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
3224 unsigned int target_precision
3225 = vect_element_precision (last_stmt_info->min_output_precision);
3226 if (!INTEGRAL_TYPE_P (lhs_type)
3227 || target_precision >= TYPE_PRECISION (lhs_type))
3228 return NULL;
3230 /* Look through any change in sign on the outer shift input. */
3231 vect_unpromoted_value unprom_rshift_input;
3232 tree rshift_input = vect_look_through_possible_promotion
3233 (vinfo, gimple_assign_rhs1 (last_stmt), &unprom_rshift_input);
3234 if (!rshift_input
3235 || TYPE_PRECISION (TREE_TYPE (rshift_input))
3236 != TYPE_PRECISION (lhs_type))
3237 return NULL;
3239 /* Get the definition of the shift input. */
3240 stmt_vec_info rshift_input_stmt_info
3241 = vect_get_internal_def (vinfo, rshift_input);
3242 if (!rshift_input_stmt_info)
3243 return NULL;
3244 gassign *rshift_input_stmt
3245 = dyn_cast <gassign *> (rshift_input_stmt_info->stmt);
3246 if (!rshift_input_stmt)
3247 return NULL;
3249 stmt_vec_info mulh_stmt_info;
3250 tree scale_term;
3251 bool rounding_p = false;
3253 /* Check for the presence of the rounding term. */
3254 if (gimple_assign_rhs_code (rshift_input_stmt) == PLUS_EXPR)
3256 /* Check that the outer shift was by 1. */
3257 if (!integer_onep (gimple_assign_rhs2 (last_stmt)))
3258 return NULL;
3260 /* Check that the second operand of the PLUS_EXPR is 1. */
3261 if (!integer_onep (gimple_assign_rhs2 (rshift_input_stmt)))
3262 return NULL;
3264 /* Look through any change in sign on the addition input. */
3265 vect_unpromoted_value unprom_plus_input;
3266 tree plus_input = vect_look_through_possible_promotion
3267 (vinfo, gimple_assign_rhs1 (rshift_input_stmt), &unprom_plus_input);
3268 if (!plus_input
3269 || TYPE_PRECISION (TREE_TYPE (plus_input))
3270 != TYPE_PRECISION (TREE_TYPE (rshift_input)))
3271 return NULL;
3273 /* Get the definition of the multiply-high-scale part. */
3274 stmt_vec_info plus_input_stmt_info
3275 = vect_get_internal_def (vinfo, plus_input);
3276 if (!plus_input_stmt_info)
3277 return NULL;
3278 gassign *plus_input_stmt
3279 = dyn_cast <gassign *> (plus_input_stmt_info->stmt);
3280 if (!plus_input_stmt
3281 || gimple_assign_rhs_code (plus_input_stmt) != RSHIFT_EXPR)
3282 return NULL;
3284 /* Look through any change in sign on the scaling input. */
3285 vect_unpromoted_value unprom_scale_input;
3286 tree scale_input = vect_look_through_possible_promotion
3287 (vinfo, gimple_assign_rhs1 (plus_input_stmt), &unprom_scale_input);
3288 if (!scale_input
3289 || TYPE_PRECISION (TREE_TYPE (scale_input))
3290 != TYPE_PRECISION (TREE_TYPE (plus_input)))
3291 return NULL;
3293 /* Get the definition of the multiply-high part. */
3294 mulh_stmt_info = vect_get_internal_def (vinfo, scale_input);
3295 if (!mulh_stmt_info)
3296 return NULL;
3298 /* Get the scaling term. */
3299 scale_term = gimple_assign_rhs2 (plus_input_stmt);
3300 rounding_p = true;
3302 else
3304 mulh_stmt_info = rshift_input_stmt_info;
3305 scale_term = gimple_assign_rhs2 (last_stmt);
3308 /* Check that the scaling factor is constant. */
3309 if (TREE_CODE (scale_term) != INTEGER_CST)
3310 return NULL;
3312 /* Check whether the scaling input term can be seen as two widened
3313 inputs multiplied together. */
3314 vect_unpromoted_value unprom_mult[2];
3315 tree new_type;
3316 unsigned int nops
3317 = vect_widened_op_tree (vinfo, mulh_stmt_info, MULT_EXPR, WIDEN_MULT_EXPR,
3318 false, 2, unprom_mult, &new_type);
3319 if (nops != 2)
3320 return NULL;
3322 /* Adjust output precision. */
3323 if (TYPE_PRECISION (new_type) < target_precision)
3324 new_type = build_nonstandard_integer_type
3325 (target_precision, TYPE_UNSIGNED (new_type));
3327 unsigned mult_precision = TYPE_PRECISION (new_type);
3328 internal_fn ifn;
3329   /* Check that the scaling factor is the expected one. Instead of
3330      target_precision, we use the precision that is actually used
3331      for the internal function. */
3332 if (rounding_p)
3334 /* Check pattern 2). */
3335 if (wi::to_widest (scale_term) + mult_precision + 2
3336 != TYPE_PRECISION (lhs_type))
3337 return NULL;
3339 ifn = IFN_MULHRS;
3341 else
3343 /* Check for pattern 1). */
3344 if (wi::to_widest (scale_term) + mult_precision + 1
3345 == TYPE_PRECISION (lhs_type))
3346 ifn = IFN_MULHS;
3347 /* Check for pattern 3). */
3348 else if (wi::to_widest (scale_term) + mult_precision
3349 == TYPE_PRECISION (lhs_type))
3350 ifn = IFN_MULH;
3351 else
3352 return NULL;
3355 vect_pattern_detected ("vect_recog_mulhs_pattern", last_stmt);
3357 /* Check for target support. */
3358 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
3359 if (!new_vectype
3360 || !direct_internal_fn_supported_p
3361 (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
3362 return NULL;
3364 /* The IR requires a valid vector type for the cast result, even though
3365 it's likely to be discarded. */
3366 *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
3367 if (!*type_out)
3368 return NULL;
3370   /* Generate the IFN_MULH, IFN_MULHS or IFN_MULHRS call. */
3371 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
3372 tree new_ops[2];
3373 vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
3374 unprom_mult, new_vectype);
3375 gcall *mulhrs_stmt
3376 = gimple_build_call_internal (ifn, 2, new_ops[0], new_ops[1]);
3377 gimple_call_set_lhs (mulhrs_stmt, new_var);
3378 gimple_set_location (mulhrs_stmt, gimple_location (last_stmt));
3380 if (dump_enabled_p ())
3381 dump_printf_loc (MSG_NOTE, vect_location,
3382 "created pattern stmt: %G", (gimple *) mulhrs_stmt);
3384 return vect_convert_output (vinfo, last_stmt_info, lhs_type,
3385 mulhrs_stmt, new_vectype);
3388 /* Recognize the patterns:
3390 ATYPE a; // narrower than TYPE
3391 BTYPE b; // narrower than TYPE
3392 (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
3393 or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
3395 where only the bottom half of avg is used. Try to transform them into:
3397 (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
3398 or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
3400 followed by:
3402 TYPE avg = (TYPE) avg';
3404 where NTYPE is no wider than half of TYPE. Since only the bottom half
3405 of avg is used, all or part of the cast of avg' should become redundant.
3407 If there is no target support available, generate code to distribute rshift
3408 over plus and add a carry. */
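/* A concrete instance (types are illustrative only):

     unsigned char a, b;
     unsigned short avg = ((unsigned short) a + (unsigned short) b + 1) >> 1;

   matches form (2) and becomes a .AVG_CEIL on unsigned char followed by a
   widening cast of the result.  */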
3410 static gimple *
3411 vect_recog_average_pattern (vec_info *vinfo,
3412 stmt_vec_info last_stmt_info, tree *type_out)
3414 /* Check for a shift right by one bit. */
3415 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
3416 if (!last_stmt
3417 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
3418 || !integer_onep (gimple_assign_rhs2 (last_stmt)))
3419 return NULL;
3421 /* Check that the shift result is wider than the users of the
3422 result need (i.e. that narrowing would be a natural choice). */
3423 tree lhs = gimple_assign_lhs (last_stmt);
3424 tree type = TREE_TYPE (lhs);
3425 unsigned int target_precision
3426 = vect_element_precision (last_stmt_info->min_output_precision);
3427 if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
3428 return NULL;
3430 /* Look through any change in sign on the shift input. */
3431 tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
3432 vect_unpromoted_value unprom_plus;
3433 rshift_rhs = vect_look_through_possible_promotion (vinfo, rshift_rhs,
3434 &unprom_plus);
3435 if (!rshift_rhs
3436 || TYPE_PRECISION (TREE_TYPE (rshift_rhs)) != TYPE_PRECISION (type))
3437 return NULL;
3439 /* Get the definition of the shift input. */
3440 stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
3441 if (!plus_stmt_info)
3442 return NULL;
3444 /* Check whether the shift input can be seen as a tree of additions on
3445 2 or 3 widened inputs.
3447 Note that the pattern should be a win even if the result of one or
3448 more additions is reused elsewhere: if the pattern matches, we'd be
3449 replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s. */
3450 internal_fn ifn = IFN_AVG_FLOOR;
3451 vect_unpromoted_value unprom[3];
3452 tree new_type;
3453 unsigned int nops = vect_widened_op_tree (vinfo, plus_stmt_info, PLUS_EXPR,
3454 IFN_VEC_WIDEN_PLUS, false, 3,
3455 unprom, &new_type);
3456 if (nops == 0)
3457 return NULL;
3458 if (nops == 3)
3460 /* Check that one operand is 1. */
3461 unsigned int i;
3462 for (i = 0; i < 3; ++i)
3463 if (integer_onep (unprom[i].op))
3464 break;
3465 if (i == 3)
3466 return NULL;
3467 /* Throw away the 1 operand and keep the other two. */
3468 if (i < 2)
3469 unprom[i] = unprom[2];
3470 ifn = IFN_AVG_CEIL;
3473 vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
3475 /* We know that:
3477 (a) the operation can be viewed as:
3479 TYPE widened0 = (TYPE) UNPROM[0];
3480 TYPE widened1 = (TYPE) UNPROM[1];
3481 TYPE tmp1 = widened0 + widened1 {+ 1};
3482 TYPE tmp2 = tmp1 >> 1; // LAST_STMT_INFO
3484 (b) the first two statements are equivalent to:
3486 TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
3487 TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
3489 (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
3490 where sensible;
3492 (d) all the operations can be performed correctly at twice the width of
3493 NEW_TYPE, due to the nature of the average operation; and
3495 (e) users of the result of the right shift need only TARGET_PRECISION
3496 bits, where TARGET_PRECISION is no more than half of TYPE's
3497 precision.
3499 Under these circumstances, the only situation in which NEW_TYPE
3500 could be narrower than TARGET_PRECISION is if widened0, widened1
3501 and an addition result are all used more than once. Thus we can
3502 treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
3503 as "free", whereas widening the result of the average instruction
3504 from NEW_TYPE to TARGET_PRECISION would be a new operation. It's
3505 therefore better not to go narrower than TARGET_PRECISION. */
3506 if (TYPE_PRECISION (new_type) < target_precision)
3507 new_type = build_nonstandard_integer_type (target_precision,
3508 TYPE_UNSIGNED (new_type));
3510 /* Check for target support. */
3511 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
3512 if (!new_vectype)
3513 return NULL;
3515 bool fallback_p = false;
3517 if (direct_internal_fn_supported_p (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
3519 else if (TYPE_UNSIGNED (new_type)
3520 && optab_for_tree_code (RSHIFT_EXPR, new_vectype, optab_scalar)
3521 && optab_for_tree_code (PLUS_EXPR, new_vectype, optab_default)
3522 && optab_for_tree_code (BIT_IOR_EXPR, new_vectype, optab_default)
3523 && optab_for_tree_code (BIT_AND_EXPR, new_vectype, optab_default))
3524 fallback_p = true;
3525 else
3526 return NULL;
3528 /* The IR requires a valid vector type for the cast result, even though
3529 it's likely to be discarded. */
3530 *type_out = get_vectype_for_scalar_type (vinfo, type);
3531 if (!*type_out)
3532 return NULL;
3534 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
3535 tree new_ops[2];
3536 vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
3537 unprom, new_vectype);
3539 if (fallback_p)
 3541      /* As a fallback, generate code for the following sequence:
3543 shifted_op0 = new_ops[0] >> 1;
3544 shifted_op1 = new_ops[1] >> 1;
3545 sum_of_shifted = shifted_op0 + shifted_op1;
3546 unmasked_carry = new_ops[0] and/or new_ops[1];
3547 carry = unmasked_carry & 1;
3548 new_var = sum_of_shifted + carry;
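      /* Illustrative correctness note (not part of the original comment):
	 a + b == 2*(a>>1) + 2*(b>>1) + (a&1) + (b&1), so
	 (a + b) >> 1 == (a>>1) + (b>>1) + (a & b & 1) and
	 (a + b + 1) >> 1 == (a>>1) + (b>>1) + ((a | b) & 1), which is why
	 the unmasked carry uses BIT_AND_EXPR for .AVG_FLOOR but
	 BIT_IOR_EXPR for .AVG_CEIL.  */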
3551 tree one_cst = build_one_cst (new_type);
3552 gassign *g;
3554 tree shifted_op0 = vect_recog_temp_ssa_var (new_type, NULL);
3555 g = gimple_build_assign (shifted_op0, RSHIFT_EXPR, new_ops[0], one_cst);
3556 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
3558 tree shifted_op1 = vect_recog_temp_ssa_var (new_type, NULL);
3559 g = gimple_build_assign (shifted_op1, RSHIFT_EXPR, new_ops[1], one_cst);
3560 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
3562 tree sum_of_shifted = vect_recog_temp_ssa_var (new_type, NULL);
3563 g = gimple_build_assign (sum_of_shifted, PLUS_EXPR,
3564 shifted_op0, shifted_op1);
3565 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
3567 tree unmasked_carry = vect_recog_temp_ssa_var (new_type, NULL);
3568 tree_code c = (ifn == IFN_AVG_CEIL) ? BIT_IOR_EXPR : BIT_AND_EXPR;
3569 g = gimple_build_assign (unmasked_carry, c, new_ops[0], new_ops[1]);
3570 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
3572 tree carry = vect_recog_temp_ssa_var (new_type, NULL);
3573 g = gimple_build_assign (carry, BIT_AND_EXPR, unmasked_carry, one_cst);
3574 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
3576 g = gimple_build_assign (new_var, PLUS_EXPR, sum_of_shifted, carry);
3577 return vect_convert_output (vinfo, last_stmt_info, type, g, new_vectype);
3580 /* Generate the IFN_AVG* call. */
3581 gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
3582 new_ops[1]);
3583 gimple_call_set_lhs (average_stmt, new_var);
3584 gimple_set_location (average_stmt, gimple_location (last_stmt));
3586 if (dump_enabled_p ())
3587 dump_printf_loc (MSG_NOTE, vect_location,
3588 "created pattern stmt: %G", (gimple *) average_stmt);
3590 return vect_convert_output (vinfo, last_stmt_info,
3591 type, average_stmt, new_vectype);
3594 /* Recognize cases in which the input to a cast is wider than its
3595 output, and the input is fed by a widening operation. Fold this
3596 by removing the unnecessary intermediate widening. E.g.:
3598 unsigned char a;
3599 unsigned int b = (unsigned int) a;
 3600      unsigned short c = (unsigned short) b;
     -->
 3604      unsigned short c = (unsigned short) a;
3606 Although this is rare in input IR, it is an expected side-effect
3607 of the over-widening pattern above.
3609 This is beneficial also for integer-to-float conversions, if the
3610 widened integer has more bits than the float, and if the unwidened
3611 input doesn't. */
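/* E.g. (illustrative only): for

     unsigned char a;
     unsigned long long b = a;
     float c = (float) b;

   the conversion can be done directly from A, so the vectorized loop never
   needs 64-bit integer vector elements.  */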
3613 static gimple *
3614 vect_recog_cast_forwprop_pattern (vec_info *vinfo,
3615 stmt_vec_info last_stmt_info, tree *type_out)
3617 /* Check for a cast, including an integer-to-float conversion. */
3618 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
3619 if (!last_stmt)
3620 return NULL;
3621 tree_code code = gimple_assign_rhs_code (last_stmt);
3622 if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
3623 return NULL;
3625 /* Make sure that the rhs is a scalar with a natural bitsize. */
3626 tree lhs = gimple_assign_lhs (last_stmt);
3627 if (!lhs)
3628 return NULL;
3629 tree lhs_type = TREE_TYPE (lhs);
3630 scalar_mode lhs_mode;
3631 if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
3632 || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
3633 return NULL;
3635 /* Check for a narrowing operation (from a vector point of view). */
3636 tree rhs = gimple_assign_rhs1 (last_stmt);
3637 tree rhs_type = TREE_TYPE (rhs);
3638 if (!INTEGRAL_TYPE_P (rhs_type)
3639 || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
3640 || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
3641 return NULL;
3643 /* Try to find an unpromoted input. */
3644 vect_unpromoted_value unprom;
3645 if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
3646 || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
3647 return NULL;
3649 /* If the bits above RHS_TYPE matter, make sure that they're the
3650 same when extending from UNPROM as they are when extending from RHS. */
3651 if (!INTEGRAL_TYPE_P (lhs_type)
3652 && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
3653 return NULL;
3655 /* We can get the same result by casting UNPROM directly, to avoid
3656 the unnecessary widening and narrowing. */
3657 vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
3659 *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
3660 if (!*type_out)
3661 return NULL;
3663 tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
3664 gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
3665 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
3667 return pattern_stmt;
3670 /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
3671 to WIDEN_LSHIFT_EXPR. See vect_recog_widen_op_pattern for details. */
3673 static gimple *
3674 vect_recog_widen_shift_pattern (vec_info *vinfo,
3675 stmt_vec_info last_stmt_info, tree *type_out)
3677 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
3678 LSHIFT_EXPR, WIDEN_LSHIFT_EXPR, true,
3679 "vect_recog_widen_shift_pattern");
 3682 /* Detect a rotate pattern that wouldn't otherwise be vectorized:
3684 type a_t, b_t, c_t;
3686 S0 a_t = b_t r<< c_t;
3688 Input/Output:
3690 * STMT_VINFO: The stmt from which the pattern search begins,
3691 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
3692 with a sequence:
3694 S1 d_t = -c_t;
3695 S2 e_t = d_t & (B - 1);
3696 S3 f_t = b_t << c_t;
3697 S4 g_t = b_t >> e_t;
3698 S0 a_t = f_t | g_t;
 3700    where B is the element bitsize of 'type'.
3702 Output:
3704 * TYPE_OUT: The type of the output of this pattern.
3706 * Return value: A new stmt that will be used to replace the rotate
3707 S0 stmt. */
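/* Worked example (illustrative only): for a 32-bit rotate a_t = b_t r<< 8,
   the sequence gives d_t = -8, e_t = -8 & 31 == 24 and
   a_t = (b_t << 8) | (b_t >> 24); for a rotate count of 0 the mask makes
   both shift counts 0, so the BIT_IOR_EXPR still yields b_t.  */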
3709 static gimple *
3710 vect_recog_rotate_pattern (vec_info *vinfo,
3711 stmt_vec_info stmt_vinfo, tree *type_out)
3713 gimple *last_stmt = stmt_vinfo->stmt;
3714 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
3715 gimple *pattern_stmt, *def_stmt;
3716 enum tree_code rhs_code;
3717 enum vect_def_type dt;
3718 optab optab1, optab2;
3719 edge ext_def = NULL;
3720 bool bswap16_p = false;
3722 if (is_gimple_assign (last_stmt))
3724 rhs_code = gimple_assign_rhs_code (last_stmt);
3725 switch (rhs_code)
3727 case LROTATE_EXPR:
3728 case RROTATE_EXPR:
3729 break;
3730 default:
3731 return NULL;
3734 lhs = gimple_assign_lhs (last_stmt);
3735 oprnd0 = gimple_assign_rhs1 (last_stmt);
3736 type = TREE_TYPE (oprnd0);
3737 oprnd1 = gimple_assign_rhs2 (last_stmt);
3739 else if (gimple_call_builtin_p (last_stmt, BUILT_IN_BSWAP16))
3741 /* __builtin_bswap16 (x) is another form of x r>> 8.
3742 The vectorizer has bswap support, but only if the argument isn't
3743 promoted. */
3744 lhs = gimple_call_lhs (last_stmt);
3745 oprnd0 = gimple_call_arg (last_stmt, 0);
3746 type = TREE_TYPE (oprnd0);
3747 if (!lhs
3748 || TYPE_PRECISION (TREE_TYPE (lhs)) != 16
3749 || TYPE_PRECISION (type) <= 16
3750 || TREE_CODE (oprnd0) != SSA_NAME
3751 || BITS_PER_UNIT != 8)
3752 return NULL;
3754 stmt_vec_info def_stmt_info;
3755 if (!vect_is_simple_use (oprnd0, vinfo, &dt, &def_stmt_info, &def_stmt))
3756 return NULL;
3758 if (dt != vect_internal_def)
3759 return NULL;
3761 if (gimple_assign_cast_p (def_stmt))
3763 def = gimple_assign_rhs1 (def_stmt);
3764 if (INTEGRAL_TYPE_P (TREE_TYPE (def))
3765 && TYPE_PRECISION (TREE_TYPE (def)) == 16)
3766 oprnd0 = def;
3769 type = TREE_TYPE (lhs);
3770 vectype = get_vectype_for_scalar_type (vinfo, type);
3771 if (vectype == NULL_TREE)
3772 return NULL;
3774 if (tree char_vectype = get_same_sized_vectype (char_type_node, vectype))
3776 /* The encoding uses one stepped pattern for each byte in the
3777 16-bit word. */
3778 vec_perm_builder elts (TYPE_VECTOR_SUBPARTS (char_vectype), 2, 3);
3779 for (unsigned i = 0; i < 3; ++i)
3780 for (unsigned j = 0; j < 2; ++j)
3781 elts.quick_push ((i + 1) * 2 - j - 1);
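	  /* Illustrative note: the pushes above encode the index series
	     1, 0, 3, 2, 5, 4, ..., i.e. the two bytes within each 16-bit
	     lane are swapped.  */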
3783 vec_perm_indices indices (elts, 1,
3784 TYPE_VECTOR_SUBPARTS (char_vectype));
3785 machine_mode vmode = TYPE_MODE (char_vectype);
3786 if (can_vec_perm_const_p (vmode, vmode, indices))
3788 /* vectorizable_bswap can handle the __builtin_bswap16 if we
3789 undo the argument promotion. */
3790 if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
3792 def = vect_recog_temp_ssa_var (type, NULL);
3793 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
3794 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3795 oprnd0 = def;
3798 /* Pattern detected. */
3799 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
3801 *type_out = vectype;
3803 /* Pattern supported. Create a stmt to be used to replace the
3804 pattern, with the unpromoted argument. */
3805 var = vect_recog_temp_ssa_var (type, NULL);
3806 pattern_stmt = gimple_build_call (gimple_call_fndecl (last_stmt),
3807 1, oprnd0);
3808 gimple_call_set_lhs (pattern_stmt, var);
3809 gimple_call_set_fntype (as_a <gcall *> (pattern_stmt),
3810 gimple_call_fntype (last_stmt));
3811 return pattern_stmt;
3815 oprnd1 = build_int_cst (integer_type_node, 8);
3816 rhs_code = LROTATE_EXPR;
3817 bswap16_p = true;
3819 else
3820 return NULL;
3822 if (TREE_CODE (oprnd0) != SSA_NAME
3823 || !INTEGRAL_TYPE_P (type)
3824 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type))
3825 return NULL;
3827 stmt_vec_info def_stmt_info;
3828 if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt))
3829 return NULL;
3831 if (dt != vect_internal_def
3832 && dt != vect_constant_def
3833 && dt != vect_external_def)
3834 return NULL;
3836 vectype = get_vectype_for_scalar_type (vinfo, type);
3837 if (vectype == NULL_TREE)
3838 return NULL;
3840 /* If vector/vector or vector/scalar rotate is supported by the target,
3841 don't do anything here. */
3842 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
3843 if (optab1
3844 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
3846 use_rotate:
3847 if (bswap16_p)
3849 if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
3851 def = vect_recog_temp_ssa_var (type, NULL);
3852 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
3853 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3854 oprnd0 = def;
3857 /* Pattern detected. */
3858 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
3860 *type_out = vectype;
3862 /* Pattern supported. Create a stmt to be used to replace the
3863 pattern. */
3864 var = vect_recog_temp_ssa_var (type, NULL);
3865 pattern_stmt = gimple_build_assign (var, LROTATE_EXPR, oprnd0,
3866 oprnd1);
3867 return pattern_stmt;
3869 return NULL;
3872 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
3874 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
3875 if (optab2
3876 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
3877 goto use_rotate;
3880 tree utype = unsigned_type_for (type);
3881 tree uvectype = get_vectype_for_scalar_type (vinfo, utype);
3882 if (!uvectype)
3883 return NULL;
3885 /* If vector/vector or vector/scalar shifts aren't supported by the target,
3886 don't do anything here either. */
3887 optab1 = optab_for_tree_code (LSHIFT_EXPR, uvectype, optab_vector);
3888 optab2 = optab_for_tree_code (RSHIFT_EXPR, uvectype, optab_vector);
3889 if (!optab1
3890 || optab_handler (optab1, TYPE_MODE (uvectype)) == CODE_FOR_nothing
3891 || !optab2
3892 || optab_handler (optab2, TYPE_MODE (uvectype)) == CODE_FOR_nothing)
3894 if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
3895 return NULL;
3896 optab1 = optab_for_tree_code (LSHIFT_EXPR, uvectype, optab_scalar);
3897 optab2 = optab_for_tree_code (RSHIFT_EXPR, uvectype, optab_scalar);
3898 if (!optab1
3899 || optab_handler (optab1, TYPE_MODE (uvectype)) == CODE_FOR_nothing
3900 || !optab2
3901 || optab_handler (optab2, TYPE_MODE (uvectype)) == CODE_FOR_nothing)
3902 return NULL;
3905 *type_out = vectype;
3907 if (!useless_type_conversion_p (utype, TREE_TYPE (oprnd0)))
3909 def = vect_recog_temp_ssa_var (utype, NULL);
3910 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
3911 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
3912 oprnd0 = def;
3915 if (dt == vect_external_def && TREE_CODE (oprnd1) == SSA_NAME)
3916 ext_def = vect_get_external_def_edge (vinfo, oprnd1);
3918 def = NULL_TREE;
3919 scalar_int_mode mode = SCALAR_INT_TYPE_MODE (utype);
3920 if (dt != vect_internal_def || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
3921 def = oprnd1;
3922 else if (def_stmt && gimple_assign_cast_p (def_stmt))
3924 tree rhs1 = gimple_assign_rhs1 (def_stmt);
3925 if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
3926 && TYPE_PRECISION (TREE_TYPE (rhs1))
3927 == TYPE_PRECISION (type))
3928 def = rhs1;
3931 if (def == NULL_TREE)
3933 def = vect_recog_temp_ssa_var (utype, NULL);
3934 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
3935 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
3937 stype = TREE_TYPE (def);
3939 if (TREE_CODE (def) == INTEGER_CST)
3941 if (!tree_fits_uhwi_p (def)
3942 || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
3943 || integer_zerop (def))
3944 return NULL;
3945 def2 = build_int_cst (stype,
3946 GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
3948 else
3950 tree vecstype = get_vectype_for_scalar_type (vinfo, stype);
3952 if (vecstype == NULL_TREE)
3953 return NULL;
3954 def2 = vect_recog_temp_ssa_var (stype, NULL);
3955 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
3956 if (ext_def)
3958 basic_block new_bb
3959 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
3960 gcc_assert (!new_bb);
3962 else
3963 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
3965 def2 = vect_recog_temp_ssa_var (stype, NULL);
3966 tree mask = build_int_cst (stype, GET_MODE_PRECISION (mode) - 1);
3967 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
3968 gimple_assign_lhs (def_stmt), mask);
3969 if (ext_def)
3971 basic_block new_bb
3972 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
3973 gcc_assert (!new_bb);
3975 else
3976 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
3979 var1 = vect_recog_temp_ssa_var (utype, NULL);
3980 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
3981 ? LSHIFT_EXPR : RSHIFT_EXPR,
3982 oprnd0, def);
3983 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
3985 var2 = vect_recog_temp_ssa_var (utype, NULL);
3986 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
3987 ? RSHIFT_EXPR : LSHIFT_EXPR,
3988 oprnd0, def2);
3989 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
3991 /* Pattern detected. */
3992 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
3994 /* Pattern supported. Create a stmt to be used to replace the pattern. */
3995 var = vect_recog_temp_ssa_var (utype, NULL);
3996 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
3998 if (!useless_type_conversion_p (type, utype))
4000 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, uvectype);
4001 tree result = vect_recog_temp_ssa_var (type, NULL);
4002 pattern_stmt = gimple_build_assign (result, NOP_EXPR, var);
4004 return pattern_stmt;
 4007 /* Detect a vector by vector shift pattern that wouldn't otherwise be
4008 vectorized:
4010 type a_t;
4011 TYPE b_T, res_T;
4013 S1 a_t = ;
4014 S2 b_T = ;
4015 S3 res_T = b_T op a_t;
 4017    where type 'TYPE' is a type with a different size than 'type',
4018 and op is <<, >> or rotate.
4020 Also detect cases:
4022 type a_t;
4023 TYPE b_T, c_T, res_T;
4025 S0 c_T = ;
4026 S1 a_t = (type) c_T;
4027 S2 b_T = ;
4028 S3 res_T = b_T op a_t;
4030 Input/Output:
4032 * STMT_VINFO: The stmt from which the pattern search begins,
4033 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
 4034         with a shift/rotate which has the same type on both operands, in the
4035 second case just b_T op c_T, in the first case with added cast
4036 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
4038 Output:
4040 * TYPE_OUT: The type of the output of this pattern.
4042 * Return value: A new stmt that will be used to replace the shift/rotate
4043 S3 stmt. */
4045 static gimple *
4046 vect_recog_vector_vector_shift_pattern (vec_info *vinfo,
4047 stmt_vec_info stmt_vinfo,
4048 tree *type_out)
4050 gimple *last_stmt = stmt_vinfo->stmt;
4051 tree oprnd0, oprnd1, lhs, var;
4052 gimple *pattern_stmt;
4053 enum tree_code rhs_code;
4055 if (!is_gimple_assign (last_stmt))
4056 return NULL;
4058 rhs_code = gimple_assign_rhs_code (last_stmt);
4059 switch (rhs_code)
4061 case LSHIFT_EXPR:
4062 case RSHIFT_EXPR:
4063 case LROTATE_EXPR:
4064 case RROTATE_EXPR:
4065 break;
4066 default:
4067 return NULL;
4070 lhs = gimple_assign_lhs (last_stmt);
4071 oprnd0 = gimple_assign_rhs1 (last_stmt);
4072 oprnd1 = gimple_assign_rhs2 (last_stmt);
4073 if (TREE_CODE (oprnd0) != SSA_NAME
4074 || TREE_CODE (oprnd1) != SSA_NAME
4075 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
4076 || !INTEGRAL_TYPE_P (TREE_TYPE (oprnd0))
4077 || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
4078 || TYPE_PRECISION (TREE_TYPE (lhs))
4079 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
4080 return NULL;
4082 stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
4083 if (!def_vinfo)
4084 return NULL;
4086 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (oprnd0));
4087 if (*type_out == NULL_TREE)
4088 return NULL;
4090 tree def = NULL_TREE;
4091 gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
4092 if (def_stmt && gimple_assign_cast_p (def_stmt))
4094 tree rhs1 = gimple_assign_rhs1 (def_stmt);
4095 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
4096 && TYPE_PRECISION (TREE_TYPE (rhs1))
4097 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
4099 if (TYPE_PRECISION (TREE_TYPE (oprnd1))
4100 >= TYPE_PRECISION (TREE_TYPE (rhs1)))
4101 def = rhs1;
4102 else
4104 tree mask
4105 = build_low_bits_mask (TREE_TYPE (rhs1),
4106 TYPE_PRECISION (TREE_TYPE (oprnd1)));
4107 def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
4108 def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
4109 tree vecstype = get_vectype_for_scalar_type (vinfo,
4110 TREE_TYPE (rhs1));
4111 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
4116 if (def == NULL_TREE)
4118 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
4119 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
4120 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4123 /* Pattern detected. */
4124 vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
4126 /* Pattern supported. Create a stmt to be used to replace the pattern. */
4127 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
4128 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
4130 return pattern_stmt;
4133 /* Return true iff the target has a vector optab implementing the operation
4134 CODE on type VECTYPE. */
4136 static bool
4137 target_has_vecop_for_code (tree_code code, tree vectype)
4139 optab voptab = optab_for_tree_code (code, vectype, optab_vector);
4140 return voptab
4141 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
4144 /* Verify that the target has optabs of VECTYPE to perform all the steps
4145 needed by the multiplication-by-immediate synthesis algorithm described by
4146 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
4147 present. Return true iff the target supports all the steps. */
4149 static bool
4150 target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
4151 tree vectype, bool synth_shift_p)
4153 if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
4154 return false;
4156 bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
4157 bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
4159 if (var == negate_variant
4160 && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
4161 return false;
 4163   /* If we must synthesize shifts with additions, make sure that vector
4164 addition is available. */
4165 if ((var == add_variant || synth_shift_p) && !supports_vplus)
4166 return false;
4168 for (int i = 1; i < alg->ops; i++)
4170 switch (alg->op[i])
4172 case alg_shift:
4173 break;
4174 case alg_add_t_m2:
4175 case alg_add_t2_m:
4176 case alg_add_factor:
4177 if (!supports_vplus)
4178 return false;
4179 break;
4180 case alg_sub_t_m2:
4181 case alg_sub_t2_m:
4182 case alg_sub_factor:
4183 if (!supports_vminus)
4184 return false;
4185 break;
4186 case alg_unknown:
4187 case alg_m:
4188 case alg_zero:
4189 case alg_impossible:
4190 return false;
4191 default:
4192 gcc_unreachable ();
4196 return true;
4199 /* Synthesize a left shift of OP by AMNT bits using a series of additions and
 4200    putting the final result in DEST.  Append all statements but the last
 4201    to STMT_INFO's pattern def sequence.  Return the last statement.  */
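/* E.g. (illustrative only): a shift by 3 is emitted as three doublings,
   t1 = op + op; t2 = t1 + t1; dest = t2 + t2.  */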
4203 static gimple *
4204 synth_lshift_by_additions (vec_info *vinfo,
4205 tree dest, tree op, HOST_WIDE_INT amnt,
4206 stmt_vec_info stmt_info)
4208 HOST_WIDE_INT i;
4209 tree itype = TREE_TYPE (op);
4210 tree prev_res = op;
4211 gcc_assert (amnt >= 0);
4212 for (i = 0; i < amnt; i++)
4214 tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
4215 : dest;
4216 gimple *stmt
4217 = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
4218 prev_res = tmp_var;
4219 if (i < amnt - 1)
4220 append_pattern_def_seq (vinfo, stmt_info, stmt);
4221 else
4222 return stmt;
4224 gcc_unreachable ();
4225 return NULL;
4228 /* Helper for vect_synth_mult_by_constant. Apply a binary operation
4229 CODE to operands OP1 and OP2, creating a new temporary SSA var in
4230 the process if necessary. Append the resulting assignment statements
4231 to the sequence in STMT_VINFO. Return the SSA variable that holds the
4232 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
4233 left shifts using additions. */
4235 static tree
4236 apply_binop_and_append_stmt (vec_info *vinfo,
4237 tree_code code, tree op1, tree op2,
4238 stmt_vec_info stmt_vinfo, bool synth_shift_p)
4240 if (integer_zerop (op2)
4241 && (code == LSHIFT_EXPR
4242 || code == PLUS_EXPR))
4244 gcc_assert (TREE_CODE (op1) == SSA_NAME);
4245 return op1;
4248 gimple *stmt;
4249 tree itype = TREE_TYPE (op1);
4250 tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
4252 if (code == LSHIFT_EXPR
4253 && synth_shift_p)
4255 stmt = synth_lshift_by_additions (vinfo, tmp_var, op1,
4256 TREE_INT_CST_LOW (op2), stmt_vinfo);
4257 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
4258 return tmp_var;
4261 stmt = gimple_build_assign (tmp_var, code, op1, op2);
4262 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
4263 return tmp_var;
4266 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
4267 and simple arithmetic operations to be vectorized. Record the statements
4268 produced in STMT_VINFO and return the last statement in the sequence or
4269 NULL if it's not possible to synthesize such a multiplication.
4270 This function mirrors the behavior of expand_mult_const in expmed.cc but
4271 works on tree-ssa form. */
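/* For instance (illustrative only; the exact sequence depends on what
   choose_mult_variant selects), a multiplication by 10 could be synthesized
   as t1 = op << 2; t2 = t1 + op; result = t2 << 1, i.e.
   ((op * 4) + op) * 2 == op * 10.  */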
4273 static gimple *
4274 vect_synth_mult_by_constant (vec_info *vinfo, tree op, tree val,
4275 stmt_vec_info stmt_vinfo)
4277 tree itype = TREE_TYPE (op);
4278 machine_mode mode = TYPE_MODE (itype);
4279 struct algorithm alg;
4280 mult_variant variant;
4281 if (!tree_fits_shwi_p (val))
4282 return NULL;
4284 /* Multiplication synthesis by shifts, adds and subs can introduce
4285 signed overflow where the original operation didn't. Perform the
4286 operations on an unsigned type and cast back to avoid this.
4287 In the future we may want to relax this for synthesis algorithms
4288 that we can prove do not cause unexpected overflow. */
4289 bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
4291 tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
4292 tree vectype = get_vectype_for_scalar_type (vinfo, multtype);
4293 if (!vectype)
4294 return NULL;
4296 /* Targets that don't support vector shifts but support vector additions
4297 can synthesize shifts that way. */
4298 bool synth_shift_p = !vect_supportable_shift (vinfo, LSHIFT_EXPR, multtype);
4300 HOST_WIDE_INT hwval = tree_to_shwi (val);
4301 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
4302 The vectorizer's benefit analysis will decide whether it's beneficial
4303 to do this. */
4304 bool possible = choose_mult_variant (VECTOR_MODE_P (TYPE_MODE (vectype))
4305 ? TYPE_MODE (vectype) : mode,
4306 hwval, &alg, &variant, MAX_COST);
4307 if (!possible)
4308 return NULL;
4310 if (!target_supports_mult_synth_alg (&alg, variant, vectype, synth_shift_p))
4311 return NULL;
4313 tree accumulator;
4315 /* Clear out the sequence of statements so we can populate it below. */
4316 gimple *stmt = NULL;
4318 if (cast_to_unsigned_p)
4320 tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
4321 stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
4322 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
4323 op = tmp_op;
4326 if (alg.op[0] == alg_zero)
4327 accumulator = build_int_cst (multtype, 0);
4328 else
4329 accumulator = op;
4331 bool needs_fixup = (variant == negate_variant)
4332 || (variant == add_variant);
4334 for (int i = 1; i < alg.ops; i++)
4336 tree shft_log = build_int_cst (multtype, alg.log[i]);
4337 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
4338 tree tmp_var = NULL_TREE;
4340 switch (alg.op[i])
4342 case alg_shift:
4343 if (synth_shift_p)
4344 stmt
4345 = synth_lshift_by_additions (vinfo, accum_tmp, accumulator,
4346 alg.log[i], stmt_vinfo);
4347 else
4348 stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
4349 shft_log);
4350 break;
4351 case alg_add_t_m2:
4352 tmp_var
4353 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op, shft_log,
4354 stmt_vinfo, synth_shift_p);
4355 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
4356 tmp_var);
4357 break;
4358 case alg_sub_t_m2:
4359 tmp_var = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op,
4360 shft_log, stmt_vinfo,
4361 synth_shift_p);
4362 /* In some algorithms the first step involves zeroing the
 4363            accumulator.  If subtracting from such an accumulator,
4364 just emit the negation directly. */
4365 if (integer_zerop (accumulator))
4366 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
4367 else
4368 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
4369 tmp_var);
4370 break;
4371 case alg_add_t2_m:
4372 tmp_var
4373 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
4374 shft_log, stmt_vinfo, synth_shift_p);
4375 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
4376 break;
4377 case alg_sub_t2_m:
4378 tmp_var
4379 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
4380 shft_log, stmt_vinfo, synth_shift_p);
4381 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
4382 break;
4383 case alg_add_factor:
4384 tmp_var
4385 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
4386 shft_log, stmt_vinfo, synth_shift_p);
4387 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
4388 tmp_var);
4389 break;
4390 case alg_sub_factor:
4391 tmp_var
4392 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
4393 shft_log, stmt_vinfo, synth_shift_p);
4394 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
4395 accumulator);
4396 break;
4397 default:
4398 gcc_unreachable ();
4400 /* We don't want to append the last stmt in the sequence to stmt_vinfo
4401 but rather return it directly. */
4403 if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
4404 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
4405 accumulator = accum_tmp;
4407 if (variant == negate_variant)
4409 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
4410 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
4411 accumulator = accum_tmp;
4412 if (cast_to_unsigned_p)
4413 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
4415 else if (variant == add_variant)
4417 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
4418 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
4419 accumulator = accum_tmp;
4420 if (cast_to_unsigned_p)
4421 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
 4423   /* Move back to a signed type if needed.  */
4424 if (cast_to_unsigned_p)
4426 tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
4427 stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
4430 return stmt;
 4433 /* Detect multiplication by a constant and convert it into a sequence
 4434    of shifts, additions, subtractions and negations.  We reuse the
 4435    choose_mult_variant algorithms from expmed.cc.
4437 Input/Output:
4439 STMT_VINFO: The stmt from which the pattern search begins,
4440 i.e. the mult stmt.
4442 Output:
4444 * TYPE_OUT: The type of the output of this pattern.
4446 * Return value: A new stmt that will be used to replace
4447 the multiplication. */
4449 static gimple *
4450 vect_recog_mult_pattern (vec_info *vinfo,
4451 stmt_vec_info stmt_vinfo, tree *type_out)
4453 gimple *last_stmt = stmt_vinfo->stmt;
4454 tree oprnd0, oprnd1, vectype, itype;
4455 gimple *pattern_stmt;
4457 if (!is_gimple_assign (last_stmt))
4458 return NULL;
4460 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
4461 return NULL;
4463 oprnd0 = gimple_assign_rhs1 (last_stmt);
4464 oprnd1 = gimple_assign_rhs2 (last_stmt);
4465 itype = TREE_TYPE (oprnd0);
4467 if (TREE_CODE (oprnd0) != SSA_NAME
4468 || TREE_CODE (oprnd1) != INTEGER_CST
4469 || !INTEGRAL_TYPE_P (itype)
4470 || !type_has_mode_precision_p (itype))
4471 return NULL;
4473 vectype = get_vectype_for_scalar_type (vinfo, itype);
4474 if (vectype == NULL_TREE)
4475 return NULL;
4477 /* If the target can handle vectorized multiplication natively,
4478 don't attempt to optimize this. */
4479 optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
4480 if (mul_optab != unknown_optab)
4482 machine_mode vec_mode = TYPE_MODE (vectype);
4483 int icode = (int) optab_handler (mul_optab, vec_mode);
4484 if (icode != CODE_FOR_nothing)
4485 return NULL;
4488 pattern_stmt = vect_synth_mult_by_constant (vinfo,
4489 oprnd0, oprnd1, stmt_vinfo);
4490 if (!pattern_stmt)
4491 return NULL;
4493 /* Pattern detected. */
4494 vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
4496 *type_out = vectype;
4498 return pattern_stmt;
4501 /* Detect a signed division by a constant that wouldn't be
4502 otherwise vectorized:
4504 type a_t, b_t;
4506 S1 a_t = b_t / N;
4508 where type 'type' is an integral type and N is a constant.
4510 Similarly handle modulo by a constant:
4512 S4 a_t = b_t % N;
4514 Input/Output:
4516 * STMT_VINFO: The stmt from which the pattern search begins,
 4517        i.e. the division stmt.  S1 is replaced, if N is a power
 4518        of two constant and type is signed, by:
4519 S3 y_t = b_t < 0 ? N - 1 : 0;
4520 S2 x_t = b_t + y_t;
4521 S1' a_t = x_t >> log2 (N);
 4523     S4 is replaced, if N is a power of two constant and
 4524     type is signed, by (where *_T temporaries have unsigned type):
4525 S9 y_T = b_t < 0 ? -1U : 0U;
4526 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
4527 S7 z_t = (type) z_T;
4528 S6 w_t = b_t + z_t;
4529 S5 x_t = w_t & (N - 1);
4530 S4' a_t = x_t - z_t;
4532 Output:
4534 * TYPE_OUT: The type of the output of this pattern.
4536 * Return value: A new stmt that will be used to replace the division
4537 S1 or modulo S4 stmt. */
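/* Worked example (illustrative only): for signed int b_t and N == 4 the
   division expands to y_t = b_t < 0 ? 3 : 0; x_t = b_t + y_t;
   a_t = x_t >> 2, so e.g. b_t == -5 gives y_t == 3, x_t == -2 and
   a_t == -1, matching truncating division.  */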
4539 static gimple *
4540 vect_recog_divmod_pattern (vec_info *vinfo,
4541 stmt_vec_info stmt_vinfo, tree *type_out)
4543 gimple *last_stmt = stmt_vinfo->stmt;
4544 tree oprnd0, oprnd1, vectype, itype, cond;
4545 gimple *pattern_stmt, *def_stmt;
4546 enum tree_code rhs_code;
4547 optab optab;
4548 tree q, cst;
4549 int dummy_int, prec;
4551 if (!is_gimple_assign (last_stmt))
4552 return NULL;
4554 rhs_code = gimple_assign_rhs_code (last_stmt);
4555 switch (rhs_code)
4557 case TRUNC_DIV_EXPR:
4558 case EXACT_DIV_EXPR:
4559 case TRUNC_MOD_EXPR:
4560 break;
4561 default:
4562 return NULL;
4565 oprnd0 = gimple_assign_rhs1 (last_stmt);
4566 oprnd1 = gimple_assign_rhs2 (last_stmt);
4567 itype = TREE_TYPE (oprnd0);
4568 if (TREE_CODE (oprnd0) != SSA_NAME
4569 || TREE_CODE (oprnd1) != INTEGER_CST
4570 || TREE_CODE (itype) != INTEGER_TYPE
4571 || !type_has_mode_precision_p (itype))
4572 return NULL;
4574 scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
4575 vectype = get_vectype_for_scalar_type (vinfo, itype);
4576 if (vectype == NULL_TREE)
4577 return NULL;
4579 if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
4581 /* If the target can handle vectorized division or modulo natively,
4582 don't attempt to optimize this, since native division is likely
4583 to give smaller code. */
4584 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
4585 if (optab != unknown_optab)
4587 machine_mode vec_mode = TYPE_MODE (vectype);
4588 int icode = (int) optab_handler (optab, vec_mode);
4589 if (icode != CODE_FOR_nothing)
4590 return NULL;
4594 prec = TYPE_PRECISION (itype);
4595 if (integer_pow2p (oprnd1))
4597 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
4598 return NULL;
4600 /* Pattern detected. */
4601 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
4603 *type_out = vectype;
4605 /* Check if the target supports this internal function. */
4606 internal_fn ifn = IFN_DIV_POW2;
4607 if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
4609 tree shift = build_int_cst (itype, tree_log2 (oprnd1));
4611 tree var_div = vect_recog_temp_ssa_var (itype, NULL);
4612 gimple *div_stmt = gimple_build_call_internal (ifn, 2, oprnd0, shift);
4613 gimple_call_set_lhs (div_stmt, var_div);
4615 if (rhs_code == TRUNC_MOD_EXPR)
4617 append_pattern_def_seq (vinfo, stmt_vinfo, div_stmt);
4618 def_stmt
4619 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4620 LSHIFT_EXPR, var_div, shift);
4621 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4622 pattern_stmt
4623 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4624 MINUS_EXPR, oprnd0,
4625 gimple_assign_lhs (def_stmt));
4627 else
4628 pattern_stmt = div_stmt;
4629 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
4631 return pattern_stmt;
4634 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
4635 build_int_cst (itype, 0));
4636 if (rhs_code == TRUNC_DIV_EXPR
4637 || rhs_code == EXACT_DIV_EXPR)
4639 tree var = vect_recog_temp_ssa_var (itype, NULL);
4640 tree shift;
4641 def_stmt
4642 = gimple_build_assign (var, COND_EXPR, cond,
4643 fold_build2 (MINUS_EXPR, itype, oprnd1,
4644 build_int_cst (itype, 1)),
4645 build_int_cst (itype, 0));
4646 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4647 var = vect_recog_temp_ssa_var (itype, NULL);
4648 def_stmt
4649 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
4650 gimple_assign_lhs (def_stmt));
4651 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4653 shift = build_int_cst (itype, tree_log2 (oprnd1));
4654 pattern_stmt
4655 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4656 RSHIFT_EXPR, var, shift);
4658 else
4660 tree signmask;
4661 if (compare_tree_int (oprnd1, 2) == 0)
4663 signmask = vect_recog_temp_ssa_var (itype, NULL);
4664 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
4665 build_int_cst (itype, 1),
4666 build_int_cst (itype, 0));
4667 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4669 else
4671 tree utype
4672 = build_nonstandard_integer_type (prec, 1);
4673 tree vecutype = get_vectype_for_scalar_type (vinfo, utype);
4674 tree shift
4675 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
4676 - tree_log2 (oprnd1));
4677 tree var = vect_recog_temp_ssa_var (utype, NULL);
4679 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
4680 build_int_cst (utype, -1),
4681 build_int_cst (utype, 0));
4682 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
4683 var = vect_recog_temp_ssa_var (utype, NULL);
4684 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
4685 gimple_assign_lhs (def_stmt),
4686 shift);
4687 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
4688 signmask = vect_recog_temp_ssa_var (itype, NULL);
4689 def_stmt
4690 = gimple_build_assign (signmask, NOP_EXPR, var);
4691 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4693 def_stmt
4694 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4695 PLUS_EXPR, oprnd0, signmask);
4696 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4697 def_stmt
4698 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4699 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
4700 fold_build2 (MINUS_EXPR, itype, oprnd1,
4701 build_int_cst (itype, 1)));
4702 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4704 pattern_stmt
4705 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4706 MINUS_EXPR, gimple_assign_lhs (def_stmt),
4707 signmask);
4710 return pattern_stmt;
4713 if ((cst = uniform_integer_cst_p (oprnd1))
4714 && TYPE_UNSIGNED (itype)
4715 && rhs_code == TRUNC_DIV_EXPR
4716 && vectype
4717 && targetm.vectorize.preferred_div_as_shifts_over_mult (vectype))
4719 /* We can use the relationship:
4721 x // N == ((x+N+2) // (N+1) + x) // (N+1) for 0 <= x < N(N+3)
4723 to optimize cases where N+1 is a power of 2, and where // (N+1)
4724 is therefore a shift right. When operating in modes that are
4725 multiples of a byte in size, there are two cases:
4727 (1) N(N+3) is not representable, in which case the question
4728 becomes whether the replacement expression overflows.
4729 It is enough to test that x+N+2 does not overflow,
4730 i.e. that x < MAX-(N+1).
4732 (2) N(N+3) is representable, in which case it is the (only)
4733 bound that we need to check.
4735 ??? For now we just handle the case where // (N+1) is a shift
4736 right by half the precision, since some architectures can
4737 optimize the associated addition and shift combinations
4738 into single instructions. */
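      /* Illustrative check of the identity above (example values only):
	 with prec == 16 and N == 255, so that N + 1 == 1 << 8 and
	 pow == prec / 2, x / 255 becomes ((x + 257) >> 8 + x) >> 8;
	 e.g. x == 510 gives ((510 + 257) >> 8 + 510) >> 8 == 2 and
	 x == 509 gives ((509 + 257) >> 8 + 509) >> 8 == 1, matching
	 510 / 255 and 509 / 255.  */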
4740 auto wcst = wi::to_wide (cst);
4741 int pow = wi::exact_log2 (wcst + 1);
4742 if (pow == prec / 2)
4744 gimple *stmt = SSA_NAME_DEF_STMT (oprnd0);
4746 gimple_ranger ranger;
4747 int_range_max r;
4749 /* Check that no overflow will occur. If we don't have range
4750 information we can't perform the optimization. */
4752 if (ranger.range_of_expr (r, oprnd0, stmt) && !r.undefined_p ())
4754 wide_int max = r.upper_bound ();
4755 wide_int one = wi::shwi (1, prec);
4756 wide_int adder = wi::add (one, wi::lshift (one, pow));
4757 wi::overflow_type ovf;
4758 wi::add (max, adder, UNSIGNED, &ovf);
4759 if (ovf == wi::OVF_NONE)
4761 *type_out = vectype;
4762 tree tadder = wide_int_to_tree (itype, adder);
4763 tree rshift = wide_int_to_tree (itype, pow);
4765 tree new_lhs1 = vect_recog_temp_ssa_var (itype, NULL);
4766 gassign *patt1
4767 = gimple_build_assign (new_lhs1, PLUS_EXPR, oprnd0, tadder);
4768 append_pattern_def_seq (vinfo, stmt_vinfo, patt1, vectype);
4770 tree new_lhs2 = vect_recog_temp_ssa_var (itype, NULL);
4771 patt1 = gimple_build_assign (new_lhs2, RSHIFT_EXPR, new_lhs1,
4772 rshift);
4773 append_pattern_def_seq (vinfo, stmt_vinfo, patt1, vectype);
4775 tree new_lhs3 = vect_recog_temp_ssa_var (itype, NULL);
4776 patt1 = gimple_build_assign (new_lhs3, PLUS_EXPR, new_lhs2,
4777 oprnd0);
4778 append_pattern_def_seq (vinfo, stmt_vinfo, patt1, vectype);
4780 tree new_lhs4 = vect_recog_temp_ssa_var (itype, NULL);
4781 pattern_stmt = gimple_build_assign (new_lhs4, RSHIFT_EXPR,
4782 new_lhs3, rshift);
4784 return pattern_stmt;
4790 if (prec > HOST_BITS_PER_WIDE_INT
4791 || integer_zerop (oprnd1))
4792 return NULL;
4794 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
4795 return NULL;
4797 if (TYPE_UNSIGNED (itype))
4799 unsigned HOST_WIDE_INT mh, ml;
4800 int pre_shift, post_shift;
4801 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
4802 & GET_MODE_MASK (itype_mode));
4803 tree t1, t2, t3, t4;
4805 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
4806 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
4807 return NULL;
4809 /* Find a suitable multiplier and right shift count
4810 instead of multiplying with D. */
4811 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
4813 /* If the suggested multiplier is more than SIZE bits, we can do better
4814 for even divisors, using an initial right shift. */
4815 if (mh != 0 && (d & 1) == 0)
4817 pre_shift = ctz_or_zero (d);
4818 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
4819 &ml, &post_shift, &dummy_int);
4820 gcc_assert (!mh);
4822 else
4823 pre_shift = 0;
4825 if (mh != 0)
4827 if (post_shift - 1 >= prec)
4828 return NULL;
4830 /* t1 = oprnd0 h* ml;
4831 t2 = oprnd0 - t1;
4832 t3 = t2 >> 1;
4833 t4 = t1 + t3;
4834 q = t4 >> (post_shift - 1); */
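	  /* Illustrative example (assumed values, not derived from the
	     original sources): for an 8-bit unsigned division by 7, one
	     suitable pair is ml == 37 with post_shift == 3; oprnd0 == 100
	     then gives t1 == (100 * 37) >> 8 == 14, t2 == 86, t3 == 43,
	     t4 == 57 and q == 57 >> 2 == 14 == 100 / 7.  */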
4835 t1 = vect_recog_temp_ssa_var (itype, NULL);
4836 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
4837 build_int_cst (itype, ml));
4838 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4840 t2 = vect_recog_temp_ssa_var (itype, NULL);
4841 def_stmt
4842 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
4843 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4845 t3 = vect_recog_temp_ssa_var (itype, NULL);
4846 def_stmt
4847 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
4848 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4850 t4 = vect_recog_temp_ssa_var (itype, NULL);
4851 def_stmt
4852 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
4854 if (post_shift != 1)
4856 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4858 q = vect_recog_temp_ssa_var (itype, NULL);
4859 pattern_stmt
4860 = gimple_build_assign (q, RSHIFT_EXPR, t4,
4861 build_int_cst (itype, post_shift - 1));
4863 else
4865 q = t4;
4866 pattern_stmt = def_stmt;
4869 else
4871 if (pre_shift >= prec || post_shift >= prec)
4872 return NULL;
4874 /* t1 = oprnd0 >> pre_shift;
4875 t2 = t1 h* ml;
4876 q = t2 >> post_shift; */
4877 if (pre_shift)
4879 t1 = vect_recog_temp_ssa_var (itype, NULL);
4880 def_stmt
4881 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
4882 build_int_cst (NULL, pre_shift));
4883 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4885 else
4886 t1 = oprnd0;
4888 t2 = vect_recog_temp_ssa_var (itype, NULL);
4889 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
4890 build_int_cst (itype, ml));
4892 if (post_shift)
4894 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4896 q = vect_recog_temp_ssa_var (itype, NULL);
4897 def_stmt
4898 = gimple_build_assign (q, RSHIFT_EXPR, t2,
4899 build_int_cst (itype, post_shift));
4901 else
4902 q = t2;
4904 pattern_stmt = def_stmt;
4907 else
4909 unsigned HOST_WIDE_INT ml;
4910 int post_shift;
4911 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
4912 unsigned HOST_WIDE_INT abs_d;
4913 bool add = false;
4914 tree t1, t2, t3, t4;
4916 /* Give up for -1. */
4917 if (d == -1)
4918 return NULL;
4920 /* Since d might be INT_MIN, we have to cast to
4921 unsigned HOST_WIDE_INT before negating to avoid
4922 undefined signed overflow. */
4923 abs_d = (d >= 0
4924 ? (unsigned HOST_WIDE_INT) d
4925 : - (unsigned HOST_WIDE_INT) d);
4927 /* n rem d = n rem -d */
4928 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
4930 d = abs_d;
4931 oprnd1 = build_int_cst (itype, abs_d);
4933 if (HOST_BITS_PER_WIDE_INT >= prec
4934 && abs_d == HOST_WIDE_INT_1U << (prec - 1))
4935 /* This case is not handled correctly below. */
4936 return NULL;
4938 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
4939 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
4941 add = true;
4942 ml |= HOST_WIDE_INT_M1U << (prec - 1);
4944 if (post_shift >= prec)
4945 return NULL;
4947 /* t1 = oprnd0 h* ml; */
4948 t1 = vect_recog_temp_ssa_var (itype, NULL);
4949 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
4950 build_int_cst (itype, ml));
4952 if (add)
4954 /* t2 = t1 + oprnd0; */
4955 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4956 t2 = vect_recog_temp_ssa_var (itype, NULL);
4957 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
4959 else
4960 t2 = t1;
4962 if (post_shift)
4964 /* t3 = t2 >> post_shift; */
4965 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4966 t3 = vect_recog_temp_ssa_var (itype, NULL);
4967 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
4968 build_int_cst (itype, post_shift));
4970 else
4971 t3 = t2;
4973 int msb = 1;
4974 value_range r;
4975 get_range_query (cfun)->range_of_expr (r, oprnd0);
4976 if (!r.varying_p () && !r.undefined_p ())
4978 if (!wi::neg_p (r.lower_bound (), TYPE_SIGN (itype)))
4979 msb = 0;
4980 else if (wi::neg_p (r.upper_bound (), TYPE_SIGN (itype)))
4981 msb = -1;
4984 if (msb == 0 && d >= 0)
4986 /* q = t3; */
4987 q = t3;
4988 pattern_stmt = def_stmt;
4990 else
4992 /* t4 = oprnd0 >> (prec - 1);
4993 or if we know from VRP that oprnd0 >= 0
4994 t4 = 0;
4995 or if we know from VRP that oprnd0 < 0
4996 t4 = -1; */
4997 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4998 t4 = vect_recog_temp_ssa_var (itype, NULL);
4999 if (msb != 1)
5000 def_stmt = gimple_build_assign (t4, INTEGER_CST,
5001 build_int_cst (itype, msb));
5002 else
5003 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
5004 build_int_cst (itype, prec - 1));
5005 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5007 /* q = t3 - t4; or q = t4 - t3; */
5008 q = vect_recog_temp_ssa_var (itype, NULL);
5009 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
5010 d < 0 ? t3 : t4);
5014 if (rhs_code == TRUNC_MOD_EXPR)
5016 tree r, t1;
5018 /* We divided. Now finish by:
5019 t1 = q * oprnd1;
5020 r = oprnd0 - t1; */
5021 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
5023 t1 = vect_recog_temp_ssa_var (itype, NULL);
5024 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
5025 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5027 r = vect_recog_temp_ssa_var (itype, NULL);
5028 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
5031 /* Pattern detected. */
5032 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
5034 *type_out = vectype;
5035 return pattern_stmt;
5038 /* Function vect_recog_mixed_size_cond_pattern
5040 Try to find the following pattern:
5042 type x_t, y_t;
5043 TYPE a_T, b_T, c_T;
5044 loop:
5045 S1 a_T = x_t CMP y_t ? b_T : c_T;
 5047    where type 'TYPE' is an integral type which has a different size
5048 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
5049 than 'type', the constants need to fit into an integer type
5050 with the same width as 'type') or results of conversion from 'type'.
5052 Input:
5054 * STMT_VINFO: The stmt from which the pattern search begins.
5056 Output:
5058 * TYPE_OUT: The type of the output of this pattern.
5060 * Return value: A new stmt that will be used to replace the pattern.
5061 Additionally a def_stmt is added.
5063 a_it = x_t CMP y_t ? b_it : c_it;
5064 a_T = (TYPE) a_it; */
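/* E.g. (illustrative only): with short x_t, y_t and int a_T,
   a_T = x_t < y_t ? 3 : 7 is rewritten as a_it = x_t < y_t ? 3 : 7
   computed in a short-sized integer type, followed by a_T = (int) a_it,
   so the VEC_COND_EXPR operates on equally sized vector elements.  */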
5066 static gimple *
5067 vect_recog_mixed_size_cond_pattern (vec_info *vinfo,
5068 stmt_vec_info stmt_vinfo, tree *type_out)
5070 gimple *last_stmt = stmt_vinfo->stmt;
5071 tree cond_expr, then_clause, else_clause;
5072 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
5073 gimple *pattern_stmt, *def_stmt;
5074 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
5075 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
5076 bool promotion;
5077 tree comp_scalar_type;
5079 if (!is_gimple_assign (last_stmt)
5080 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
5081 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
5082 return NULL;
5084 cond_expr = gimple_assign_rhs1 (last_stmt);
5085 then_clause = gimple_assign_rhs2 (last_stmt);
5086 else_clause = gimple_assign_rhs3 (last_stmt);
5088 if (!COMPARISON_CLASS_P (cond_expr))
5089 return NULL;
5091 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
5092 comp_vectype = get_vectype_for_scalar_type (vinfo, comp_scalar_type);
5093 if (comp_vectype == NULL_TREE)
5094 return NULL;
5096 type = TREE_TYPE (gimple_assign_lhs (last_stmt));
5097 if (types_compatible_p (type, comp_scalar_type)
5098 || ((TREE_CODE (then_clause) != INTEGER_CST
5099 || TREE_CODE (else_clause) != INTEGER_CST)
5100 && !INTEGRAL_TYPE_P (comp_scalar_type))
5101 || !INTEGRAL_TYPE_P (type))
5102 return NULL;
5104 if ((TREE_CODE (then_clause) != INTEGER_CST
5105 && !type_conversion_p (vinfo, then_clause, false,
5106 &orig_type0, &def_stmt0, &promotion))
5107 || (TREE_CODE (else_clause) != INTEGER_CST
5108 && !type_conversion_p (vinfo, else_clause, false,
5109 &orig_type1, &def_stmt1, &promotion)))
5110 return NULL;
5112 if (orig_type0 && orig_type1
5113 && !types_compatible_p (orig_type0, orig_type1))
5114 return NULL;
5116 if (orig_type0)
5118 if (!types_compatible_p (orig_type0, comp_scalar_type))
5119 return NULL;
5120 then_clause = gimple_assign_rhs1 (def_stmt0);
5121 itype = orig_type0;
5124 if (orig_type1)
5126 if (!types_compatible_p (orig_type1, comp_scalar_type))
5127 return NULL;
5128 else_clause = gimple_assign_rhs1 (def_stmt1);
5129 itype = orig_type1;
5133 HOST_WIDE_INT cmp_mode_size
5134 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
5136 scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
5137 if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
5138 return NULL;
5140 vectype = get_vectype_for_scalar_type (vinfo, type);
5141 if (vectype == NULL_TREE)
5142 return NULL;
5144 if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
5145 return NULL;
5147 if (itype == NULL_TREE)
5148 itype = build_nonstandard_integer_type (cmp_mode_size,
5149 TYPE_UNSIGNED (type));
5151 if (itype == NULL_TREE
5152 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
5153 return NULL;
5155 vecitype = get_vectype_for_scalar_type (vinfo, itype);
5156 if (vecitype == NULL_TREE)
5157 return NULL;
5159 if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
5160 return NULL;
5162 if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
5164 if ((TREE_CODE (then_clause) == INTEGER_CST
5165 && !int_fits_type_p (then_clause, itype))
5166 || (TREE_CODE (else_clause) == INTEGER_CST
5167 && !int_fits_type_p (else_clause, itype)))
5168 return NULL;
5171 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
5172 COND_EXPR, unshare_expr (cond_expr),
5173 fold_convert (itype, then_clause),
5174 fold_convert (itype, else_clause));
5175 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
5176 NOP_EXPR, gimple_assign_lhs (def_stmt));
5178 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecitype);
5179 *type_out = vectype;
5181 vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt);
5183 return pattern_stmt;
5187 /* Helper function of vect_recog_bool_pattern. Called recursively, return
5188 true if bool VAR can and should be optimized that way. Assume it shouldn't
5189 in case it's a result of a comparison which can be directly vectorized into
5190 a vector comparison. Fills in STMTS with all stmts visited during the
5191 walk. */
5193 static bool
5194 check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
5196 tree rhs1;
5197 enum tree_code rhs_code;
5199 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
5200 if (!def_stmt_info)
5201 return false;
5203 gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
5204 if (!def_stmt)
5205 return false;
5207 if (stmts.contains (def_stmt))
5208 return true;
5210 rhs1 = gimple_assign_rhs1 (def_stmt);
5211 rhs_code = gimple_assign_rhs_code (def_stmt);
5212 switch (rhs_code)
5214 case SSA_NAME:
5215 if (! check_bool_pattern (rhs1, vinfo, stmts))
5216 return false;
5217 break;
5219 CASE_CONVERT:
5220 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
5221 return false;
5222 if (! check_bool_pattern (rhs1, vinfo, stmts))
5223 return false;
5224 break;
5226 case BIT_NOT_EXPR:
5227 if (! check_bool_pattern (rhs1, vinfo, stmts))
5228 return false;
5229 break;
5231 case BIT_AND_EXPR:
5232 case BIT_IOR_EXPR:
5233 case BIT_XOR_EXPR:
5234 if (! check_bool_pattern (rhs1, vinfo, stmts)
5235 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
5236 return false;
5237 break;
5239 default:
5240 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
5242 tree vecitype, comp_vectype;
5244 /* If the comparison can throw, then is_gimple_condexpr will be
5245 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
5246 if (stmt_could_throw_p (cfun, def_stmt))
5247 return false;
5249 comp_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1));
5250 if (comp_vectype == NULL_TREE)
5251 return false;
5253 tree mask_type = get_mask_type_for_scalar_type (vinfo,
5254 TREE_TYPE (rhs1));
5255 if (mask_type
5256 && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
5257 return false;
5259 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
5261 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
5262 tree itype
5263 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
5264 vecitype = get_vectype_for_scalar_type (vinfo, itype);
5265 if (vecitype == NULL_TREE)
5266 return false;
5268 else
5269 vecitype = comp_vectype;
5270 if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
5271 return false;
5273 else
5274 return false;
5275 break;
5278 bool res = stmts.add (def_stmt);
5279 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
5280 gcc_assert (!res);
5282 return true;
5286 /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
5287 stmt (SSA_NAME_DEF_STMT of VAR) adding a cast to STMT_INFOs
5288 pattern sequence. */
5290 static tree
5291 adjust_bool_pattern_cast (vec_info *vinfo,
5292 tree type, tree var, stmt_vec_info stmt_info)
5294 gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
5295 NOP_EXPR, var);
5296 append_pattern_def_seq (vinfo, stmt_info, cast_stmt,
5297 get_vectype_for_scalar_type (vinfo, type));
5298 return gimple_assign_lhs (cast_stmt);
5301 /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
5302 VAR is an SSA_NAME that should be transformed from bool to a wider integer
5303 type, OUT_TYPE is the desired final integer type of the whole pattern.
5304 STMT_INFO is the info of the pattern root and is where pattern stmts should
5305 be associated with. DEFS is a map of pattern defs. */
5307 static void
5308 adjust_bool_pattern (vec_info *vinfo, tree var, tree out_type,
5309 stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
5311 gimple *stmt = SSA_NAME_DEF_STMT (var);
5312 enum tree_code rhs_code, def_rhs_code;
5313 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
5314 location_t loc;
5315 gimple *pattern_stmt, *def_stmt;
5316 tree trueval = NULL_TREE;
5318 rhs1 = gimple_assign_rhs1 (stmt);
5319 rhs2 = gimple_assign_rhs2 (stmt);
5320 rhs_code = gimple_assign_rhs_code (stmt);
5321 loc = gimple_location (stmt);
5322 switch (rhs_code)
5324 case SSA_NAME:
5325 CASE_CONVERT:
5326 irhs1 = *defs.get (rhs1);
5327 itype = TREE_TYPE (irhs1);
5328 pattern_stmt
5329 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
5330 SSA_NAME, irhs1);
5331 break;
5333 case BIT_NOT_EXPR:
5334 irhs1 = *defs.get (rhs1);
5335 itype = TREE_TYPE (irhs1);
5336 pattern_stmt
5337 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
5338 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
5339 break;
5341 case BIT_AND_EXPR:
5342 /* Try to optimize x = y & (a < b ? 1 : 0); into
5343 x = (a < b ? y : 0);
5345 E.g. for:
5346 bool a_b, b_b, c_b;
5347 TYPE d_T;
5349 S1 a_b = x1 CMP1 y1;
5350 S2 b_b = x2 CMP2 y2;
5351 S3 c_b = a_b & b_b;
5352 S4 d_T = (TYPE) c_b;
5354 we would normally emit:
5356 S1' a_T = x1 CMP1 y1 ? 1 : 0;
5357 S2' b_T = x2 CMP2 y2 ? 1 : 0;
5358 S3' c_T = a_T & b_T;
5359 S4' d_T = c_T;
5361 but we can save one stmt by using the
5362 result of one of the COND_EXPRs in the other COND_EXPR and leave
5363 BIT_AND_EXPR stmt out:
5365 S1' a_T = x1 CMP1 y1 ? 1 : 0;
5366 S3' c_T = x2 CMP2 y2 ? a_T : 0;
5367 S4' f_T = c_T;
5369 At least when VEC_COND_EXPR is implemented using masks,
5370 cond ? 1 : 0 is as expensive as cond ? var : 0: in both cases it
5371 computes the comparison mask and ANDs it, in one case with an
5372 all-ones vector, in the other case with a vector register.
5373 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
5374 often more expensive. */
5375 def_stmt = SSA_NAME_DEF_STMT (rhs2);
5376 def_rhs_code = gimple_assign_rhs_code (def_stmt);
5377 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
5379 irhs1 = *defs.get (rhs1);
5380 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
5381 if (TYPE_PRECISION (TREE_TYPE (irhs1))
5382 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
5384 rhs_code = def_rhs_code;
5385 rhs1 = def_rhs1;
5386 rhs2 = gimple_assign_rhs2 (def_stmt);
5387 trueval = irhs1;
5388 goto do_compare;
5390 else
5391 irhs2 = *defs.get (rhs2);
5392 goto and_ior_xor;
5394 def_stmt = SSA_NAME_DEF_STMT (rhs1);
5395 def_rhs_code = gimple_assign_rhs_code (def_stmt);
5396 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
5398 irhs2 = *defs.get (rhs2);
5399 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
5400 if (TYPE_PRECISION (TREE_TYPE (irhs2))
5401 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
5403 rhs_code = def_rhs_code;
5404 rhs1 = def_rhs1;
5405 rhs2 = gimple_assign_rhs2 (def_stmt);
5406 trueval = irhs2;
5407 goto do_compare;
5409 else
5410 irhs1 = *defs.get (rhs1);
5411 goto and_ior_xor;
5413 /* FALLTHRU */
5414 case BIT_IOR_EXPR:
5415 case BIT_XOR_EXPR:
5416 irhs1 = *defs.get (rhs1);
5417 irhs2 = *defs.get (rhs2);
5418 and_ior_xor:
5419 if (TYPE_PRECISION (TREE_TYPE (irhs1))
5420 != TYPE_PRECISION (TREE_TYPE (irhs2)))
5422 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
5423 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
5424 int out_prec = TYPE_PRECISION (out_type);
5425 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
5426 irhs2 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs1), irhs2,
5427 stmt_info);
5428 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
5429 irhs1 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs2), irhs1,
5430 stmt_info);
5431 else
5433 irhs1 = adjust_bool_pattern_cast (vinfo,
5434 out_type, irhs1, stmt_info);
5435 irhs2 = adjust_bool_pattern_cast (vinfo,
5436 out_type, irhs2, stmt_info);
5439 itype = TREE_TYPE (irhs1);
5440 pattern_stmt
5441 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
5442 rhs_code, irhs1, irhs2);
5443 break;
5445 default:
5446 do_compare:
5447 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
5448 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
5449 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
5450 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
5451 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
5453 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
5454 itype
5455 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
5457 else
5458 itype = TREE_TYPE (rhs1);
5459 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
5460 if (trueval == NULL_TREE)
5461 trueval = build_int_cst (itype, 1);
5462 else
5463 gcc_checking_assert (useless_type_conversion_p (itype,
5464 TREE_TYPE (trueval)));
5465 pattern_stmt
5466 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
5467 COND_EXPR, cond_expr, trueval,
5468 build_int_cst (itype, 0));
5469 break;
5472 gimple_set_location (pattern_stmt, loc);
5473 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt,
5474 get_vectype_for_scalar_type (vinfo, itype));
5475 defs.put (var, gimple_assign_lhs (pattern_stmt));
5478 /* Comparison function to qsort a vector of gimple stmts after UID. */
5480 static int
5481 sort_after_uid (const void *p1, const void *p2)
5483 const gimple *stmt1 = *(const gimple * const *)p1;
5484 const gimple *stmt2 = *(const gimple * const *)p2;
5485 return gimple_uid (stmt1) - gimple_uid (stmt2);
5488 /* Create pattern stmts for all stmts participating in the bool pattern
5489 specified by BOOL_STMT_SET and its root STMT_INFO with the desired type
5490 OUT_TYPE. Return the def of the pattern root. */
5492 static tree
5493 adjust_bool_stmts (vec_info *vinfo, hash_set <gimple *> &bool_stmt_set,
5494 tree out_type, stmt_vec_info stmt_info)
5496 /* Gather original stmts in the bool pattern in their order of appearance
5497 in the IL. */
5498 auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
5499 for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
5500 i != bool_stmt_set.end (); ++i)
5501 bool_stmts.quick_push (*i);
5502 bool_stmts.qsort (sort_after_uid);
5504 /* Now process them in that order, producing pattern stmts. */
5505 hash_map <tree, tree> defs;
5506 for (unsigned i = 0; i < bool_stmts.length (); ++i)
5507 adjust_bool_pattern (vinfo, gimple_assign_lhs (bool_stmts[i]),
5508 out_type, stmt_info, defs);
5510 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
5511 gimple *pattern_stmt
5512 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
5513 return gimple_assign_lhs (pattern_stmt);
5516 /* Return the proper type for converting bool VAR into
5517 an integer value or NULL_TREE if no such type exists.
5518 The type is chosen so that the converted value has the
5519 same number of elements as VAR's vector type. */
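/* Illustrative example: if VAR is a boolean defined by a comparison of two
   32-bit integers and has been given a mask precision of 32, the function
   returns a 32-bit unsigned type, so vectors of the converted value have
   the same number of elements as vectors of the mask.  */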
5521 static tree
5522 integer_type_for_mask (tree var, vec_info *vinfo)
5524 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
5525 return NULL_TREE;
5527 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
5528 if (!def_stmt_info || !vect_use_mask_type_p (def_stmt_info))
5529 return NULL_TREE;
5531 return build_nonstandard_integer_type (def_stmt_info->mask_precision, 1);
5534 /* Function vect_recog_gcond_pattern
5536 Try to find pattern like following:
5538 if (a op b)
5540 where operator 'op' is not != and convert it to an adjusted boolean pattern
5542 mask = a op b
5543 if (mask != 0)
5545 and set the mask type on MASK.
5547 Input:
5549 * STMT_VINFO: The stmt at the end from which the pattern
5550 search begins, i.e. cast of a bool to
5551 an integer type.
5553 Output:
5555 * TYPE_OUT: The type of the output of this pattern.
5557 * Return value: A new stmt that will be used to replace the pattern. */
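/* A minimal C sketch of a loop this applies to (assuming the target
   supports early-break vectorization; the function name is illustrative):

     int first_lt (int *a, int *b, int n)
     {
       for (int i = 0; i < n; i++)
	 if (a[i] < b[i])
	   return i;
       return -1;
     }

   The gcond "if (a[i] < b[i])" is rewritten so that the comparison defines
   a separate boolean mask and the exit condition becomes "if (mask != 0)",
   with the mask type recorded on the new definition.  */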
5559 static gimple *
5560 vect_recog_gcond_pattern (vec_info *vinfo,
5561 stmt_vec_info stmt_vinfo, tree *type_out)
5563 /* Currently we only support this for loop vectorization and when the
5564 loop has multiple exits. */
5565 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5566 if (!loop_vinfo || !LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
5567 return NULL;
5569 gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
5570 gcond* cond = NULL;
5571 if (!(cond = dyn_cast <gcond *> (last_stmt)))
5572 return NULL;
5574 auto lhs = gimple_cond_lhs (cond);
5575 auto rhs = gimple_cond_rhs (cond);
5576 auto code = gimple_cond_code (cond);
5578 tree scalar_type = TREE_TYPE (lhs);
5579 if (VECTOR_TYPE_P (scalar_type))
5580 return NULL;
5582 if (code == NE_EXPR
5583 && zerop (rhs)
5584 && VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
5585 return NULL;
5587 tree vecitype = get_vectype_for_scalar_type (vinfo, scalar_type);
5588 if (vecitype == NULL_TREE)
5589 return NULL;
5591 tree vectype = truth_type_for (vecitype);
5593 tree new_lhs = vect_recog_temp_ssa_var (boolean_type_node, NULL);
5594 gimple *new_stmt = gimple_build_assign (new_lhs, code, lhs, rhs);
5595 append_pattern_def_seq (vinfo, stmt_vinfo, new_stmt, vectype, scalar_type);
5597 gimple *pattern_stmt
5598 = gimple_build_cond (NE_EXPR, new_lhs,
5599 build_int_cst (TREE_TYPE (new_lhs), 0),
5600 NULL_TREE, NULL_TREE);
5601 *type_out = vectype;
5602 vect_pattern_detected ("vect_recog_gcond_pattern", last_stmt);
5603 return pattern_stmt;
5606 /* Function vect_recog_bool_pattern
5608 Try to find pattern like following:
5610 bool a_b, b_b, c_b, d_b, e_b;
5611 TYPE f_T;
5612 loop:
5613 S1 a_b = x1 CMP1 y1;
5614 S2 b_b = x2 CMP2 y2;
5615 S3 c_b = a_b & b_b;
5616 S4 d_b = x3 CMP3 y3;
5617 S5 e_b = c_b | d_b;
5618 S6 f_T = (TYPE) e_b;
5620 where type 'TYPE' is an integral type. Or a similar pattern
5621 ending in
5623 S6 f_Y = e_b ? r_Y : s_Y;
5625 as results from if-conversion of a complex condition.
5627 Input:
5629 * STMT_VINFO: The stmt at the end from which the pattern
5630 search begins, i.e. cast of a bool to
5631 an integer type.
5633 Output:
5635 * TYPE_OUT: The type of the output of this pattern.
5637 * Return value: A new stmt that will be used to replace the pattern.
5639 Assuming size of TYPE is the same as size of all comparisons
5640 (otherwise some casts would be added where needed), for the above
5641 sequence we create the following related pattern stmts:
5642 S1' a_T = x1 CMP1 y1 ? 1 : 0;
5643 S3' c_T = x2 CMP2 y2 ? a_T : 0;
5644 S4' d_T = x3 CMP3 y3 ? 1 : 0;
5645 S5' e_T = c_T | d_T;
5646 S6' f_T = e_T;
5648 Instead of the above S3' we could emit:
5649 S2' b_T = x2 CMP2 y2 ? 1 : 0;
5650 S3' c_T = a_T | b_T;
5651 but the above is more efficient. */
5653 static gimple *
5654 vect_recog_bool_pattern (vec_info *vinfo,
5655 stmt_vec_info stmt_vinfo, tree *type_out)
5657 gimple *last_stmt = stmt_vinfo->stmt;
5658 enum tree_code rhs_code;
5659 tree var, lhs, rhs, vectype;
5660 gimple *pattern_stmt;
5662 if (!is_gimple_assign (last_stmt))
5663 return NULL;
5665 var = gimple_assign_rhs1 (last_stmt);
5666 lhs = gimple_assign_lhs (last_stmt);
5667 rhs_code = gimple_assign_rhs_code (last_stmt);
5669 if (rhs_code == VIEW_CONVERT_EXPR)
5670 var = TREE_OPERAND (var, 0);
5672 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
5673 return NULL;
5675 hash_set<gimple *> bool_stmts;
5677 if (CONVERT_EXPR_CODE_P (rhs_code)
5678 || rhs_code == VIEW_CONVERT_EXPR)
5680 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
5681 || VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
5682 return NULL;
5683 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
5685 if (check_bool_pattern (var, vinfo, bool_stmts))
5687 rhs = adjust_bool_stmts (vinfo, bool_stmts,
5688 TREE_TYPE (lhs), stmt_vinfo);
5689 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
5690 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
5691 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
5692 else
5693 pattern_stmt
5694 = gimple_build_assign (lhs, NOP_EXPR, rhs);
5696 else
5698 tree type = integer_type_for_mask (var, vinfo);
5699 tree cst0, cst1, tmp;
5701 if (!type)
5702 return NULL;
5704 /* We may be able to use the cond directly with a narrowed type,
5705 performing a single cond with a packed mask instead of multiple
5706 cond exprs followed by result packing. In case of widening it
5707 is better to do the cond first and then extract the
5708 results. */
5709 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
5710 type = TREE_TYPE (lhs);
5712 cst0 = build_int_cst (type, 0);
5713 cst1 = build_int_cst (type, 1);
5714 tmp = vect_recog_temp_ssa_var (type, NULL);
5715 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
5717 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
5719 tree new_vectype = get_vectype_for_scalar_type (vinfo, type);
5720 append_pattern_def_seq (vinfo, stmt_vinfo,
5721 pattern_stmt, new_vectype);
5723 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
5724 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
5728 *type_out = vectype;
5729 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
5731 return pattern_stmt;
5733 else if (rhs_code == COND_EXPR
5734 && TREE_CODE (var) == SSA_NAME)
5736 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
5737 if (vectype == NULL_TREE)
5738 return NULL;
5740 /* Build a scalar type for the boolean result that when
5741 vectorized matches the vector type of the result in
5742 size and number of elements. */
5743 unsigned prec
5744 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
5745 TYPE_VECTOR_SUBPARTS (vectype));
5747 tree type
5748 = build_nonstandard_integer_type (prec,
5749 TYPE_UNSIGNED (TREE_TYPE (var)));
5750 if (get_vectype_for_scalar_type (vinfo, type) == NULL_TREE)
5751 return NULL;
5753 if (check_bool_pattern (var, vinfo, bool_stmts))
5754 var = adjust_bool_stmts (vinfo, bool_stmts, type, stmt_vinfo);
5755 else if (integer_type_for_mask (var, vinfo))
5756 return NULL;
5758 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
5759 pattern_stmt
5760 = gimple_build_assign (lhs, COND_EXPR,
5761 build2 (NE_EXPR, boolean_type_node,
5762 var, build_int_cst (TREE_TYPE (var), 0)),
5763 gimple_assign_rhs2 (last_stmt),
5764 gimple_assign_rhs3 (last_stmt));
5765 *type_out = vectype;
5766 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
5768 return pattern_stmt;
5770 else if (rhs_code == SSA_NAME
5771 && STMT_VINFO_DATA_REF (stmt_vinfo))
5773 stmt_vec_info pattern_stmt_info;
5774 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
5775 if (!vectype || !VECTOR_MODE_P (TYPE_MODE (vectype)))
5776 return NULL;
5778 if (check_bool_pattern (var, vinfo, bool_stmts))
5779 rhs = adjust_bool_stmts (vinfo, bool_stmts,
5780 TREE_TYPE (vectype), stmt_vinfo);
5781 else
5783 tree type = integer_type_for_mask (var, vinfo);
5784 tree cst0, cst1, new_vectype;
5786 if (!type)
5787 return NULL;
5789 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
5790 type = TREE_TYPE (vectype);
5792 cst0 = build_int_cst (type, 0);
5793 cst1 = build_int_cst (type, 1);
5794 new_vectype = get_vectype_for_scalar_type (vinfo, type);
5796 rhs = vect_recog_temp_ssa_var (type, NULL);
5797 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
5798 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, new_vectype);
5801 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
5802 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
5804 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
5805 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
5806 append_pattern_def_seq (vinfo, stmt_vinfo, cast_stmt);
5807 rhs = rhs2;
5809 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
5810 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
5811 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
5812 *type_out = vectype;
5813 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
5815 return pattern_stmt;
5817 else
5818 return NULL;
5822 /* A helper for vect_recog_mask_conversion_pattern. Build
5823 conversion of MASK to a type suitable for masking VECTYPE.
5824 Built statement gets required vectype and is appended to
5825 a pattern sequence of STMT_VINFO.
5827 Return converted mask. */
5829 static tree
5830 build_mask_conversion (vec_info *vinfo,
5831 tree mask, tree vectype, stmt_vec_info stmt_vinfo)
5833 gimple *stmt;
5834 tree masktype, tmp;
5836 masktype = truth_type_for (vectype);
5837 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
5838 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
5839 append_pattern_def_seq (vinfo, stmt_vinfo,
5840 stmt, masktype, TREE_TYPE (vectype));
5842 return tmp;
5846 /* Function vect_recog_mask_conversion_pattern
5848 Try to find statements which require boolean type
5849 conversion. Additional conversion statements are
5850 added to handle such cases. For example:
5852 bool m_1, m_2, m_3;
5853 int i_4, i_5;
5854 double d_6, d_7;
5855 char c_1, c_2, c_3;
5857 S1 m_1 = i_4 > i_5;
5858 S2 m_2 = d_6 < d_7;
5859 S3 m_3 = m_1 & m_2;
5860 S4 c_1 = m_3 ? c_2 : c_3;
5862 Will be transformed into:
5864 S1 m_1 = i_4 > i_5;
5865 S2 m_2 = d_6 < d_7;
5866 S3'' m_2' = (_Bool[bitsize=32])m_2
5867 S3' m_3' = m_1 & m_2';
5868 S4'' m_3'' = (_Bool[bitsize=8])m_3'
5869 S4' c_1' = m_3'' ? c_2 : c_3; */
5871 static gimple *
5872 vect_recog_mask_conversion_pattern (vec_info *vinfo,
5873 stmt_vec_info stmt_vinfo, tree *type_out)
5875 gimple *last_stmt = stmt_vinfo->stmt;
5876 enum tree_code rhs_code;
5877 tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
5878 tree vectype1, vectype2;
5879 stmt_vec_info pattern_stmt_info;
5880 tree rhs1_op0 = NULL_TREE, rhs1_op1 = NULL_TREE;
5881 tree rhs1_op0_type = NULL_TREE, rhs1_op1_type = NULL_TREE;
5883 /* Check for MASK_LOAD and MASK_STORE as well as COND_OP calls requiring mask
5884 conversion. */
5885 if (is_gimple_call (last_stmt)
5886 && gimple_call_internal_p (last_stmt))
5888 gcall *pattern_stmt;
5890 internal_fn ifn = gimple_call_internal_fn (last_stmt);
5891 int mask_argno = internal_fn_mask_index (ifn);
5892 if (mask_argno < 0)
5893 return NULL;
5895 bool store_p = internal_store_fn_p (ifn);
5896 bool load_p = internal_load_fn_p (ifn);
5897 if (store_p)
5899 int rhs_index = internal_fn_stored_value_index (ifn);
5900 tree rhs = gimple_call_arg (last_stmt, rhs_index);
5901 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs));
5903 else
5905 lhs = gimple_call_lhs (last_stmt);
5906 if (!lhs)
5907 return NULL;
5908 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
5911 if (!vectype1)
5912 return NULL;
5914 tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
5915 tree mask_arg_type = integer_type_for_mask (mask_arg, vinfo);
5916 if (mask_arg_type)
5918 vectype2 = get_mask_type_for_scalar_type (vinfo, mask_arg_type);
5920 if (!vectype2
5921 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
5922 TYPE_VECTOR_SUBPARTS (vectype2)))
5923 return NULL;
5925 else if (store_p || load_p)
5926 return NULL;
5928 tmp = build_mask_conversion (vinfo, mask_arg, vectype1, stmt_vinfo);
5930 auto_vec<tree, 8> args;
5931 unsigned int nargs = gimple_call_num_args (last_stmt);
5932 args.safe_grow (nargs, true);
5933 for (unsigned int i = 0; i < nargs; ++i)
5934 args[i] = ((int) i == mask_argno
5935 ? tmp
5936 : gimple_call_arg (last_stmt, i));
5937 pattern_stmt = gimple_build_call_internal_vec (ifn, args);
5939 if (!store_p)
5941 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
5942 gimple_call_set_lhs (pattern_stmt, lhs);
5945 if (load_p || store_p)
5946 gimple_call_set_nothrow (pattern_stmt, true);
5948 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
5949 if (STMT_VINFO_DATA_REF (stmt_vinfo))
5950 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
5952 *type_out = vectype1;
5953 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
5955 return pattern_stmt;
5958 if (!is_gimple_assign (last_stmt))
5959 return NULL;
5961 gimple *pattern_stmt;
5962 lhs = gimple_assign_lhs (last_stmt);
5963 rhs1 = gimple_assign_rhs1 (last_stmt);
5964 rhs_code = gimple_assign_rhs_code (last_stmt);
5966 /* Check for cond expression requiring mask conversion. */
5967 if (rhs_code == COND_EXPR)
5969 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
5971 if (TREE_CODE (rhs1) == SSA_NAME)
5973 rhs1_type = integer_type_for_mask (rhs1, vinfo);
5974 if (!rhs1_type)
5975 return NULL;
5977 else if (COMPARISON_CLASS_P (rhs1))
5979 /* Check whether we're comparing scalar booleans and (if so)
5980 whether a better mask type exists than the mask associated
5981 with boolean-sized elements. This avoids unnecessary packs
5982 and unpacks if the booleans are set from comparisons of
5983 wider types. E.g. in:
5985 int x1, x2, x3, x4, y1, y2;
5987 bool b1 = (x1 == x2);
5988 bool b2 = (x3 == x4);
5989 ... = b1 == b2 ? y1 : y2;
5991 it is better for b1 and b2 to use the mask type associated
5992 with int elements rather than bool (byte) elements. */
5993 rhs1_op0 = TREE_OPERAND (rhs1, 0);
5994 rhs1_op1 = TREE_OPERAND (rhs1, 1);
5995 if (!rhs1_op0 || !rhs1_op1)
5996 return NULL;
5997 rhs1_op0_type = integer_type_for_mask (rhs1_op0, vinfo);
5998 rhs1_op1_type = integer_type_for_mask (rhs1_op1, vinfo);
6000 if (!rhs1_op0_type)
6001 rhs1_type = TREE_TYPE (rhs1_op0);
6002 else if (!rhs1_op1_type)
6003 rhs1_type = TREE_TYPE (rhs1_op1);
6004 else if (TYPE_PRECISION (rhs1_op0_type)
6005 != TYPE_PRECISION (rhs1_op1_type))
6007 int tmp0 = (int) TYPE_PRECISION (rhs1_op0_type)
6008 - (int) TYPE_PRECISION (TREE_TYPE (lhs));
6009 int tmp1 = (int) TYPE_PRECISION (rhs1_op1_type)
6010 - (int) TYPE_PRECISION (TREE_TYPE (lhs));
6011 if ((tmp0 > 0 && tmp1 > 0) || (tmp0 < 0 && tmp1 < 0))
6013 if (abs (tmp0) > abs (tmp1))
6014 rhs1_type = rhs1_op1_type;
6015 else
6016 rhs1_type = rhs1_op0_type;
6018 else
6019 rhs1_type = build_nonstandard_integer_type
6020 (TYPE_PRECISION (TREE_TYPE (lhs)), 1);
6022 else
6023 rhs1_type = rhs1_op0_type;
6025 else
6026 return NULL;
6028 vectype2 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
6030 if (!vectype1 || !vectype2)
6031 return NULL;
6033 /* Continue if a conversion is needed. Also continue if we have
6034 a comparison whose vector type would normally be different from
6035 VECTYPE2 when considered in isolation. In that case we'll
6036 replace the comparison with an SSA name (so that we can record
6037 its vector type) and behave as though the comparison was an SSA
6038 name from the outset. */
6039 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
6040 TYPE_VECTOR_SUBPARTS (vectype2))
6041 && !rhs1_op0_type
6042 && !rhs1_op1_type)
6043 return NULL;
6045 /* If rhs1 is invariant and we can promote it leave the COND_EXPR
6046 in place, we can handle it in vectorizable_condition. This avoids
6047 unnecessary promotion stmts and increased vectorization factor. */
6048 if (COMPARISON_CLASS_P (rhs1)
6049 && INTEGRAL_TYPE_P (rhs1_type)
6050 && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
6051 TYPE_VECTOR_SUBPARTS (vectype2)))
6053 enum vect_def_type dt;
6054 if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
6055 && dt == vect_external_def
6056 && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
6057 && (dt == vect_external_def
6058 || dt == vect_constant_def))
6060 tree wide_scalar_type = build_nonstandard_integer_type
6061 (vector_element_bits (vectype1), TYPE_UNSIGNED (rhs1_type));
6062 tree vectype3 = get_vectype_for_scalar_type (vinfo,
6063 wide_scalar_type);
6064 if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
6065 return NULL;
6069 /* If rhs1 is a comparison we need to move it into a
6070 separate statement. */
6071 if (TREE_CODE (rhs1) != SSA_NAME)
6073 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
6074 if (rhs1_op0_type
6075 && TYPE_PRECISION (rhs1_op0_type) != TYPE_PRECISION (rhs1_type))
6076 rhs1_op0 = build_mask_conversion (vinfo, rhs1_op0,
6077 vectype2, stmt_vinfo);
6078 if (rhs1_op1_type
6079 && TYPE_PRECISION (rhs1_op1_type) != TYPE_PRECISION (rhs1_type))
6080 rhs1_op1 = build_mask_conversion (vinfo, rhs1_op1,
6081 vectype2, stmt_vinfo);
6082 pattern_stmt = gimple_build_assign (tmp, TREE_CODE (rhs1),
6083 rhs1_op0, rhs1_op1);
6084 rhs1 = tmp;
6085 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vectype2,
6086 rhs1_type);
6089 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
6090 TYPE_VECTOR_SUBPARTS (vectype2)))
6091 tmp = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
6092 else
6093 tmp = rhs1;
6095 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
6096 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
6097 gimple_assign_rhs2 (last_stmt),
6098 gimple_assign_rhs3 (last_stmt));
6100 *type_out = vectype1;
6101 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
6103 return pattern_stmt;
6106 /* Now check for binary boolean operations requiring conversion for
6107 one of operands. */
6108 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
6109 return NULL;
6111 if (rhs_code != BIT_IOR_EXPR
6112 && rhs_code != BIT_XOR_EXPR
6113 && rhs_code != BIT_AND_EXPR
6114 && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
6115 return NULL;
6117 rhs2 = gimple_assign_rhs2 (last_stmt);
6119 rhs1_type = integer_type_for_mask (rhs1, vinfo);
6120 rhs2_type = integer_type_for_mask (rhs2, vinfo);
6122 if (!rhs1_type || !rhs2_type
6123 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
6124 return NULL;
6126 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
6128 vectype1 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
6129 if (!vectype1)
6130 return NULL;
6131 rhs2 = build_mask_conversion (vinfo, rhs2, vectype1, stmt_vinfo);
6133 else
6135 vectype1 = get_mask_type_for_scalar_type (vinfo, rhs2_type);
6136 if (!vectype1)
6137 return NULL;
6138 rhs1 = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
6141 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
6142 pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
6144 *type_out = vectype1;
6145 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
6147 return pattern_stmt;
6150 /* STMT_INFO is a load or store. If the load or store is conditional, return
6151 the boolean condition under which it occurs, otherwise return null. */
6153 static tree
6154 vect_get_load_store_mask (stmt_vec_info stmt_info)
6156 if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt))
6158 gcc_assert (gimple_assign_single_p (def_assign));
6159 return NULL_TREE;
6162 if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt))
6164 internal_fn ifn = gimple_call_internal_fn (def_call);
6165 int mask_index = internal_fn_mask_index (ifn);
6166 return gimple_call_arg (def_call, mask_index);
6169 gcc_unreachable ();
6172 /* Return MASK if MASK is suitable for masking an operation on vectors
6173 of type VECTYPE, otherwise convert it into such a form and return
6174 the result. Associate any conversion statements with STMT_INFO's
6175 pattern. */
6177 static tree
6178 vect_convert_mask_for_vectype (tree mask, tree vectype,
6179 stmt_vec_info stmt_info, vec_info *vinfo)
6181 tree mask_type = integer_type_for_mask (mask, vinfo);
6182 if (mask_type)
6184 tree mask_vectype = get_mask_type_for_scalar_type (vinfo, mask_type);
6185 if (mask_vectype
6186 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
6187 TYPE_VECTOR_SUBPARTS (mask_vectype)))
6188 mask = build_mask_conversion (vinfo, mask, vectype, stmt_info);
6190 return mask;
6193 /* Return the equivalent of:
6195 fold_convert (TYPE, VALUE)
6197 with the expectation that the operation will be vectorized.
6198 If new statements are needed, add them as pattern statements
6199 to STMT_INFO. */
6201 static tree
6202 vect_add_conversion_to_pattern (vec_info *vinfo,
6203 tree type, tree value, stmt_vec_info stmt_info)
6205 if (useless_type_conversion_p (type, TREE_TYPE (value)))
6206 return value;
6208 tree new_value = vect_recog_temp_ssa_var (type, NULL);
6209 gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
6210 append_pattern_def_seq (vinfo, stmt_info, conversion,
6211 get_vectype_for_scalar_type (vinfo, type));
6212 return new_value;
6215 /* Try to convert STMT_INFO into a call to a gather load or scatter store
6216 internal function. Return the final statement on success and set
6217 *TYPE_OUT to the vector type being loaded or stored.
6219 This function only handles gathers and scatters that were recognized
6220 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
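/* An illustrative example (not from the testsuite) of a loop whose load
   may be recognized as a gather on targets providing one:

     void f (float *restrict dst, float *restrict src,
	     int *restrict idx, int n)
     {
       for (int i = 0; i < n; i++)
	 dst[i] = src[idx[i]];
     }

   The indexed load src[idx[i]] is replaced by an internal-function call
   such as .GATHER_LOAD (base, offset, scale, zero), with a mask operand
   appended for the masked and length-masked variants.  */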
6222 static gimple *
6223 vect_recog_gather_scatter_pattern (vec_info *vinfo,
6224 stmt_vec_info stmt_info, tree *type_out)
6226 /* Currently we only support this for loop vectorization. */
6227 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6228 if (!loop_vinfo)
6229 return NULL;
6231 /* Make sure that we're looking at a gather load or scatter store. */
6232 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
6233 if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6234 return NULL;
6236 /* Get the boolean that controls whether the load or store happens.
6237 This is null if the operation is unconditional. */
6238 tree mask = vect_get_load_store_mask (stmt_info);
6240 /* Make sure that the target supports an appropriate internal
6241 function for the gather/scatter operation. */
6242 gather_scatter_info gs_info;
6243 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info)
6244 || gs_info.ifn == IFN_LAST)
6245 return NULL;
6247 /* Convert the mask to the right form. */
6248 tree gs_vectype = get_vectype_for_scalar_type (loop_vinfo,
6249 gs_info.element_type);
6250 if (mask)
6251 mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
6252 loop_vinfo);
6253 else if (gs_info.ifn == IFN_MASK_SCATTER_STORE
6254 || gs_info.ifn == IFN_MASK_GATHER_LOAD
6255 || gs_info.ifn == IFN_MASK_LEN_SCATTER_STORE
6256 || gs_info.ifn == IFN_MASK_LEN_GATHER_LOAD)
6257 mask = build_int_cst (TREE_TYPE (truth_type_for (gs_vectype)), -1);
6259 /* Get the invariant base and non-invariant offset, converting the
6260 latter to the same width as the vector elements. */
6261 tree base = gs_info.base;
6262 tree offset_type = TREE_TYPE (gs_info.offset_vectype);
6263 tree offset = vect_add_conversion_to_pattern (vinfo, offset_type,
6264 gs_info.offset, stmt_info);
6266 /* Build the new pattern statement. */
6267 tree scale = size_int (gs_info.scale);
6268 gcall *pattern_stmt;
6269 if (DR_IS_READ (dr))
6271 tree zero = build_zero_cst (gs_info.element_type);
6272 if (mask != NULL)
6273 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base,
6274 offset, scale, zero, mask);
6275 else
6276 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
6277 offset, scale, zero);
6278 tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
6279 gimple_call_set_lhs (pattern_stmt, load_lhs);
6281 else
6283 tree rhs = vect_get_store_rhs (stmt_info);
6284 if (mask != NULL)
6285 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5,
6286 base, offset, scale, rhs,
6287 mask);
6288 else
6289 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4,
6290 base, offset, scale, rhs);
6292 gimple_call_set_nothrow (pattern_stmt, true);
6294 /* Copy across relevant vectorization info and associate DR with the
6295 new pattern statement instead of the original statement. */
6296 stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
6297 loop_vinfo->move_dr (pattern_stmt_info, stmt_info);
6299 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6300 *type_out = vectype;
6301 vect_pattern_detected ("gather/scatter pattern", stmt_info->stmt);
6303 return pattern_stmt;
6306 /* Return true if TYPE is a non-boolean integer type. These are the types
6307 that we want to consider for narrowing. */
6309 static bool
6310 vect_narrowable_type_p (tree type)
6312 return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
6315 /* Return true if the operation given by CODE can be truncated to N bits
6316 when only N bits of the output are needed. This is only true if bit N+1
6317 of the inputs has no effect on the low N bits of the result. */
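/* For example, PLUS_EXPR is truncatable: the low 8 bits of a + b depend
   only on the low 8 bits of a and b, so

     (unsigned char) (a + b)
       == (unsigned char) ((unsigned char) a + (unsigned char) b)

   for all unsigned a and b.  TRUNC_DIV_EXPR is not: for a = 256 and b = 2
   the low 8 bits of a / b are 128, but truncating the inputs to 8 bits
   first gives 0 / 2 == 0.  */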
6319 static bool
6320 vect_truncatable_operation_p (tree_code code)
6322 switch (code)
6324 case NEGATE_EXPR:
6325 case PLUS_EXPR:
6326 case MINUS_EXPR:
6327 case MULT_EXPR:
6328 case BIT_NOT_EXPR:
6329 case BIT_AND_EXPR:
6330 case BIT_IOR_EXPR:
6331 case BIT_XOR_EXPR:
6332 case COND_EXPR:
6333 return true;
6335 default:
6336 return false;
6340 /* Record that STMT_INFO could be changed from operating on TYPE to
6341 operating on a type with the precision and sign given by PRECISION
6342 and SIGN respectively. PRECISION is an arbitrary bit precision;
6343 it might not be a whole number of bytes. */
6345 static void
6346 vect_set_operation_type (stmt_vec_info stmt_info, tree type,
6347 unsigned int precision, signop sign)
6349 /* Round the precision up to a whole number of bytes. */
6350 precision = vect_element_precision (precision);
6351 if (precision < TYPE_PRECISION (type)
6352 && (!stmt_info->operation_precision
6353 || stmt_info->operation_precision > precision))
6355 stmt_info->operation_precision = precision;
6356 stmt_info->operation_sign = sign;
6360 /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
6361 non-boolean inputs, all of which have type TYPE. MIN_INPUT_PRECISION
6362 is an arbitrary bit precision; it might not be a whole number of bytes. */
6364 static void
6365 vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
6366 unsigned int min_input_precision)
6368 /* This operation in isolation only requires the inputs to have
6369 MIN_INPUT_PRECISION of precision. However, that doesn't mean
6370 that MIN_INPUT_PRECISION is a natural precision for the chain
6371 as a whole. E.g. consider something like:
6373 unsigned short *x, *y;
6374 *y = ((*x & 0xf0) >> 4) | (*y << 4);
6376 The right shift can be done on unsigned chars, and only requires the
6377 result of "*x & 0xf0" to be done on unsigned chars. But taking that
6378 approach would mean turning a natural chain of single-vector unsigned
6379 short operations into one that truncates "*x" and then extends
6380 "(*x & 0xf0) >> 4", with two vectors for each unsigned short
6381 operation and one vector for each unsigned char operation.
6382 This would be a significant pessimization.
6384 Instead only propagate the maximum of this precision and the precision
6385 required by the users of the result. This means that we don't pessimize
6386 the case above but continue to optimize things like:
6388 unsigned char *y;
6389 unsigned short *x;
6390 *y = ((*x & 0xf0) >> 4) | (*y << 4);
6392 Here we would truncate two vectors of *x to a single vector of
6393 unsigned chars and use single-vector unsigned char operations for
6394 everything else, rather than doing two unsigned short copies of
6395 "(*x & 0xf0) >> 4" and then truncating the result. */
6396 min_input_precision = MAX (min_input_precision,
6397 stmt_info->min_output_precision);
6399 if (min_input_precision < TYPE_PRECISION (type)
6400 && (!stmt_info->min_input_precision
6401 || stmt_info->min_input_precision > min_input_precision))
6402 stmt_info->min_input_precision = min_input_precision;
6405 /* Subroutine of vect_determine_min_output_precision. Return true if
6406 we can calculate a reduced number of output bits for STMT_INFO,
6407 whose result is LHS. */
6409 static bool
6410 vect_determine_min_output_precision_1 (vec_info *vinfo,
6411 stmt_vec_info stmt_info, tree lhs)
6413 /* Take the maximum precision required by users of the result. */
6414 unsigned int precision = 0;
6415 imm_use_iterator iter;
6416 use_operand_p use;
6417 FOR_EACH_IMM_USE_FAST (use, iter, lhs)
6419 gimple *use_stmt = USE_STMT (use);
6420 if (is_gimple_debug (use_stmt))
6421 continue;
6422 stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
6423 if (!use_stmt_info || !use_stmt_info->min_input_precision)
6424 return false;
6425 /* The input precision recorded for COND_EXPRs applies only to the
6426 "then" and "else" values. */
6427 gassign *assign = dyn_cast <gassign *> (use_stmt_info->stmt);
6428 if (assign
6429 && gimple_assign_rhs_code (assign) == COND_EXPR
6430 && use->use != gimple_assign_rhs2_ptr (assign)
6431 && use->use != gimple_assign_rhs3_ptr (assign))
6432 return false;
6433 precision = MAX (precision, use_stmt_info->min_input_precision);
6436 if (dump_enabled_p ())
6437 dump_printf_loc (MSG_NOTE, vect_location,
6438 "only the low %d bits of %T are significant\n",
6439 precision, lhs);
6440 stmt_info->min_output_precision = precision;
6441 return true;
6444 /* Calculate min_output_precision for STMT_INFO. */
6446 static void
6447 vect_determine_min_output_precision (vec_info *vinfo, stmt_vec_info stmt_info)
6449 /* We're only interested in statements with a narrowable result. */
6450 tree lhs = gimple_get_lhs (stmt_info->stmt);
6451 if (!lhs
6452 || TREE_CODE (lhs) != SSA_NAME
6453 || !vect_narrowable_type_p (TREE_TYPE (lhs)))
6454 return;
6456 if (!vect_determine_min_output_precision_1 (vinfo, stmt_info, lhs))
6457 stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
6460 /* Use range information to decide whether STMT (described by STMT_INFO)
6461 could be done in a narrower type. This is effectively a forward
6462 propagation, since it uses context-independent information that applies
6463 to all users of an SSA name. */
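/* An illustrative example (not from any testcase):

     unsigned char *x, *y, *z;
     ...
     int tmp = x[i] + y[i];
     z[i] = tmp >> 1;

   Range information shows TMP is in [0, 510], which needs only 9 bits, so
   the addition and shift can be done in 16 bits (the precision rounded up
   to a whole number of bytes) rather than the 32 bits of the IL.  */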
6465 static void
6466 vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
6468 tree lhs = gimple_assign_lhs (stmt);
6469 if (!lhs || TREE_CODE (lhs) != SSA_NAME)
6470 return;
6472 tree type = TREE_TYPE (lhs);
6473 if (!vect_narrowable_type_p (type))
6474 return;
6476 /* First see whether we have any useful range information for the result. */
6477 unsigned int precision = TYPE_PRECISION (type);
6478 signop sign = TYPE_SIGN (type);
6479 wide_int min_value, max_value;
6480 if (!vect_get_range_info (lhs, &min_value, &max_value))
6481 return;
6483 tree_code code = gimple_assign_rhs_code (stmt);
6484 unsigned int nops = gimple_num_ops (stmt);
6486 if (!vect_truncatable_operation_p (code))
6488 /* Handle operations that can be computed in type T if all inputs
6489 and outputs can be represented in type T. Also handle left and
6490 right shifts, where (in addition) the maximum shift amount must
6491 be less than the number of bits in T. */
6492 bool is_shift;
6493 switch (code)
6495 case LSHIFT_EXPR:
6496 case RSHIFT_EXPR:
6497 is_shift = true;
6498 break;
6500 case ABS_EXPR:
6501 case MIN_EXPR:
6502 case MAX_EXPR:
6503 case TRUNC_DIV_EXPR:
6504 case CEIL_DIV_EXPR:
6505 case FLOOR_DIV_EXPR:
6506 case ROUND_DIV_EXPR:
6507 case EXACT_DIV_EXPR:
6508 /* Modulus is excluded because it is typically calculated by doing
6509 a division, for which minimum signed / -1 isn't representable in
6510 the original signed type. We could take the division range into
6511 account instead, if handling modulus ever becomes important. */
6512 is_shift = false;
6513 break;
6515 default:
6516 return;
6518 for (unsigned int i = 1; i < nops; ++i)
6520 tree op = gimple_op (stmt, i);
6521 wide_int op_min_value, op_max_value;
6522 if (TREE_CODE (op) == INTEGER_CST)
6524 unsigned int op_precision = TYPE_PRECISION (TREE_TYPE (op));
6525 op_min_value = op_max_value = wi::to_wide (op, op_precision);
6527 else if (TREE_CODE (op) == SSA_NAME)
6529 if (!vect_get_range_info (op, &op_min_value, &op_max_value))
6530 return;
6532 else
6533 return;
6535 if (is_shift && i == 2)
6537 /* There needs to be one more bit than the maximum shift amount.
6539 If the maximum shift amount is already 1 less than PRECISION
6540 then we can't narrow the shift further. Dealing with that
6541 case first ensures that we can safely use an unsigned range
6542 below.
6544 op_min_value isn't relevant, since shifts by negative amounts
6545 are UB. */
6546 if (wi::geu_p (op_max_value, precision - 1))
6547 return;
6548 unsigned int min_bits = op_max_value.to_uhwi () + 1;
6550 /* As explained below, we can convert a signed shift into an
6551 unsigned shift if the sign bit is always clear. At this
6552 point we've already processed the ranges of the output and
6553 the first input. */
6554 auto op_sign = sign;
6555 if (sign == SIGNED && !wi::neg_p (min_value))
6556 op_sign = UNSIGNED;
6557 op_min_value = wide_int::from (wi::min_value (min_bits, op_sign),
6558 precision, op_sign);
6559 op_max_value = wide_int::from (wi::max_value (min_bits, op_sign),
6560 precision, op_sign);
6562 min_value = wi::min (min_value, op_min_value, sign);
6563 max_value = wi::max (max_value, op_max_value, sign);
6567 /* Try to switch signed types for unsigned types if we can.
6568 This is better for two reasons. First, unsigned ops tend
6569 to be cheaper than signed ops. Second, it means that we can
6570 handle things like:
6572 signed char c;
6573 int res = (int) c & 0xff00; // range [0x0000, 0xff00]
6577 signed char c;
6578 unsigned short res_1 = (unsigned short) c & 0xff00;
6579 int res = (int) res_1;
6581 where the intermediate result res_1 has unsigned rather than
6582 signed type. */
6583 if (sign == SIGNED && !wi::neg_p (min_value))
6584 sign = UNSIGNED;
6586 /* See what precision is required for MIN_VALUE and MAX_VALUE. */
6587 unsigned int precision1 = wi::min_precision (min_value, sign);
6588 unsigned int precision2 = wi::min_precision (max_value, sign);
6589 unsigned int value_precision = MAX (precision1, precision2);
6590 if (value_precision >= precision)
6591 return;
6593 if (dump_enabled_p ())
6594 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
6595 " without loss of precision: %G",
6596 sign == SIGNED ? "signed" : "unsigned",
6597 value_precision, (gimple *) stmt);
6599 vect_set_operation_type (stmt_info, type, value_precision, sign);
6600 vect_set_min_input_precision (stmt_info, type, value_precision);
6603 /* Use information about the users of STMT's result to decide whether
6604 STMT (described by STMT_INFO) could be done in a narrower type.
6605 This is effectively a backward propagation. */
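/* An illustrative example (not from any testcase): in

     unsigned char *z;
     int a, b;
     ...
     z[i] = (unsigned char) (a * b);

   only the low 8 bits of the product reach its users, so the users record
   a min_input_precision of 8 and, MULT_EXPR being truncatable, the
   multiplication itself can later be performed at a narrower precision by
   the over-widening pattern.  */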
6607 static void
6608 vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
6610 tree_code code = gimple_assign_rhs_code (stmt);
6611 unsigned int opno = (code == COND_EXPR ? 2 : 1);
6612 tree type = TREE_TYPE (gimple_op (stmt, opno));
6613 if (!vect_narrowable_type_p (type))
6614 return;
6616 unsigned int precision = TYPE_PRECISION (type);
6617 unsigned int operation_precision, min_input_precision;
6618 switch (code)
6620 CASE_CONVERT:
6621 /* Only the bits that contribute to the output matter. Don't change
6622 the precision of the operation itself. */
6623 operation_precision = precision;
6624 min_input_precision = stmt_info->min_output_precision;
6625 break;
6627 case LSHIFT_EXPR:
6628 case RSHIFT_EXPR:
6630 tree shift = gimple_assign_rhs2 (stmt);
6631 if (TREE_CODE (shift) != INTEGER_CST
6632 || !wi::ltu_p (wi::to_widest (shift), precision))
6633 return;
6634 unsigned int const_shift = TREE_INT_CST_LOW (shift);
6635 if (code == LSHIFT_EXPR)
6637 /* Avoid creating an undefined shift.
6639 ??? We could instead use min_output_precision as-is and
6640 optimize out-of-range shifts to zero. However, only
6641 degenerate testcases shift away all their useful input data,
6642 and it isn't natural to drop input operations in the middle
6643 of vectorization. This sort of thing should really be
6644 handled before vectorization. */
6645 operation_precision = MAX (stmt_info->min_output_precision,
6646 const_shift + 1);
6647 /* We need CONST_SHIFT fewer bits of the input. */
6648 min_input_precision = (MAX (operation_precision, const_shift)
6649 - const_shift);
6651 else
6653 /* We need CONST_SHIFT extra bits to do the operation. */
6654 operation_precision = (stmt_info->min_output_precision
6655 + const_shift);
6656 min_input_precision = operation_precision;
6658 break;
6661 default:
6662 if (vect_truncatable_operation_p (code))
6664 /* Input bit N has no effect on output bits N-1 and lower. */
6665 operation_precision = stmt_info->min_output_precision;
6666 min_input_precision = operation_precision;
6667 break;
6669 return;
6672 if (operation_precision < precision)
6674 if (dump_enabled_p ())
6675 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
6676 " without affecting users: %G",
6677 TYPE_UNSIGNED (type) ? "unsigned" : "signed",
6678 operation_precision, (gimple *) stmt);
6679 vect_set_operation_type (stmt_info, type, operation_precision,
6680 TYPE_SIGN (type));
6682 vect_set_min_input_precision (stmt_info, type, min_input_precision);
6685 /* Return true if the statement described by STMT_INFO sets a boolean
6686 SSA_NAME and if we know how to vectorize this kind of statement using
6687 vector mask types. */
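/* For example (illustrative): "b = x < y", "c = b1 & b2" and a boolean PHI
   all qualify, whereas "i = (int) b" does not because its lhs is not a
   boolean SSA_NAME.  */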
6689 static bool
6690 possible_vector_mask_operation_p (stmt_vec_info stmt_info)
6692 tree lhs = gimple_get_lhs (stmt_info->stmt);
6693 tree_code code = ERROR_MARK;
6694 gassign *assign = NULL;
6695 gcond *cond = NULL;
6697 if ((assign = dyn_cast <gassign *> (stmt_info->stmt)))
6698 code = gimple_assign_rhs_code (assign);
6699 else if ((cond = dyn_cast <gcond *> (stmt_info->stmt)))
6701 lhs = gimple_cond_lhs (cond);
6702 code = gimple_cond_code (cond);
6705 if (!lhs
6706 || TREE_CODE (lhs) != SSA_NAME
6707 || !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
6708 return false;
6710 if (code != ERROR_MARK)
6712 switch (code)
6714 CASE_CONVERT:
6715 case SSA_NAME:
6716 case BIT_NOT_EXPR:
6717 case BIT_IOR_EXPR:
6718 case BIT_XOR_EXPR:
6719 case BIT_AND_EXPR:
6720 return true;
6722 default:
6723 return TREE_CODE_CLASS (code) == tcc_comparison;
6726 else if (is_a <gphi *> (stmt_info->stmt))
6727 return true;
6728 return false;
6731 /* If STMT_INFO sets a boolean SSA_NAME, see whether we should use
6732 a vector mask type instead of a normal vector type. Record the
6733 result in STMT_INFO->mask_precision. */
6735 static void
6736 vect_determine_mask_precision (vec_info *vinfo, stmt_vec_info stmt_info)
6738 if (!possible_vector_mask_operation_p (stmt_info))
6739 return;
6741 /* If at least one boolean input uses a vector mask type,
6742 pick the mask type with the narrowest elements.
6744 ??? This is the traditional behavior. It should always produce
6745 the smallest number of operations, but isn't necessarily the
6746 optimal choice. For example, if we have:
6748 a = b & c
6750 where:
6752 - the user of a wants it to have a mask type for 16-bit elements (M16)
6753 - b also uses M16
6754 - c uses a mask type for 8-bit elements (M8)
6756 then picking M8 gives:
6758 - 1 M16->M8 pack for b
6759 - 1 M8 AND for a
6760 - 2 M8->M16 unpacks for the user of a
6762 whereas picking M16 would have given:
6764 - 2 M8->M16 unpacks for c
6765 - 2 M16 ANDs for a
6767 The number of operations are equal, but M16 would have given
6768 a shorter dependency chain and allowed more ILP. */
6769 unsigned int precision = ~0U;
6770 gimple *stmt = STMT_VINFO_STMT (stmt_info);
6772 /* If the statement compares two values that shouldn't use vector masks,
6773 try comparing the values as normal scalars instead. */
6774 tree_code code = ERROR_MARK;
6775 tree op0_type;
6776 unsigned int nops = -1;
6777 unsigned int ops_start = 0;
6779 if (gassign *assign = dyn_cast <gassign *> (stmt))
6781 code = gimple_assign_rhs_code (assign);
6782 op0_type = TREE_TYPE (gimple_assign_rhs1 (assign));
6783 nops = gimple_num_ops (assign);
6784 ops_start = 1;
6786 else if (gcond *cond = dyn_cast <gcond *> (stmt))
6788 code = gimple_cond_code (cond);
6789 op0_type = TREE_TYPE (gimple_cond_lhs (cond));
6790 nops = 2;
6791 ops_start = 0;
6794 if (code != ERROR_MARK)
6796 for (unsigned int i = ops_start; i < nops; ++i)
6798 tree rhs = gimple_op (stmt, i);
6799 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs)))
6800 continue;
6802 stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
6803 if (!def_stmt_info)
6804 /* Don't let external or constant operands influence the choice.
6805 We can convert them to whichever vector type we pick. */
6806 continue;
6808 if (def_stmt_info->mask_precision)
6810 if (precision > def_stmt_info->mask_precision)
6811 precision = def_stmt_info->mask_precision;
6815 if (precision == ~0U
6816 && TREE_CODE_CLASS (code) == tcc_comparison)
6818 scalar_mode mode;
6819 tree vectype, mask_type;
6820 if (is_a <scalar_mode> (TYPE_MODE (op0_type), &mode)
6821 && (vectype = get_vectype_for_scalar_type (vinfo, op0_type))
6822 && (mask_type = get_mask_type_for_scalar_type (vinfo, op0_type))
6823 && expand_vec_cmp_expr_p (vectype, mask_type, code))
6824 precision = GET_MODE_BITSIZE (mode);
6827 else
6829 gphi *phi = as_a <gphi *> (stmt_info->stmt);
6830 for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
6832 tree rhs = gimple_phi_arg_def (phi, i);
6834 stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
6835 if (!def_stmt_info)
6836 /* Don't let external or constant operands influence the choice.
6837 We can convert them to whichever vector type we pick. */
6838 continue;
6840 if (def_stmt_info->mask_precision)
6842 if (precision > def_stmt_info->mask_precision)
6843 precision = def_stmt_info->mask_precision;
6848 if (dump_enabled_p ())
6850 if (precision == ~0U)
6851 dump_printf_loc (MSG_NOTE, vect_location,
6852 "using normal nonmask vectors for %G",
6853 stmt_info->stmt);
6854 else
6855 dump_printf_loc (MSG_NOTE, vect_location,
6856 "using boolean precision %d for %G",
6857 precision, stmt_info->stmt);
6860 stmt_info->mask_precision = precision;
6863 /* Handle vect_determine_precisions for STMT_INFO, given that we
6864 have already done so for the users of its result. */
6866 void
6867 vect_determine_stmt_precisions (vec_info *vinfo, stmt_vec_info stmt_info)
6869 vect_determine_min_output_precision (vinfo, stmt_info);
6870 if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
6872 vect_determine_precisions_from_range (stmt_info, stmt);
6873 vect_determine_precisions_from_users (stmt_info, stmt);
6877 /* Walk backwards through the vectorizable region to determine the
6878 values of these fields:
6880 - min_output_precision
6881 - min_input_precision
6882 - operation_precision
6883 - operation_sign. */
6885 void
6886 vect_determine_precisions (vec_info *vinfo)
6888 DUMP_VECT_SCOPE ("vect_determine_precisions");
6890 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
6892 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6893 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
6894 unsigned int nbbs = loop->num_nodes;
6896 for (unsigned int i = 0; i < nbbs; i++)
6898 basic_block bb = bbs[i];
6899 for (auto gsi = gsi_start_phis (bb);
6900 !gsi_end_p (gsi); gsi_next (&gsi))
6902 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
6903 if (stmt_info)
6904 vect_determine_mask_precision (vinfo, stmt_info);
6906 for (auto si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
6907 if (!is_gimple_debug (gsi_stmt (si)))
6908 vect_determine_mask_precision
6909 (vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
6911 for (unsigned int i = 0; i < nbbs; i++)
6913 basic_block bb = bbs[nbbs - i - 1];
6914 for (gimple_stmt_iterator si = gsi_last_bb (bb);
6915 !gsi_end_p (si); gsi_prev (&si))
6916 if (!is_gimple_debug (gsi_stmt (si)))
6917 vect_determine_stmt_precisions
6918 (vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
6919 for (auto gsi = gsi_start_phis (bb);
6920 !gsi_end_p (gsi); gsi_next (&gsi))
6922 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
6923 if (stmt_info)
6924 vect_determine_stmt_precisions (vinfo, stmt_info);
6928 else
6930 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
6931 for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
6933 basic_block bb = bb_vinfo->bbs[i];
6934 for (auto gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
6936 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
6937 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
6938 vect_determine_mask_precision (vinfo, stmt_info);
6940 for (auto gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
6942 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
6943 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
6944 vect_determine_mask_precision (vinfo, stmt_info);
6947 for (int i = bb_vinfo->bbs.length () - 1; i != -1; --i)
6949 for (gimple_stmt_iterator gsi = gsi_last_bb (bb_vinfo->bbs[i]);
6950 !gsi_end_p (gsi); gsi_prev (&gsi))
6952 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
6953 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
6954 vect_determine_stmt_precisions (vinfo, stmt_info);
6956 for (auto gsi = gsi_start_phis (bb_vinfo->bbs[i]);
6957 !gsi_end_p (gsi); gsi_next (&gsi))
6959 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
6960 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
6961 vect_determine_stmt_precisions (vinfo, stmt_info);
6967 typedef gimple *(*vect_recog_func_ptr) (vec_info *, stmt_vec_info, tree *);
6969 struct vect_recog_func
6971 vect_recog_func_ptr fn;
6972 const char *name;
6975 /* Note that ordering matters - the first pattern matching on a stmt is
6976 taken, which means the more complex one usually needs to precede the
6977 less complex ones (widen_sum only after dot_prod or sad for example).
6978 static vect_recog_func vect_vect_recog_func_ptrs[] = {
6979 { vect_recog_bitfield_ref_pattern, "bitfield_ref" },
6980 { vect_recog_bit_insert_pattern, "bit_insert" },
6981 { vect_recog_abd_pattern, "abd" },
6982 { vect_recog_over_widening_pattern, "over_widening" },
6983 /* Must come after over_widening, which narrows the shift as much as
6984 possible beforehand. */
6985 { vect_recog_average_pattern, "average" },
6986 { vect_recog_cond_expr_convert_pattern, "cond_expr_convert" },
6987 { vect_recog_mulhs_pattern, "mult_high" },
6988 { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
6989 { vect_recog_widen_mult_pattern, "widen_mult" },
6990 { vect_recog_dot_prod_pattern, "dot_prod" },
6991 { vect_recog_sad_pattern, "sad" },
6992 { vect_recog_widen_sum_pattern, "widen_sum" },
6993 { vect_recog_pow_pattern, "pow" },
6994 { vect_recog_popcount_clz_ctz_ffs_pattern, "popcount_clz_ctz_ffs" },
6995 { vect_recog_ctz_ffs_pattern, "ctz_ffs" },
6996 { vect_recog_widen_shift_pattern, "widen_shift" },
6997 { vect_recog_rotate_pattern, "rotate" },
6998 { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
6999 { vect_recog_divmod_pattern, "divmod" },
7000 { vect_recog_mult_pattern, "mult" },
7001 { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
7002 { vect_recog_gcond_pattern, "gcond" },
7003 { vect_recog_bool_pattern, "bool" },
7004 /* This must come before mask conversion, and includes the parts
7005 of mask conversion that are needed for gather and scatter
7006 internal functions. */
7007 { vect_recog_gather_scatter_pattern, "gather_scatter" },
7008 { vect_recog_mask_conversion_pattern, "mask_conversion" },
7009 { vect_recog_widen_plus_pattern, "widen_plus" },
7010 { vect_recog_widen_minus_pattern, "widen_minus" },
7011 { vect_recog_widen_abd_pattern, "widen_abd" },
7012 /* These must come after the double widening ones. */
7015 const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);
7017 /* Mark statements that are involved in a pattern. */
7019 void
7020 vect_mark_pattern_stmts (vec_info *vinfo,
7021 stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
7022 tree pattern_vectype)
7024 stmt_vec_info orig_stmt_info_saved = orig_stmt_info;
7025 gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
7027 gimple *orig_pattern_stmt = NULL;
7028 if (is_pattern_stmt_p (orig_stmt_info))
7030 /* We're replacing a statement in an existing pattern definition
7031 sequence. */
7032 orig_pattern_stmt = orig_stmt_info->stmt;
7033 if (dump_enabled_p ())
7034 dump_printf_loc (MSG_NOTE, vect_location,
7035 "replacing earlier pattern %G", orig_pattern_stmt);
7037 /* To keep the book-keeping simple, just swap the lhs of the
7038 old and new statements, so that the old one has a valid but
7039 unused lhs. */
7040 tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
7041 gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
7042 gimple_set_lhs (pattern_stmt, old_lhs);
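/* Illustration with made-up names: if the earlier pattern stmt was
   patt_a = x + y  and the new pattern stmt computes  patt_b = ..., the
   swap leaves the earlier stmt defining the unused patt_b while the new
   stmt defines patt_a, so existing uses of patt_a now see the
   replacement.  */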
7044 if (dump_enabled_p ())
7045 dump_printf_loc (MSG_NOTE, vect_location, "with %G", pattern_stmt);
7047 /* Switch to the statement that ORIG replaces. */
7048 orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);
7050 /* We shouldn't be replacing the main pattern statement. */
7051 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
7052 != orig_pattern_stmt);
7055 if (def_seq)
7056 for (gimple_stmt_iterator si = gsi_start (def_seq);
7057 !gsi_end_p (si); gsi_next (&si))
7059 if (dump_enabled_p ())
7060 dump_printf_loc (MSG_NOTE, vect_location,
7061 "extra pattern stmt: %G", gsi_stmt (si));
7062 stmt_vec_info pattern_stmt_info
7063 = vect_init_pattern_stmt (vinfo, gsi_stmt (si),
7064 orig_stmt_info, pattern_vectype);
7065 /* Stmts in the def sequence are not vectorizable cycle or
7066 induction defs; instead they should all be vect_internal_def,
7067 feeding the main pattern stmt, which retains this def type. */
7068 STMT_VINFO_DEF_TYPE (pattern_stmt_info) = vect_internal_def;
7071 if (orig_pattern_stmt)
7073 vect_init_pattern_stmt (vinfo, pattern_stmt,
7074 orig_stmt_info, pattern_vectype);
7076 /* Insert all the new pattern statements before the original one. */
7077 gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
7078 gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
7079 orig_def_seq);
7080 gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
7081 gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);
7083 /* Remove the pattern statement that this new pattern replaces. */
7084 gsi_remove (&gsi, false);
7086 else
7087 vect_set_pattern_stmt (vinfo,
7088 pattern_stmt, orig_stmt_info, pattern_vectype);
7090 /* For any conditionals mark them as vect_condition_def. */
7091 if (is_a <gcond *> (pattern_stmt))
7092 STMT_VINFO_DEF_TYPE (STMT_VINFO_RELATED_STMT (orig_stmt_info)) = vect_condition_def;
7094 /* Transfer reduction path info to the pattern. */
7095 if (STMT_VINFO_REDUC_IDX (orig_stmt_info_saved) != -1)
7097 gimple_match_op op;
7098 if (!gimple_extract_op (orig_stmt_info_saved->stmt, &op))
7099 gcc_unreachable ();
7100 tree lookfor = op.ops[STMT_VINFO_REDUC_IDX (orig_stmt_info)];
7101 /* Search the pattern def sequence and the main pattern stmt. Note
7102 we may have inserted everything into a containing pattern def sequence,
7103 so the following is a bit awkward. */
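/* Sketch of the walk below, with hypothetical names: if the original stmt
   was  sum_1 = x_1 + sum_0  and the reduction index pointed at sum_0, we
   visit the pattern def sequence and finally the main pattern stmt; at
   each stmt that uses the value currently tracked we record that operand
   index and continue with the stmt's lhs, so the reduction chain is
   preserved across the replacement.  */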
7104 gimple_stmt_iterator si;
7105 gimple *s;
7106 if (def_seq)
7108 si = gsi_start (def_seq);
7109 s = gsi_stmt (si);
7110 gsi_next (&si);
7112 else
7114 si = gsi_none ();
7115 s = pattern_stmt;
7116 }
7117 do
7118 {
7119 bool found = false;
7120 if (gimple_extract_op (s, &op))
7121 for (unsigned i = 0; i < op.num_ops; ++i)
7122 if (op.ops[i] == lookfor)
7124 STMT_VINFO_REDUC_IDX (vinfo->lookup_stmt (s)) = i;
7125 lookfor = gimple_get_lhs (s);
7126 found = true;
7127 break;
7129 if (s == pattern_stmt)
7131 if (!found && dump_enabled_p ())
7132 dump_printf_loc (MSG_NOTE, vect_location,
7133 "failed to update reduction index.\n");
7134 break;
7136 if (gsi_end_p (si))
7137 s = pattern_stmt;
7138 else
7140 s = gsi_stmt (si);
7141 if (s == pattern_stmt)
7142 /* Found the end inside a bigger pattern def seq. */
7143 si = gsi_none ();
7144 else
7145 gsi_next (&si);
7147 } while (1);
7151 /* Function vect_pattern_recog_1
7153 Input:
7154 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
7155 computation pattern.
7156 STMT_INFO: A stmt from which the pattern search should start.
7158 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
7159 a sequence of statements that has the same functionality and can be
7160 used to replace STMT_INFO. It returns the last statement in the sequence
7161 and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
7162 PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
7163 statement, having first checked that the target supports the new operation
7164 in that type.
7166 This function also does some bookkeeping, as explained in the documentation
7167 for vect_pattern_recog. */
7169 static void
7170 vect_pattern_recog_1 (vec_info *vinfo,
7171 vect_recog_func *recog_func, stmt_vec_info stmt_info)
7173 gimple *pattern_stmt;
7174 loop_vec_info loop_vinfo;
7175 tree pattern_vectype;
7177 /* If this statement has already been replaced with pattern statements,
7178 leave the original statement alone, since the first match wins.
7179 Instead try to match against the definition statements that feed
7180 the main pattern statement. */
7181 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
7183 gimple_stmt_iterator gsi;
7184 for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
7185 !gsi_end_p (gsi); gsi_next (&gsi))
7186 vect_pattern_recog_1 (vinfo, recog_func,
7187 vinfo->lookup_stmt (gsi_stmt (gsi)));
7188 return;
7191 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
7192 pattern_stmt = recog_func->fn (vinfo, stmt_info, &pattern_vectype);
7193 if (!pattern_stmt)
7195 /* Clear any half-formed pattern definition sequence. */
7196 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
7197 return;
7200 loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7202 /* Found a vectorizable pattern. */
7203 if (dump_enabled_p ())
7204 dump_printf_loc (MSG_NOTE, vect_location,
7205 "%s pattern recognized: %G",
7206 recog_func->name, pattern_stmt);
7208 /* Mark the stmts that are involved in the pattern. */
7209 vect_mark_pattern_stmts (vinfo, stmt_info, pattern_stmt, pattern_vectype);
7211 /* Patterns cannot be vectorized using SLP, because they change the order of
7212 computation. */
7213 if (loop_vinfo)
7215 unsigned ix, ix2;
7216 stmt_vec_info *elem_ptr;
7217 VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
7218 elem_ptr, *elem_ptr == stmt_info);
7223 /* Function vect_pattern_recog
7225 Input:
7226 LOOP_VINFO - a loop_vec_info of a loop in which we want to look for
7227 computation idioms.
7229 Output - for each computation idiom that is detected we create a new stmt
7230 that provides the same functionality and that can be vectorized. We
7231 also record some information in the stmt_vec_info of the relevant
7232 stmts, as explained below:
7234 At the entry to this function we have the following stmts, with the
7235 following initial value in the STMT_VINFO fields:
7237 stmt in_pattern_p related_stmt vec_stmt
7238 S1: a_i = .... - - -
7239 S2: a_2 = ..use(a_i).. - - -
7240 S3: a_1 = ..use(a_2).. - - -
7241 S4: a_0 = ..use(a_1).. - - -
7242 S5: ... = ..use(a_0).. - - -
7244 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
7245 represented by a single stmt. We then:
7246 - create a new stmt S6 equivalent to the pattern (the stmt is not
7247 inserted into the code)
7248 - fill in the STMT_VINFO fields as follows:
7250 in_pattern_p related_stmt vec_stmt
7251 S1: a_i = .... - - -
7252 S2: a_2 = ..use(a_i).. - - -
7253 S3: a_1 = ..use(a_2).. - - -
7254 S4: a_0 = ..use(a_1).. true S6 -
7255 '---> S6: a_new = .... - S4 -
7256 S5: ... = ..use(a_0).. - - -
7258 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
7259 to each other through the RELATED_STMT field).
7261 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
7262 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
7263 remain irrelevant unless used by stmts other than S4.
7265 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
7266 (because they are marked as irrelevant). It will vectorize S6, and record
7267 a pointer to the new vector stmt VS6 from S6 (as usual).
7268 S4 will be skipped, and S5 will be vectorized as usual:
7270 in_pattern_p related_stmt vec_stmt
7271 S1: a_i = .... - - -
7272 S2: a_2 = ..use(a_i).. - - -
7273 S3: a_1 = ..use(a_2).. - - -
7274 > VS6: va_new = .... - - -
7275 S4: a_0 = ..use(a_1).. true S6 VS6
7276 '---> S6: a_new = .... - S4 VS6
7277 > VS5: ... = ..vuse(va_new).. - - -
7278 S5: ... = ..use(a_0).. - - -
7280 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
7281 elsewhere), and we'll end up with:
7283 VS6: va_new = ....
7284 VS5: ... = ..vuse(va_new)..
7286 In the case of more than one pattern statement, e.g., widen-mult with
7287 intermediate type:
7289 S1 a_t = ;
7290 S2 a_T = (TYPE) a_t;
7291 '--> S3: a_it = (interm_type) a_t;
7292 S4 prod_T = a_T * CONST;
7293 '--> S5: prod_T' = a_it w* CONST;
7295 there may be other users of a_T outside the pattern. In that case S2 will
7296 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
7297 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
7298 be recorded in S3. */
7300 void
7301 vect_pattern_recog (vec_info *vinfo)
7303 class loop *loop;
7304 basic_block *bbs;
7305 unsigned int nbbs;
7306 gimple_stmt_iterator si;
7307 unsigned int i, j;
7309 vect_determine_precisions (vinfo);
7311 DUMP_VECT_SCOPE ("vect_pattern_recog");
7313 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
7315 loop = LOOP_VINFO_LOOP (loop_vinfo);
7316 bbs = LOOP_VINFO_BBS (loop_vinfo);
7317 nbbs = loop->num_nodes;
7319 /* Scan through the loop stmts, applying the pattern recognition
7320 functions starting at each stmt visited: */
7321 for (i = 0; i < nbbs; i++)
7323 basic_block bb = bbs[i];
7324 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
7326 if (is_gimple_debug (gsi_stmt (si)))
7327 continue;
7328 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
7329 /* Scan over all generic vect_recog_xxx_pattern functions. */
7330 for (j = 0; j < NUM_PATTERNS; j++)
7331 vect_pattern_recog_1 (vinfo, &vect_vect_recog_func_ptrs[j],
7332 stmt_info);
7336 else
7338 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
7339 for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
7340 for (gimple_stmt_iterator gsi = gsi_start_bb (bb_vinfo->bbs[i]);
7341 !gsi_end_p (gsi); gsi_next (&gsi))
7343 stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (gsi_stmt (gsi));
7344 if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info))
7345 continue;
7347 /* Scan over all generic vect_recog_xxx_pattern functions. */
7348 for (j = 0; j < NUM_PATTERNS; j++)
7349 vect_pattern_recog_1 (vinfo,
7350 &vect_vect_recog_func_ptrs[j], stmt_info);
7354 /* After this no more add_stmt calls are allowed. */
7355 vinfo->stmt_vec_info_ro = true;
7358 /* Build a GIMPLE_ASSIGN or GIMPLE_CALL with the tree_code or
7359 internal_fn contained in CH, respectively. */
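/* For instance (illustrative only):
     vect_gimple_build (lhs, PLUS_EXPR, a, b)        builds  lhs = a + b;
     vect_gimple_build (lhs, IFN_SQRT, a, NULL_TREE) builds  lhs = .SQRT (a);  */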
7360 gimple *
7361 vect_gimple_build (tree lhs, code_helper ch, tree op0, tree op1)
7363 gcc_assert (op0 != NULL_TREE);
7364 if (ch.is_tree_code ())
7365 return gimple_build_assign (lhs, (tree_code) ch, op0, op1);
7367 gcc_assert (ch.is_internal_fn ());
7368 gimple* stmt = gimple_build_call_internal (as_internal_fn ((combined_fn) ch),
7369 op1 == NULL_TREE ? 1 : 2,
7370 op0, op1);
7371 gimple_call_set_lhs (stmt, lhs);
7372 return stmt;