1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2023 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "gimple-iterator.h"
29 #include "gimple-fold.h"
30 #include "ssa.h"
31 #include "expmed.h"
32 #include "optabs-tree.h"
33 #include "insn-config.h"
34 #include "recog.h" /* FIXME: for insn_data */
35 #include "fold-const.h"
36 #include "stor-layout.h"
37 #include "tree-eh.h"
38 #include "gimplify.h"
39 #include "gimple-iterator.h"
40 #include "gimple-fold.h"
41 #include "gimplify-me.h"
42 #include "cfgloop.h"
43 #include "tree-vectorizer.h"
44 #include "dumpfile.h"
45 #include "builtins.h"
46 #include "internal-fn.h"
47 #include "case-cfn-macros.h"
48 #include "fold-const-call.h"
49 #include "attribs.h"
50 #include "cgraph.h"
51 #include "omp-simd-clone.h"
52 #include "predict.h"
53 #include "tree-vector-builder.h"
54 #include "vec-perm-indices.h"
55 #include "gimple-range.h"
58 /* TODO: Note the vectorizer still builds COND_EXPRs with GENERIC compares
59 in the first operand. Disentangling this is future work; the
60 IL is properly transferred to VEC_COND_EXPRs with separate compares. */
63 /* Return true if we have a useful VR_RANGE range for VAR, storing it
64 in *MIN_VALUE and *MAX_VALUE if so. Note the range in the dump files. */
66 bool
67 vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
69 value_range vr;
70 tree vr_min, vr_max;
71 get_range_query (cfun)->range_of_expr (vr, var);
72 if (vr.undefined_p ())
73 vr.set_varying (TREE_TYPE (var));
74 value_range_kind vr_type = get_legacy_range (vr, vr_min, vr_max);
75 *min_value = wi::to_wide (vr_min);
76 *max_value = wi::to_wide (vr_max);
77 wide_int nonzero = get_nonzero_bits (var);
78 signop sgn = TYPE_SIGN (TREE_TYPE (var));
79 if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
80 nonzero, sgn) == VR_RANGE)
82 if (dump_enabled_p ())
84 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
85 dump_printf (MSG_NOTE, " has range [");
86 dump_hex (MSG_NOTE, *min_value);
87 dump_printf (MSG_NOTE, ", ");
88 dump_hex (MSG_NOTE, *max_value);
89 dump_printf (MSG_NOTE, "]\n");
91 return true;
93 else
95 if (dump_enabled_p ())
97 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
98 dump_printf (MSG_NOTE, " has no range info\n");
100 return false;
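/* A minimal usage sketch (illustrative only; the variable names are made up):

     wide_int lo, hi;
     if (vect_get_range_info (var, &lo, &hi))
       ...  /* use the [lo, hi] range, e.g. to justify a narrower element type */
 */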
104 /* Report that we've found an instance of pattern PATTERN in
105 statement STMT. */
107 static void
108 vect_pattern_detected (const char *name, gimple *stmt)
110 if (dump_enabled_p ())
111 dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt);
114 /* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
115 return the pattern statement's stmt_vec_info. Set its vector type to
116 VECTYPE if it doesn't have one already. */
118 static stmt_vec_info
119 vect_init_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
120 stmt_vec_info orig_stmt_info, tree vectype)
122 stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
123 if (pattern_stmt_info == NULL)
124 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
125 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));
127 pattern_stmt_info->pattern_stmt_p = true;
128 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
129 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
130 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
131 if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
133 gcc_assert (!vectype
134 || (VECTOR_BOOLEAN_TYPE_P (vectype)
135 == vect_use_mask_type_p (orig_stmt_info)));
136 STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
137 pattern_stmt_info->mask_precision = orig_stmt_info->mask_precision;
139 return pattern_stmt_info;
142 /* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
143 Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
144 have one already. */
146 static void
147 vect_set_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
148 stmt_vec_info orig_stmt_info, tree vectype)
150 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
151 STMT_VINFO_RELATED_STMT (orig_stmt_info)
152 = vect_init_pattern_stmt (vinfo, pattern_stmt, orig_stmt_info, vectype);
155 /* Add NEW_STMT to STMT_INFO's pattern definition statements. If VECTYPE
156 is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
157 be different from the vector type of the final pattern statement.
158 If VECTYPE is a mask type, SCALAR_TYPE_FOR_MASK is the scalar type
159 from which it was derived. */
161 static inline void
162 append_pattern_def_seq (vec_info *vinfo,
163 stmt_vec_info stmt_info, gimple *new_stmt,
164 tree vectype = NULL_TREE,
165 tree scalar_type_for_mask = NULL_TREE)
167 gcc_assert (!scalar_type_for_mask
168 == (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype)));
169 if (vectype)
171 stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
172 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
173 if (scalar_type_for_mask)
174 new_stmt_info->mask_precision
175 = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (scalar_type_for_mask));
177 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
178 new_stmt);
181 /* The caller wants to perform new operations on vect_external variable
182 VAR, so that the result of the operations would also be vect_external.
183 Return the edge on which the operations can be performed, if one exists.
184 Return null if the operations should instead be treated as part of
185 the pattern that needs them. */
187 static edge
188 vect_get_external_def_edge (vec_info *vinfo, tree var)
190 edge e = NULL;
191 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
193 e = loop_preheader_edge (loop_vinfo->loop);
194 if (!SSA_NAME_IS_DEFAULT_DEF (var))
196 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
197 if (bb == NULL
198 || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
199 e = NULL;
202 return e;
205 /* Return true if the target supports a vector version of CODE,
206 where CODE is known to map to a direct optab with the given SUBTYPE.
207 ITYPE specifies the type of (some of) the scalar inputs and OTYPE
208 specifies the type of the scalar result.
210 If CODE allows the inputs and outputs to have different types
211 (such as for WIDEN_SUM_EXPR), it is the input mode rather
212 than the output mode that determines the appropriate target pattern.
213 Operand 0 of the target pattern then specifies the mode that the output
214 must have.
216 When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
217 Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
218 is nonnull. */
220 static bool
221 vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code,
222 tree itype, tree *vecotype_out,
223 tree *vecitype_out = NULL,
224 enum optab_subtype subtype = optab_default)
226 tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
227 if (!vecitype)
228 return false;
230 tree vecotype = get_vectype_for_scalar_type (vinfo, otype);
231 if (!vecotype)
232 return false;
234 optab optab = optab_for_tree_code (code, vecitype, subtype);
235 if (!optab)
236 return false;
238 insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
239 if (icode == CODE_FOR_nothing
240 || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
241 return false;
243 *vecotype_out = vecotype;
244 if (vecitype_out)
245 *vecitype_out = vecitype;
246 return true;
249 /* Round bit precision PRECISION up to a full element. */
251 static unsigned int
252 vect_element_precision (unsigned int precision)
254 precision = 1 << ceil_log2 (precision);
255 return MAX (precision, BITS_PER_UNIT);
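/* For example, vect_element_precision (1) through (8) all return 8,
   (9) through (16) return 16, and (17) through (32) return 32, so a value
   that needs 12 bits of precision is given a full 16-bit element.  */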
258 /* If OP is defined by a statement that's being considered for vectorization,
259 return information about that statement, otherwise return NULL. */
261 static stmt_vec_info
262 vect_get_internal_def (vec_info *vinfo, tree op)
264 stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
265 if (def_stmt_info
266 && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
267 return def_stmt_info;
268 return NULL;
271 /* Check whether NAME, an ssa-name used in STMT_VINFO,
272 is a result of a type promotion, such that:
273 DEF_STMT: NAME = NOP (name0)
274 If CHECK_SIGN is TRUE, check that either both types are signed or both are
275 unsigned. */
277 static bool
278 type_conversion_p (vec_info *vinfo, tree name, bool check_sign,
279 tree *orig_type, gimple **def_stmt, bool *promotion)
281 tree type = TREE_TYPE (name);
282 tree oprnd0;
283 enum vect_def_type dt;
285 stmt_vec_info def_stmt_info;
286 if (!vect_is_simple_use (name, vinfo, &dt, &def_stmt_info, def_stmt))
287 return false;
289 if (dt != vect_internal_def
290 && dt != vect_external_def && dt != vect_constant_def)
291 return false;
293 if (!*def_stmt)
294 return false;
296 if (!is_gimple_assign (*def_stmt))
297 return false;
299 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
300 return false;
302 oprnd0 = gimple_assign_rhs1 (*def_stmt);
304 *orig_type = TREE_TYPE (oprnd0);
305 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
306 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
307 return false;
309 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
310 *promotion = true;
311 else
312 *promotion = false;
314 if (!vect_is_simple_use (oprnd0, vinfo, &dt))
315 return false;
317 return true;
320 /* Holds information about an input operand after some sign changes
321 and type promotions have been peeled away. */
322 class vect_unpromoted_value {
323 public:
324 vect_unpromoted_value ();
326 void set_op (tree, vect_def_type, stmt_vec_info = NULL);
328 /* The value obtained after peeling away zero or more casts. */
329 tree op;
331 /* The type of OP. */
332 tree type;
334 /* The definition type of OP. */
335 vect_def_type dt;
337 /* If OP is the result of peeling at least one cast, and if the cast
338 of OP itself is a vectorizable statement, CASTER identifies that
339 statement, otherwise it is null. */
340 stmt_vec_info caster;
343 inline vect_unpromoted_value::vect_unpromoted_value ()
344 : op (NULL_TREE),
345 type (NULL_TREE),
346 dt (vect_uninitialized_def),
347 caster (NULL)
351 /* Set the operand to OP_IN, its definition type to DT_IN, and the
352 statement that casts it to CASTER_IN. */
354 inline void
355 vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
356 stmt_vec_info caster_in)
358 op = op_in;
359 type = TREE_TYPE (op);
360 dt = dt_in;
361 caster = caster_in;
364 /* If OP is a vectorizable SSA name, strip a sequence of integer conversions
365 to reach some vectorizable inner operand OP', continuing as long as it
366 is possible to convert OP' back to OP using a possible sign change
367 followed by a possible promotion P. Return this OP', or null if OP is
368 not a vectorizable SSA name. If there is a promotion P, describe its
369 input in UNPROM, otherwise describe OP' in UNPROM. If SINGLE_USE_P
370 is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
371 have more than one user.
373 A successful return means that it is possible to go from OP' to OP
374 via UNPROM. The cast from OP' to UNPROM is at most a sign change,
375 whereas the cast from UNPROM to OP might be a promotion, a sign
376 change, or a nop.
378 E.g. say we have:
380 signed short *ptr = ...;
381 signed short C = *ptr;
382 unsigned short B = (unsigned short) C; // sign change
383 signed int A = (signed int) B; // unsigned promotion
384 ...possible other uses of A...
385 unsigned int OP = (unsigned int) A; // sign change
387 In this case it's possible to go directly from C to OP using:
389 OP = (unsigned int) (unsigned short) C;
390 +------------+ +--------------+
391 promotion sign change
393 so OP' would be C. The input to the promotion is B, so UNPROM
394 would describe B. */
396 static tree
397 vect_look_through_possible_promotion (vec_info *vinfo, tree op,
398 vect_unpromoted_value *unprom,
399 bool *single_use_p = NULL)
401 tree op_type = TREE_TYPE (op);
402 if (!INTEGRAL_TYPE_P (op_type))
403 return NULL_TREE;
405 tree res = NULL_TREE;
406 unsigned int orig_precision = TYPE_PRECISION (op_type);
407 unsigned int min_precision = orig_precision;
408 stmt_vec_info caster = NULL;
409 while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
411 /* See whether OP is simple enough to vectorize. */
412 stmt_vec_info def_stmt_info;
413 gimple *def_stmt;
414 vect_def_type dt;
415 if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
416 break;
418 /* If OP is the input of a demotion, skip over it to see whether
419 OP is itself the result of a promotion. If so, the combined
420 effect of the promotion and the demotion might fit the required
421 pattern, otherwise neither operation fits.
423 This copes with cases such as the result of an arithmetic
424 operation being truncated before being stored, and where that
425 arithmetic operation has been recognized as an over-widened one. */
426 if (TYPE_PRECISION (op_type) <= min_precision)
428 /* Use OP as the UNPROM described above if we haven't yet
429 found a promotion, or if using the new input preserves the
430 sign of the previous promotion. */
431 if (!res
432 || TYPE_PRECISION (unprom->type) == orig_precision
433 || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type))
435 unprom->set_op (op, dt, caster);
436 min_precision = TYPE_PRECISION (op_type);
438 /* Stop if we've already seen a promotion and if this
439 conversion does more than change the sign. */
440 else if (TYPE_PRECISION (op_type)
441 != TYPE_PRECISION (unprom->type))
442 break;
444 /* The sequence now extends to OP. */
445 res = op;
448 /* See whether OP is defined by a cast. Record it as CASTER if
449 the cast is potentially vectorizable. */
450 if (!def_stmt)
451 break;
452 caster = def_stmt_info;
454 /* Ignore pattern statements, since we don't link uses for them. */
455 if (caster
456 && single_use_p
457 && !STMT_VINFO_RELATED_STMT (caster)
458 && !has_single_use (res))
459 *single_use_p = false;
461 gassign *assign = dyn_cast <gassign *> (def_stmt);
462 if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
463 break;
465 /* Continue with the input to the cast. */
466 op = gimple_assign_rhs1 (def_stmt);
467 op_type = TREE_TYPE (op);
469 return res;
472 /* OP is an integer operand to an operation that returns TYPE, and we
473 want to treat the operation as a widening one. So far we can treat
474 it as widening from *COMMON_TYPE.
476 Return true if OP is suitable for such a widening operation,
477 either widening from *COMMON_TYPE or from some supertype of it.
478 Update *COMMON_TYPE to the supertype in the latter case.
480 SHIFT_P is true if OP is a shift amount. */
482 static bool
483 vect_joust_widened_integer (tree type, bool shift_p, tree op,
484 tree *common_type)
486 /* Calculate the minimum precision required by OP, without changing
487 the sign of either operand. */
488 unsigned int precision;
489 if (shift_p)
491 if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
492 return false;
493 precision = TREE_INT_CST_LOW (op);
495 else
497 precision = wi::min_precision (wi::to_widest (op),
498 TYPE_SIGN (*common_type));
499 if (precision * 2 > TYPE_PRECISION (type))
500 return false;
503 /* If OP requires a wider type, switch to that type. The checks
504 above ensure that this is still narrower than the result. */
505 precision = vect_element_precision (precision);
506 if (TYPE_PRECISION (*common_type) < precision)
507 *common_type = build_nonstandard_integer_type
508 (precision, TYPE_UNSIGNED (*common_type));
509 return true;
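/* Worked example: with a 32-bit result TYPE and a constant operand 1000
   (10 bits when *COMMON_TYPE is unsigned), the operand is acceptable
   because 2 * 10 <= 32; the 10 bits are then rounded up to a full 16-bit
   element, so *COMMON_TYPE is widened to a 16-bit type if it was narrower.  */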
512 /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
513 is narrower than TYPE, storing the supertype in *COMMON_TYPE if so. */
515 static bool
516 vect_joust_widened_type (tree type, tree new_type, tree *common_type)
518 if (types_compatible_p (*common_type, new_type))
519 return true;
521 /* See if *COMMON_TYPE can hold all values of NEW_TYPE. */
522 if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
523 && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
524 return true;
526 /* See if NEW_TYPE can hold all values of *COMMON_TYPE. */
527 if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
528 && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
530 *common_type = new_type;
531 return true;
534 /* We have mismatched signs, with the signed type being
535 no wider than the unsigned type. In this case we need
536 a wider signed type. */
537 unsigned int precision = MAX (TYPE_PRECISION (*common_type),
538 TYPE_PRECISION (new_type));
539 precision *= 2;
541 if (precision * 2 > TYPE_PRECISION (type))
542 return false;
544 *common_type = build_nonstandard_integer_type (precision, false);
545 return true;
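/* Worked example: if *COMMON_TYPE is unsigned char, NEW_TYPE is signed char
   and TYPE is 32-bit, the signs are mismatched and neither 8-bit type can
   hold the other, so *COMMON_TYPE becomes a 16-bit signed type, which holds
   all values of both inputs and is still narrower than TYPE.  */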
548 /* Check whether STMT_INFO can be viewed as a tree of integer operations
549 in which each node either performs CODE or WIDENED_CODE, and where
550 each leaf operand is narrower than the result of STMT_INFO. MAX_NOPS
551 specifies the maximum number of leaf operands. SHIFT_P says whether
552 CODE and WIDENED_CODE are some sort of shift.
554 If STMT_INFO is such a tree, return the number of leaf operands
555 and describe them in UNPROM[0] onwards. Also set *COMMON_TYPE
556 to a type that (a) is narrower than the result of STMT_INFO and
557 (b) can hold all leaf operand values.
559 If SUBTYPE is nonnull, allow the signs of the operands
560 to differ, but not their precision. SUBTYPE is updated to reflect
561 this.
563 Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
564 exists. */
566 static unsigned int
567 vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code,
568 code_helper widened_code, bool shift_p,
569 unsigned int max_nops,
570 vect_unpromoted_value *unprom, tree *common_type,
571 enum optab_subtype *subtype = NULL)
573 /* Check for an integer operation with the right code. */
574 gimple* stmt = stmt_info->stmt;
575 if (!(is_gimple_assign (stmt) || is_gimple_call (stmt)))
576 return 0;
578 code_helper rhs_code;
579 if (is_gimple_assign (stmt))
580 rhs_code = gimple_assign_rhs_code (stmt);
581 else if (is_gimple_call (stmt))
582 rhs_code = gimple_call_combined_fn (stmt);
583 else
584 return 0;
586 if (rhs_code != code
587 && rhs_code != widened_code)
588 return 0;
590 tree lhs = gimple_get_lhs (stmt);
591 tree type = TREE_TYPE (lhs);
592 if (!INTEGRAL_TYPE_P (type))
593 return 0;
595 /* Assume that both operands will be leaf operands. */
596 max_nops -= 2;
598 /* Check the operands. */
599 unsigned int next_op = 0;
600 for (unsigned int i = 0; i < 2; ++i)
602 vect_unpromoted_value *this_unprom = &unprom[next_op];
603 unsigned int nops = 1;
604 tree op = gimple_arg (stmt, i);
605 if (i == 1 && TREE_CODE (op) == INTEGER_CST)
607 /* We already have a common type from earlier operands.
608 Update it to account for OP. */
609 this_unprom->set_op (op, vect_constant_def);
610 if (!vect_joust_widened_integer (type, shift_p, op, common_type))
611 return 0;
613 else
615 /* Only allow shifts by constants. */
616 if (shift_p && i == 1)
617 return 0;
619 if (rhs_code != code)
621 /* If rhs_code is widened_code, don't look through further
622 possible promotions; there is a promotion already embedded
623 in the WIDEN_*_EXPR. */
624 if (TREE_CODE (op) != SSA_NAME
625 || !INTEGRAL_TYPE_P (TREE_TYPE (op)))
626 return 0;
628 stmt_vec_info def_stmt_info;
629 gimple *def_stmt;
630 vect_def_type dt;
631 if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info,
632 &def_stmt))
633 return 0;
634 this_unprom->set_op (op, dt, NULL);
636 else if (!vect_look_through_possible_promotion (vinfo, op,
637 this_unprom))
638 return 0;
640 if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
642 /* The operand isn't widened. If STMT_INFO has the code
643 for an unwidened operation, recursively check whether
644 this operand is a node of the tree. */
645 if (rhs_code != code
646 || max_nops == 0
647 || this_unprom->dt != vect_internal_def)
648 return 0;
650 /* Give back the leaf slot allocated above now that we're
651 not treating this as a leaf operand. */
652 max_nops += 1;
654 /* Recursively process the definition of the operand. */
655 stmt_vec_info def_stmt_info
656 = vinfo->lookup_def (this_unprom->op);
657 nops = vect_widened_op_tree (vinfo, def_stmt_info, code,
658 widened_code, shift_p, max_nops,
659 this_unprom, common_type,
660 subtype);
661 if (nops == 0)
662 return 0;
664 max_nops -= nops;
666 else
668 /* Make sure that the operand is narrower than the result. */
669 if (TYPE_PRECISION (this_unprom->type) * 2
670 > TYPE_PRECISION (type))
671 return 0;
673 /* Update COMMON_TYPE for the new operand. */
674 if (i == 0)
675 *common_type = this_unprom->type;
676 else if (!vect_joust_widened_type (type, this_unprom->type,
677 common_type))
679 if (subtype)
681 /* See if we can sign extend the smaller type. */
682 if (TYPE_PRECISION (this_unprom->type)
683 > TYPE_PRECISION (*common_type))
684 *common_type = this_unprom->type;
685 *subtype = optab_vector_mixed_sign;
687 else
688 return 0;
692 next_op += nops;
694 return next_op;
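/* For example, given unsigned char values a and b and

     int res = (int) a * (int) b;

   a call on the multiplication with CODE == MULT_EXPR and MAX_NOPS == 2
   returns 2, describes a and b in UNPROM[0] and UNPROM[1], and sets
   *COMMON_TYPE to the unsigned char type.  */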
697 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
698 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
700 static tree
701 vect_recog_temp_ssa_var (tree type, gimple *stmt = NULL)
703 return make_temp_ssa_name (type, stmt, "patt");
706 /* STMT2_INFO describes a type conversion that could be split into STMT1
707 followed by a version of STMT2_INFO that takes NEW_RHS as its first
708 input. Try to do this using pattern statements, returning true on
709 success. */
711 static bool
712 vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
713 gimple *stmt1, tree vectype)
715 if (is_pattern_stmt_p (stmt2_info))
717 /* STMT2_INFO is part of a pattern. Get the statement to which
718 the pattern is attached. */
719 stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
720 vect_init_pattern_stmt (vinfo, stmt1, orig_stmt2_info, vectype);
722 if (dump_enabled_p ())
723 dump_printf_loc (MSG_NOTE, vect_location,
724 "Splitting pattern statement: %G", stmt2_info->stmt);
726 /* Since STMT2_INFO is a pattern statement, we can change it
727 in-situ without worrying about changing the code for the
728 containing block. */
729 gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);
731 if (dump_enabled_p ())
733 dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
734 dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
735 stmt2_info->stmt);
738 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
739 if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
740 /* STMT2_INFO is the actual pattern statement. Add STMT1
741 to the end of the definition sequence. */
742 gimple_seq_add_stmt_without_update (def_seq, stmt1);
743 else
745 /* STMT2_INFO belongs to the definition sequence. Insert STMT1
746 before it. */
747 gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
748 gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
750 return true;
752 else
754 /* STMT2_INFO doesn't yet have a pattern. Try to create a
755 two-statement pattern now. */
756 gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
757 tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
758 tree lhs_vectype = get_vectype_for_scalar_type (vinfo, lhs_type);
759 if (!lhs_vectype)
760 return false;
762 if (dump_enabled_p ())
763 dump_printf_loc (MSG_NOTE, vect_location,
764 "Splitting statement: %G", stmt2_info->stmt);
766 /* Add STMT1 as a singleton pattern definition sequence. */
767 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
768 vect_init_pattern_stmt (vinfo, stmt1, stmt2_info, vectype);
769 gimple_seq_add_stmt_without_update (def_seq, stmt1);
771 /* Build the second of the two pattern statements. */
772 tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
773 gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
774 vect_set_pattern_stmt (vinfo, new_stmt2, stmt2_info, lhs_vectype);
776 if (dump_enabled_p ())
778 dump_printf_loc (MSG_NOTE, vect_location,
779 "into pattern statements: %G", stmt1);
780 dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
781 (gimple *) new_stmt2);
784 return true;
788 /* Look for the following pattern
789 X = x[i]
790 Y = y[i]
791 DIFF = X - Y
792 DAD = ABS_EXPR<DIFF>
794 ABS_STMT should point to a statement of code ABS_EXPR or ABSU_EXPR.
795 HALF_TYPE and UNPROM will be set should the statement be found to
796 be a widened operation.
797 DIFF_STMT will be set to the MINUS_EXPR
798 statement that precedes the ABS_STMT unless vect_widened_op_tree
799 succeeds. */
801 static bool
802 vect_recog_absolute_difference (vec_info *vinfo, gassign *abs_stmt,
803 tree *half_type,
804 vect_unpromoted_value unprom[2],
805 gassign **diff_stmt)
807 if (!abs_stmt)
808 return false;
810 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
811 inside the loop (in case we are analyzing an outer-loop). */
812 enum tree_code code = gimple_assign_rhs_code (abs_stmt);
813 if (code != ABS_EXPR && code != ABSU_EXPR)
814 return false;
816 tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
817 tree abs_type = TREE_TYPE (abs_oprnd);
818 if (!abs_oprnd)
819 return false;
820 if (!ANY_INTEGRAL_TYPE_P (abs_type)
821 || TYPE_OVERFLOW_WRAPS (abs_type)
822 || TYPE_UNSIGNED (abs_type))
823 return false;
825 /* Peel off conversions from the ABS input. This can involve sign
826 changes (e.g. from an unsigned subtraction to a signed ABS input)
827 or signed promotion, but it can't include unsigned promotion.
828 (Note that ABS of an unsigned promotion should have been folded
829 away before now anyway.) */
830 vect_unpromoted_value unprom_diff;
831 abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
832 &unprom_diff);
833 if (!abs_oprnd)
834 return false;
835 if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
836 && TYPE_UNSIGNED (unprom_diff.type))
837 return false;
839 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
840 stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
841 if (!diff_stmt_vinfo)
842 return false;
844 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
845 inside the loop (in case we are analyzing an outer-loop). */
846 if (vect_widened_op_tree (vinfo, diff_stmt_vinfo,
847 MINUS_EXPR, IFN_VEC_WIDEN_MINUS,
848 false, 2, unprom, half_type))
849 return true;
851 /* Failed to find a widen operation so we check for a regular MINUS_EXPR. */
852 gassign *diff = dyn_cast <gassign *> (STMT_VINFO_STMT (diff_stmt_vinfo));
853 if (diff_stmt && diff
854 && gimple_assign_rhs_code (diff) == MINUS_EXPR
855 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (abs_oprnd)))
857 *diff_stmt = diff;
858 *half_type = NULL_TREE;
859 return true;
862 return false;
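/* For example (assuming the usual undefined-overflow semantics for signed
   int), both of the following shapes are accepted:

     int d1 = (int) x - (int) y;    // x, y narrower than int: *HALF_TYPE is
     int a1 = __builtin_abs (d1);   // set and UNPROM describes x and y

     int d2 = i - j;                // plain signed subtraction: *DIFF_STMT
     int a2 = __builtin_abs (d2);   // is set and *HALF_TYPE is NULL_TREE
 */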
865 /* Convert UNPROM to TYPE and return the result, adding new statements
866 to STMT_INFO's pattern definition statements if no better way is
867 available. VECTYPE is the vector form of TYPE.
869 If SUBTYPE then convert the type based on the subtype. */
871 static tree
872 vect_convert_input (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
873 vect_unpromoted_value *unprom, tree vectype,
874 enum optab_subtype subtype = optab_default)
876 /* Update the type if the signs differ. */
877 if (subtype == optab_vector_mixed_sign)
879 gcc_assert (!TYPE_UNSIGNED (type));
880 if (TYPE_UNSIGNED (TREE_TYPE (unprom->op)))
882 type = unsigned_type_for (type);
883 vectype = unsigned_type_for (vectype);
887 /* Check for a no-op conversion. */
888 if (types_compatible_p (type, TREE_TYPE (unprom->op)))
889 return unprom->op;
891 /* Allow the caller to create constant vect_unpromoted_values. */
892 if (TREE_CODE (unprom->op) == INTEGER_CST)
893 return wide_int_to_tree (type, wi::to_widest (unprom->op));
895 tree input = unprom->op;
896 if (unprom->caster)
898 tree lhs = gimple_get_lhs (unprom->caster->stmt);
899 tree lhs_type = TREE_TYPE (lhs);
901 /* If the result of the existing cast is the right width, use it
902 instead of the source of the cast. */
903 if (TYPE_PRECISION (lhs_type) == TYPE_PRECISION (type))
904 input = lhs;
905 /* If the precision we want is between the source and result
906 precisions of the existing cast, try splitting the cast into
907 two and tapping into a mid-way point. */
908 else if (TYPE_PRECISION (lhs_type) > TYPE_PRECISION (type)
909 && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type))
911 /* In order to preserve the semantics of the original cast,
912 give the mid-way point the same signedness as the input value.
914 It would be possible to use a signed type here instead if
915 TYPE is signed and UNPROM->TYPE is unsigned, but that would
916 make the sign of the midtype sensitive to the order in
917 which we process the statements, since the signedness of
918 TYPE is the signedness required by just one of possibly
919 many users. Also, unsigned promotions are usually as cheap
920 as or cheaper than signed ones, so it's better to keep an
921 unsigned promotion. */
922 tree midtype = build_nonstandard_integer_type
923 (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type));
924 tree vec_midtype = get_vectype_for_scalar_type (vinfo, midtype);
925 if (vec_midtype)
927 input = vect_recog_temp_ssa_var (midtype, NULL);
928 gassign *new_stmt = gimple_build_assign (input, NOP_EXPR,
929 unprom->op);
930 if (!vect_split_statement (vinfo, unprom->caster, input, new_stmt,
931 vec_midtype))
932 append_pattern_def_seq (vinfo, stmt_info,
933 new_stmt, vec_midtype);
937 /* See if we can reuse an existing result. */
938 if (types_compatible_p (type, TREE_TYPE (input)))
939 return input;
942 /* We need a new conversion statement. */
943 tree new_op = vect_recog_temp_ssa_var (type, NULL);
944 gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, input);
946 /* If OP is an external value, see if we can insert the new statement
947 on an incoming edge. */
948 if (input == unprom->op && unprom->dt == vect_external_def)
949 if (edge e = vect_get_external_def_edge (vinfo, input))
951 basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
952 gcc_assert (!new_bb);
953 return new_op;
956 /* As a (common) last resort, add the statement to the pattern itself. */
957 append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype);
958 return new_op;
961 /* Invoke vect_convert_input for N elements of UNPROM and store the
962 result in the corresponding elements of RESULT.
964 If SUBTYPE then convert the type based on the subtype. */
966 static void
967 vect_convert_inputs (vec_info *vinfo, stmt_vec_info stmt_info, unsigned int n,
968 tree *result, tree type, vect_unpromoted_value *unprom,
969 tree vectype, enum optab_subtype subtype = optab_default)
971 for (unsigned int i = 0; i < n; ++i)
973 unsigned int j;
974 for (j = 0; j < i; ++j)
975 if (unprom[j].op == unprom[i].op)
976 break;
978 if (j < i)
979 result[i] = result[j];
980 else
981 result[i] = vect_convert_input (vinfo, stmt_info,
982 type, &unprom[i], vectype, subtype);
986 /* The caller has created a (possibly empty) sequence of pattern definition
987 statements followed by a single statement PATTERN_STMT. Cast the result
988 of this final statement to TYPE. If a new statement is needed, add
989 PATTERN_STMT to the end of STMT_INFO's pattern definition statements
990 and return the new statement, otherwise return PATTERN_STMT as-is.
991 VECITYPE is the vector form of PATTERN_STMT's result type. */
993 static gimple *
994 vect_convert_output (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
995 gimple *pattern_stmt, tree vecitype)
997 tree lhs = gimple_get_lhs (pattern_stmt);
998 if (!types_compatible_p (type, TREE_TYPE (lhs)))
1000 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vecitype);
1001 tree cast_var = vect_recog_temp_ssa_var (type, NULL);
1002 pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
1004 return pattern_stmt;
1007 /* Return true if STMT_VINFO describes a reduction for which reassociation
1008 is allowed. If STMT_INFO is part of a group, assume that it's part of
1009 a reduction chain and optimistically assume that all statements
1010 except the last allow reassociation.
1011 Also require it to have code CODE and to be a reduction
1012 in the outermost loop. When returning true, store the operands in
1013 *OP0_OUT and *OP1_OUT. */
1015 static bool
1016 vect_reassociating_reduction_p (vec_info *vinfo,
1017 stmt_vec_info stmt_info, tree_code code,
1018 tree *op0_out, tree *op1_out)
1020 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
1021 if (!loop_info)
1022 return false;
1024 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
1025 if (!assign || gimple_assign_rhs_code (assign) != code)
1026 return false;
1028 /* We don't allow changing the order of the computation in the inner-loop
1029 when doing outer-loop vectorization. */
1030 class loop *loop = LOOP_VINFO_LOOP (loop_info);
1031 if (loop && nested_in_vect_loop_p (loop, stmt_info))
1032 return false;
1034 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
1036 if (needs_fold_left_reduction_p (TREE_TYPE (gimple_assign_lhs (assign)),
1037 code))
1038 return false;
1040 else if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) == NULL)
1041 return false;
1043 *op0_out = gimple_assign_rhs1 (assign);
1044 *op1_out = gimple_assign_rhs2 (assign);
1045 if (commutative_tree_code (code) && STMT_VINFO_REDUC_IDX (stmt_info) == 0)
1046 std::swap (*op0_out, *op1_out);
1047 return true;
1050 /* match.pd function to match
1051 (cond (cmp@3 a b) (convert@1 c) (convert@2 d))
1052 with conditions:
1053 1) @1, @2, c, d, a, b are all integral type.
1054 2) There's single_use for both @1 and @2.
1055 3) a, c have same precision.
1056 4) c and @1 have different precision.
1057 5) c, d are the same type or they can differ in sign when convert is
1058 truncation.
1060 record a and c and d and @3. */
1062 extern bool gimple_cond_expr_convert_p (tree, tree*, tree (*)(tree));
1064 /* Function vect_recog_cond_expr_convert
1066 Try to find the following pattern:
1068 TYPE_AB A,B;
1069 TYPE_CD C,D;
1070 TYPE_E E;
1071 TYPE_E op_true = (TYPE_E) A;
1072 TYPE_E op_false = (TYPE_E) B;
1074 E = C cmp D ? op_true : op_false;
1076 where
1077 TYPE_PRECISION (TYPE_E) != TYPE_PRECISION (TYPE_CD);
1078 TYPE_PRECISION (TYPE_AB) == TYPE_PRECISION (TYPE_CD);
1079 single_use of op_true and op_false.
1080 TYPE_AB could differ in sign when (TYPE_E) A is a truncation.
1082 Input:
1084 * STMT_VINFO: The stmt from which the pattern search begins.
1085 here it starts with E = c cmp D ? op_true : op_false;
1087 Output:
1089 TYPE1 E' = C cmp D ? A : B;
1090 TYPE3 E = (TYPE3) E';
1092 There may be an extra nop_convert for A or B to handle different signedness.
1094 * TYPE_OUT: The vector type of the output of this pattern.
1096 * Return value: A new stmt that will be used to replace the sequence of
1097 stmts that constitute the pattern. In this case it will be:
1098 E = (TYPE3)E';
1099 E' = C cmp D ? A : B; is recorded in pattern definition statements; */
1101 static gimple *
1102 vect_recog_cond_expr_convert_pattern (vec_info *vinfo,
1103 stmt_vec_info stmt_vinfo, tree *type_out)
1105 gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
1106 tree lhs, match[4], temp, type, new_lhs, op2;
1107 gimple *cond_stmt;
1108 gimple *pattern_stmt;
1110 if (!last_stmt)
1111 return NULL;
1113 lhs = gimple_assign_lhs (last_stmt);
1115 /* Find E = C cmp D ? (TYPE3) A : (TYPE3) B;
1116 TYPE_PRECISION (A) == TYPE_PRECISION (C). */
1117 if (!gimple_cond_expr_convert_p (lhs, &match[0], NULL))
1118 return NULL;
1120 vect_pattern_detected ("vect_recog_cond_expr_convert_pattern", last_stmt);
1122 op2 = match[2];
1123 type = TREE_TYPE (match[1]);
1124 if (TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (match[2])))
1126 op2 = vect_recog_temp_ssa_var (type, NULL);
1127 gimple* nop_stmt = gimple_build_assign (op2, NOP_EXPR, match[2]);
1128 append_pattern_def_seq (vinfo, stmt_vinfo, nop_stmt,
1129 get_vectype_for_scalar_type (vinfo, type));
1132 temp = vect_recog_temp_ssa_var (type, NULL);
1133 cond_stmt = gimple_build_assign (temp, build3 (COND_EXPR, type, match[3],
1134 match[1], op2));
1135 append_pattern_def_seq (vinfo, stmt_vinfo, cond_stmt,
1136 get_vectype_for_scalar_type (vinfo, type));
1137 new_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
1138 pattern_stmt = gimple_build_assign (new_lhs, NOP_EXPR, temp);
1139 *type_out = STMT_VINFO_VECTYPE (stmt_vinfo);
1141 if (dump_enabled_p ())
1142 dump_printf_loc (MSG_NOTE, vect_location,
1143 "created pattern stmt: %G", pattern_stmt);
1144 return pattern_stmt;
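/* As a concrete illustration (types chosen for the example only):

     long a[N], b[N], c[N], d[N];
     int e[N];
     ...
     e[i] = c[i] < d[i] ? (int) a[i] : (int) b[i];

   is rewritten so that the selection and the comparison use the same
   element width:

     long tmp = c[i] < d[i] ? a[i] : b[i];
     e[i] = (int) tmp;
 */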
1147 /* Function vect_recog_dot_prod_pattern
1149 Try to find the following pattern:
1151 type1a x_t
1152 type1b y_t;
1153 TYPE1 prod;
1154 TYPE2 sum = init;
1155 loop:
1156 sum_0 = phi <init, sum_1>
1157 S1 x_t = ...
1158 S2 y_t = ...
1159 S3 x_T = (TYPE1) x_t;
1160 S4 y_T = (TYPE1) y_t;
1161 S5 prod = x_T * y_T;
1162 [S6 prod = (TYPE2) prod; #optional]
1163 S7 sum_1 = prod + sum_0;
1165 where 'TYPE1' is exactly double the size of 'type1a' and 'type1b',
1166 the sign of 'TYPE1' must match the sign of either 'type1a' or 'type1b',
1167 and the signs of 'type1a' and 'type1b' may differ.
1169 Input:
1171 * STMT_VINFO: The stmt from which the pattern search begins. In the
1172 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
1173 will be detected.
1175 Output:
1177 * TYPE_OUT: The type of the output of this pattern.
1179 * Return value: A new stmt that will be used to replace the sequence of
1180 stmts that constitute the pattern. In this case it will be:
1181 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
1183 Note: The dot-prod idiom is a widening reduction pattern that is
1184 vectorized without preserving all the intermediate results. It
1185 produces only N/2 (widened) results (by summing up pairs of
1186 intermediate results) rather than all N results. Therefore, we
1187 cannot allow this pattern when we want to get all the results and in
1188 the correct order (as is the case when this computation is in an
1189 inner-loop nested in an outer-loop that is being vectorized). */
1191 static gimple *
1192 vect_recog_dot_prod_pattern (vec_info *vinfo,
1193 stmt_vec_info stmt_vinfo, tree *type_out)
1195 tree oprnd0, oprnd1;
1196 gimple *last_stmt = stmt_vinfo->stmt;
1197 tree type, half_type;
1198 gimple *pattern_stmt;
1199 tree var;
1201 /* Look for the following pattern
1202 DX = (TYPE1) X;
1203 DY = (TYPE1) Y;
1204 DPROD = DX * DY;
1205 DDPROD = (TYPE2) DPROD;
1206 sum_1 = DDPROD + sum_0;
1207 In which
1208 - DX is double the size of X
1209 - DY is double the size of Y
1210 - DX, DY, DPROD all have the same type but the sign
1211 between X, Y and DPROD can differ.
1212 - sum is the same size as DPROD or bigger
1213 - sum has been recognized as a reduction variable.
1215 This is equivalent to:
1216 DPROD = X w* Y; #widen mult
1217 sum_1 = DPROD w+ sum_0; #widen summation
1219 DPROD = X w* Y; #widen mult
1220 sum_1 = DPROD + sum_0; #summation */
1223 /* Starting from LAST_STMT, follow the defs of its uses in search
1224 of the above pattern. */
1226 if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1227 &oprnd0, &oprnd1))
1228 return NULL;
1230 type = TREE_TYPE (gimple_get_lhs (last_stmt));
1232 vect_unpromoted_value unprom_mult;
1233 oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);
1235 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1236 we know that oprnd1 is the reduction variable (defined by a loop-header
1237 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1238 Left to check that oprnd0 is defined by a (widen_)mult_expr */
1239 if (!oprnd0)
1240 return NULL;
1242 stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
1243 if (!mult_vinfo)
1244 return NULL;
1246 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
1247 inside the loop (in case we are analyzing an outer-loop). */
1248 vect_unpromoted_value unprom0[2];
1249 enum optab_subtype subtype = optab_vector;
1250 if (!vect_widened_op_tree (vinfo, mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
1251 false, 2, unprom0, &half_type, &subtype))
1252 return NULL;
1254 /* If there are two widening operations, make sure they agree on the sign
1255 of the extension. The result of an optab_vector_mixed_sign operation
1256 is signed; otherwise, the result has the same sign as the operands. */
1257 if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
1258 && (subtype == optab_vector_mixed_sign
1259 ? TYPE_UNSIGNED (unprom_mult.type)
1260 : TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type)))
1261 return NULL;
1263 vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);
1265 /* If the inputs have mixed signs, canonicalize on using the signed
1266 input type for analysis. This also helps when emulating mixed-sign
1267 operations using signed operations. */
1268 if (subtype == optab_vector_mixed_sign)
1269 half_type = signed_type_for (half_type);
1271 tree half_vectype;
1272 if (!vect_supportable_direct_optab_p (vinfo, type, DOT_PROD_EXPR, half_type,
1273 type_out, &half_vectype, subtype))
1275 /* We can emulate a mixed-sign dot-product using a sequence of
1276 signed dot-products; see vect_emulate_mixed_dot_prod for details. */
1277 if (subtype != optab_vector_mixed_sign
1278 || !vect_supportable_direct_optab_p (vinfo, signed_type_for (type),
1279 DOT_PROD_EXPR, half_type,
1280 type_out, &half_vectype,
1281 optab_vector))
1282 return NULL;
1284 *type_out = signed_or_unsigned_type_for (TYPE_UNSIGNED (type),
1285 *type_out);
1288 /* Get the inputs in the appropriate types. */
1289 tree mult_oprnd[2];
1290 vect_convert_inputs (vinfo, stmt_vinfo, 2, mult_oprnd, half_type,
1291 unprom0, half_vectype, subtype);
1293 var = vect_recog_temp_ssa_var (type, NULL);
1294 pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
1295 mult_oprnd[0], mult_oprnd[1], oprnd1);
1297 return pattern_stmt;
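/* As a concrete example, a reduction loop such as

     signed char a[N], b[N];
     int sum = 0;
     for (int i = 0; i < N; i++)
       sum += a[i] * b[i];

   matches this pattern and, when the target provides a dot-product optab
   for the modes involved, is rewritten to use
   DOT_PROD_EXPR <a[i], b[i], sum_0>.  */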
1301 /* Function vect_recog_sad_pattern
1303 Try to find the following Sum of Absolute Difference (SAD) pattern:
1305 type x_t, y_t;
1306 signed TYPE1 diff, abs_diff;
1307 TYPE2 sum = init;
1308 loop:
1309 sum_0 = phi <init, sum_1>
1310 S1 x_t = ...
1311 S2 y_t = ...
1312 S3 x_T = (TYPE1) x_t;
1313 S4 y_T = (TYPE1) y_t;
1314 S5 diff = x_T - y_T;
1315 S6 abs_diff = ABS_EXPR <diff>;
1316 [S7 abs_diff = (TYPE2) abs_diff; #optional]
1317 S8 sum_1 = abs_diff + sum_0;
1319 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
1320 same size as 'TYPE1' or bigger. This is a special case of a reduction
1321 computation.
1323 Input:
1325 * STMT_VINFO: The stmt from which the pattern search begins. In the
1326 example, when this function is called with S8, the pattern
1327 {S3,S4,S5,S6,S7,S8} will be detected.
1329 Output:
1331 * TYPE_OUT: The type of the output of this pattern.
1333 * Return value: A new stmt that will be used to replace the sequence of
1334 stmts that constitute the pattern. In this case it will be:
1335 SAD_EXPR <x_t, y_t, sum_0> */
1338 static gimple *
1339 vect_recog_sad_pattern (vec_info *vinfo,
1340 stmt_vec_info stmt_vinfo, tree *type_out)
1342 gimple *last_stmt = stmt_vinfo->stmt;
1343 tree half_type;
1345 /* Look for the following pattern
1346 DX = (TYPE1) X;
1347 DY = (TYPE1) Y;
1348 DDIFF = DX - DY;
1349 DAD = ABS_EXPR <DDIFF>;
1350 DDPROD = (TYPE2) DPROD;
1351 sum_1 = DAD + sum_0;
1352 In which
1353 - DX is at least double the size of X
1354 - DY is at least double the size of Y
1355 - DX, DY, DDIFF, DAD all have the same type
1356 - sum is the same size as DAD or bigger
1357 - sum has been recognized as a reduction variable.
1359 This is equivalent to:
1360 DDIFF = X w- Y; #widen sub
1361 DAD = ABS_EXPR <DDIFF>;
1362 sum_1 = DAD w+ sum_0; #widen summation
1364 DDIFF = X w- Y; #widen sub
1365 DAD = ABS_EXPR <DDIFF>;
1366 sum_1 = DAD + sum_0; #summation */
1369 /* Starting from LAST_STMT, follow the defs of its uses in search
1370 of the above pattern. */
1372 tree plus_oprnd0, plus_oprnd1;
1373 if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1374 &plus_oprnd0, &plus_oprnd1))
1375 return NULL;
1377 tree sum_type = TREE_TYPE (gimple_get_lhs (last_stmt));
1379 /* Any non-truncating sequence of conversions is OK here, since
1380 with a successful match, the result of the ABS(U) is known to fit
1381 within the nonnegative range of the result type. (It cannot be the
1382 negative of the minimum signed value due to the range of the widening
1383 MINUS_EXPR.) */
1384 vect_unpromoted_value unprom_abs;
1385 plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
1386 &unprom_abs);
1388 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1389 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
1390 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
1391 Then check that plus_oprnd0 is defined by an abs_expr. */
1393 if (!plus_oprnd0)
1394 return NULL;
1396 stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
1397 if (!abs_stmt_vinfo)
1398 return NULL;
1400 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
1401 inside the loop (in case we are analyzing an outer-loop). */
1402 gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
1403 vect_unpromoted_value unprom[2];
1405 if (!abs_stmt)
1407 gcall *abd_stmt = dyn_cast <gcall *> (abs_stmt_vinfo->stmt);
1408 if (!abd_stmt
1409 || !gimple_call_internal_p (abd_stmt)
1410 || gimple_call_num_args (abd_stmt) != 2)
1411 return NULL;
1413 tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
1414 tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
1416 if (gimple_call_internal_fn (abd_stmt) == IFN_ABD)
1418 if (!vect_look_through_possible_promotion (vinfo, abd_oprnd0,
1419 &unprom[0])
1420 || !vect_look_through_possible_promotion (vinfo, abd_oprnd1,
1421 &unprom[1]))
1422 return NULL;
1424 else if (gimple_call_internal_fn (abd_stmt) == IFN_VEC_WIDEN_ABD)
1426 unprom[0].op = abd_oprnd0;
1427 unprom[0].type = TREE_TYPE (abd_oprnd0);
1428 unprom[1].op = abd_oprnd1;
1429 unprom[1].type = TREE_TYPE (abd_oprnd1);
1431 else
1432 return NULL;
1434 half_type = unprom[0].type;
1436 else if (!vect_recog_absolute_difference (vinfo, abs_stmt, &half_type,
1437 unprom, NULL))
1438 return NULL;
1440 vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
1442 tree half_vectype;
1443 if (!vect_supportable_direct_optab_p (vinfo, sum_type, SAD_EXPR, half_type,
1444 type_out, &half_vectype))
1445 return NULL;
1447 /* Get the inputs to the SAD_EXPR in the appropriate types. */
1448 tree sad_oprnd[2];
1449 vect_convert_inputs (vinfo, stmt_vinfo, 2, sad_oprnd, half_type,
1450 unprom, half_vectype);
1452 tree var = vect_recog_temp_ssa_var (sum_type, NULL);
1453 gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
1454 sad_oprnd[1], plus_oprnd1);
1456 return pattern_stmt;
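/* As a concrete example, a sum-of-absolute-differences loop such as

     unsigned char x[N], y[N];
     int sum = 0;
     for (int i = 0; i < N; i++)
       sum += __builtin_abs (x[i] - y[i]);

   matches this pattern and, when the target provides a SAD optab, is
   rewritten to use SAD_EXPR <x[i], y[i], sum_0>.  */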
1459 /* Function vect_recog_abd_pattern
1461 Try to find the following ABsolute Difference (ABD) or
1462 widening ABD (WIDEN_ABD) pattern:
1464 TYPE1 x;
1465 TYPE2 y;
1466 TYPE3 x_cast = (TYPE3) x; // widening or no-op
1467 TYPE3 y_cast = (TYPE3) y; // widening or no-op
1468 TYPE3 diff = x_cast - y_cast;
1469 TYPE4 diff_cast = (TYPE4) diff; // widening or no-op
1470 TYPE5 abs = ABS(U)_EXPR <diff_cast>;
1472 WIDEN_ABD exists to optimize the case where TYPE4 is at least
1473 twice as wide as TYPE3.
1475 Input:
1477 * STMT_VINFO: The stmt from which the pattern search begins
1479 Output:
1481 * TYPE_OUT: The type of the output of this pattern
1483 * Return value: A new stmt that will be used to replace the sequence of
1484 stmts that constitute the pattern, principally:
1485 out = IFN_ABD (x, y)
1486 out = IFN_WIDEN_ABD (x, y) */
1489 static gimple *
1490 vect_recog_abd_pattern (vec_info *vinfo,
1491 stmt_vec_info stmt_vinfo, tree *type_out)
1493 gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
1494 if (!last_stmt)
1495 return NULL;
1497 tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
1499 vect_unpromoted_value unprom[2];
1500 gassign *diff_stmt;
1501 tree half_type;
1502 if (!vect_recog_absolute_difference (vinfo, last_stmt, &half_type,
1503 unprom, &diff_stmt))
1504 return NULL;
1506 tree abd_in_type, abd_out_type;
1508 if (half_type)
1510 abd_in_type = half_type;
1511 abd_out_type = abd_in_type;
1513 else
1515 unprom[0].op = gimple_assign_rhs1 (diff_stmt);
1516 unprom[1].op = gimple_assign_rhs2 (diff_stmt);
1517 abd_in_type = signed_type_for (out_type);
1518 abd_out_type = abd_in_type;
1521 tree vectype_in = get_vectype_for_scalar_type (vinfo, abd_in_type);
1522 if (!vectype_in)
1523 return NULL;
1525 internal_fn ifn = IFN_ABD;
1526 tree vectype_out = vectype_in;
1528 if (TYPE_PRECISION (out_type) >= TYPE_PRECISION (abd_in_type) * 2
1529 && stmt_vinfo->min_output_precision >= TYPE_PRECISION (abd_in_type) * 2)
1531 tree mid_type
1532 = build_nonstandard_integer_type (TYPE_PRECISION (abd_in_type) * 2,
1533 TYPE_UNSIGNED (abd_in_type));
1534 tree mid_vectype = get_vectype_for_scalar_type (vinfo, mid_type);
1536 code_helper dummy_code;
1537 int dummy_int;
1538 auto_vec<tree> dummy_vec;
1539 if (mid_vectype
1540 && supportable_widening_operation (vinfo, IFN_VEC_WIDEN_ABD,
1541 stmt_vinfo, mid_vectype,
1542 vectype_in,
1543 &dummy_code, &dummy_code,
1544 &dummy_int, &dummy_vec))
1546 ifn = IFN_VEC_WIDEN_ABD;
1547 abd_out_type = mid_type;
1548 vectype_out = mid_vectype;
1552 if (ifn == IFN_ABD
1553 && !direct_internal_fn_supported_p (ifn, vectype_in,
1554 OPTIMIZE_FOR_SPEED))
1555 return NULL;
1557 vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
1559 tree abd_oprnds[2];
1560 vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
1561 abd_in_type, unprom, vectype_in);
1563 *type_out = get_vectype_for_scalar_type (vinfo, out_type);
1565 tree abd_result = vect_recog_temp_ssa_var (abd_out_type, NULL);
1566 gcall *abd_stmt = gimple_build_call_internal (ifn, 2,
1567 abd_oprnds[0], abd_oprnds[1]);
1568 gimple_call_set_lhs (abd_stmt, abd_result);
1569 gimple_set_location (abd_stmt, gimple_location (last_stmt));
1571 gimple *stmt = abd_stmt;
1572 if (TYPE_PRECISION (abd_in_type) == TYPE_PRECISION (abd_out_type)
1573 && TYPE_PRECISION (abd_out_type) < TYPE_PRECISION (out_type)
1574 && !TYPE_UNSIGNED (abd_out_type))
1576 tree unsign = unsigned_type_for (abd_out_type);
1577 tree unsign_vectype = get_vectype_for_scalar_type (vinfo, unsign);
1578 stmt = vect_convert_output (vinfo, stmt_vinfo, unsign, stmt,
1579 unsign_vectype);
1582 return vect_convert_output (vinfo, stmt_vinfo, out_type, stmt, vectype_out);
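/* As a concrete example, an element-wise absolute-difference loop such as

     unsigned char x[N], y[N], out[N];
     for (int i = 0; i < N; i++)
       out[i] = __builtin_abs (x[i] - y[i]);

   can be rewritten to use IFN_ABD (x[i], y[i]) when the target supports it,
   or IFN_VEC_WIDEN_ABD when the result is stored into a type at least twice
   as wide as the inputs.  */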
1585 /* Recognize an operation that performs ORIG_CODE on widened inputs,
1586 so that it can be treated as though it had the form:
1588 A_TYPE a;
1589 B_TYPE b;
1590 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1591 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1592 | RES_TYPE a_extend = (RES_TYPE) a_cast; // promotion from HALF_TYPE
1593 | RES_TYPE b_extend = (RES_TYPE) b_cast; // promotion from HALF_TYPE
1594 | RES_TYPE res = a_extend ORIG_CODE b_extend;
1596 Try to replace the pattern with:
1598 A_TYPE a;
1599 B_TYPE b;
1600 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1601 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1602 | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
1603 | RES_TYPE res = (EXT_TYPE) ext; // possible no-op
1605 where EXT_TYPE is wider than HALF_TYPE but has the same signedness.
1607 SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts. NAME is the
1608 name of the pattern being matched, for dump purposes. */
1610 static gimple *
1611 vect_recog_widen_op_pattern (vec_info *vinfo,
1612 stmt_vec_info last_stmt_info, tree *type_out,
1613 tree_code orig_code, code_helper wide_code,
1614 bool shift_p, const char *name)
1616 gimple *last_stmt = last_stmt_info->stmt;
1618 vect_unpromoted_value unprom[2];
1619 tree half_type;
1620 if (!vect_widened_op_tree (vinfo, last_stmt_info, orig_code, orig_code,
1621 shift_p, 2, unprom, &half_type))
1623 return NULL;
1625 /* Pattern detected. */
1626 vect_pattern_detected (name, last_stmt);
1628 tree type = TREE_TYPE (gimple_get_lhs (last_stmt));
1629 tree itype = type;
1630 if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
1631 || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
1632 itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
1633 TYPE_UNSIGNED (half_type));
1635 /* Check target support */
1636 tree vectype = get_vectype_for_scalar_type (vinfo, half_type);
1637 tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
1638 tree ctype = itype;
1639 tree vecctype = vecitype;
1640 if (orig_code == MINUS_EXPR
1641 && TYPE_UNSIGNED (itype)
1642 && TYPE_PRECISION (type) > TYPE_PRECISION (itype))
1644 /* Subtraction is special, even if half_type is unsigned and no matter
1645 whether type is signed or unsigned, if type is wider than itype,
1646 we need to sign-extend from the widening operation result to the
1647 result type.
1648 Consider half_type unsigned char, operand 1 0xfe, operand 2 0xff,
1649 itype unsigned short and type either int or unsigned int.
1650 Widened (unsigned short) 0xfe - (unsigned short) 0xff is
1651 (unsigned short) 0xffff, but for type int we want the result -1
1652 and for type unsigned int 0xffffffff rather than 0xffff. */
1653 ctype = build_nonstandard_integer_type (TYPE_PRECISION (itype), 0);
1654 vecctype = get_vectype_for_scalar_type (vinfo, ctype);
1657 code_helper dummy_code;
1658 int dummy_int;
1659 auto_vec<tree> dummy_vec;
1660 if (!vectype
1661 || !vecitype
1662 || !vecctype
1663 || !supportable_widening_operation (vinfo, wide_code, last_stmt_info,
1664 vecitype, vectype,
1665 &dummy_code, &dummy_code,
1666 &dummy_int, &dummy_vec))
1667 return NULL;
1669 *type_out = get_vectype_for_scalar_type (vinfo, type);
1670 if (!*type_out)
1671 return NULL;
1673 tree oprnd[2];
1674 vect_convert_inputs (vinfo, last_stmt_info,
1675 2, oprnd, half_type, unprom, vectype);
1677 tree var = vect_recog_temp_ssa_var (itype, NULL);
1678 gimple *pattern_stmt = vect_gimple_build (var, wide_code, oprnd[0], oprnd[1]);
1680 if (vecctype != vecitype)
1681 pattern_stmt = vect_convert_output (vinfo, last_stmt_info, ctype,
1682 pattern_stmt, vecitype);
1684 return vect_convert_output (vinfo, last_stmt_info,
1685 type, pattern_stmt, vecctype);
1688 /* Try to detect multiplication on widened inputs, converting MULT_EXPR
1689 to WIDEN_MULT_EXPR. See vect_recog_widen_op_pattern for details. */
1691 static gimple *
1692 vect_recog_widen_mult_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1693 tree *type_out)
1695 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1696 MULT_EXPR, WIDEN_MULT_EXPR, false,
1697 "vect_recog_widen_mult_pattern");
1700 /* Try to detect addition on widened inputs, converting PLUS_EXPR
1701 to IFN_VEC_WIDEN_PLUS. See vect_recog_widen_op_pattern for details. */
1703 static gimple *
1704 vect_recog_widen_plus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1705 tree *type_out)
1707 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1708 PLUS_EXPR, IFN_VEC_WIDEN_PLUS,
1709 false, "vect_recog_widen_plus_pattern");
1712 /* Try to detect subtraction on widened inputs, converting MINUS_EXPR
1713 to IFN_VEC_WIDEN_MINUS. See vect_recog_widen_op_pattern for details. */
1714 static gimple *
1715 vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1716 tree *type_out)
1718 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1719 MINUS_EXPR, IFN_VEC_WIDEN_MINUS,
1720 false, "vect_recog_widen_minus_pattern");
1723 /* Try to detect abd on widened inputs, converting IFN_ABD
1724 to IFN_VEC_WIDEN_ABD. */
1725 static gimple *
1726 vect_recog_widen_abd_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1727 tree *type_out)
1729 gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
1730 if (!last_stmt || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (last_stmt)))
1731 return NULL;
1733 tree last_rhs = gimple_assign_rhs1 (last_stmt);
1735 tree in_type = TREE_TYPE (last_rhs);
1736 tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
1737 if (!INTEGRAL_TYPE_P (in_type)
1738 || !INTEGRAL_TYPE_P (out_type)
1739 || TYPE_PRECISION (in_type) * 2 != TYPE_PRECISION (out_type)
1740 || !TYPE_UNSIGNED (in_type))
1741 return NULL;
1743 vect_unpromoted_value unprom;
1744 tree op = vect_look_through_possible_promotion (vinfo, last_rhs, &unprom);
1745 if (!op || TYPE_PRECISION (TREE_TYPE (op)) != TYPE_PRECISION (in_type))
1746 return NULL;
1748 stmt_vec_info abd_pattern_vinfo = vect_get_internal_def (vinfo, op);
1749 if (!abd_pattern_vinfo)
1750 return NULL;
1752 abd_pattern_vinfo = vect_stmt_to_vectorize (abd_pattern_vinfo);
1753 gcall *abd_stmt = dyn_cast <gcall *> (STMT_VINFO_STMT (abd_pattern_vinfo));
1754 if (!abd_stmt
1755 || !gimple_call_internal_p (abd_stmt)
1756 || gimple_call_internal_fn (abd_stmt) != IFN_ABD)
1757 return NULL;
1759 tree vectype_in = get_vectype_for_scalar_type (vinfo, in_type);
1760 tree vectype_out = get_vectype_for_scalar_type (vinfo, out_type);
1762 code_helper dummy_code;
1763 int dummy_int;
1764 auto_vec<tree> dummy_vec;
1765 if (!supportable_widening_operation (vinfo, IFN_VEC_WIDEN_ABD, stmt_vinfo,
1766 vectype_out, vectype_in,
1767 &dummy_code, &dummy_code,
1768 &dummy_int, &dummy_vec))
1769 return NULL;
1771 vect_pattern_detected ("vect_recog_widen_abd_pattern", last_stmt);
1773 *type_out = vectype_out;
1775 tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
1776 tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
1777 tree widen_abd_result = vect_recog_temp_ssa_var (out_type, NULL);
1778 gcall *widen_abd_stmt = gimple_build_call_internal (IFN_VEC_WIDEN_ABD, 2,
1779 abd_oprnd0, abd_oprnd1);
1780 gimple_call_set_lhs (widen_abd_stmt, widen_abd_result);
1781 gimple_set_location (widen_abd_stmt, gimple_location (last_stmt));
1782 return widen_abd_stmt;
1785 /* Function vect_recog_ctz_ffs_pattern
1787 Try to find the following pattern:
1789 TYPE1 A;
1790 TYPE1 B;
1792 B = __builtin_ctz{,l,ll} (A);
1794 or
1796 B = __builtin_ffs{,l,ll} (A);
1798 Input:
1800 * STMT_VINFO: The stmt from which the pattern search begins.
1801 here it starts with B = __builtin_* (A);
1803 Output:
1805 * TYPE_OUT: The vector type of the output of this pattern.
1807 * Return value: A new stmt that will be used to replace the sequence of
1808 stmts that constitute the pattern, using clz or popcount builtins. */
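/* For illustration, worked instances of the identities used below, taking
   X = 12 (binary 1100), PREC = 32 and X != 0:

     (X - 1) & ~X = 0011:  .POPCOUNT = 2 = .CTZ (X),  32 - .CLZ = 2
     X & -X       = 0100:  31 - .CLZ = 31 - 29 = 2 = .CTZ (X),
                           32 - .CLZ = 3 = .FFS (X)
     X | -X       = bits 2..31 set:  32 - .POPCOUNT = 32 - 30 = 2 = .CTZ (X),
                           33 - .POPCOUNT = 3 = .FFS (X)

   The code below additionally handles the values the internal functions
   are defined to return at zero, where applicable.  */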
1810 static gimple *
1811 vect_recog_ctz_ffs_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1812 tree *type_out)
1814 gimple *call_stmt = stmt_vinfo->stmt;
1815 gimple *pattern_stmt;
1816 tree rhs_oprnd, rhs_type, lhs_oprnd, lhs_type, vec_type, vec_rhs_type;
1817 tree new_var;
1818 internal_fn ifn = IFN_LAST, ifnnew = IFN_LAST;
1819 bool defined_at_zero = true, defined_at_zero_new = false;
1820 int val = 0, val_new = 0;
1821 int prec;
1822 int sub = 0, add = 0;
1823 location_t loc;
1825 if (!is_gimple_call (call_stmt))
1826 return NULL;
1828 if (gimple_call_num_args (call_stmt) != 1)
1829 return NULL;
1831 rhs_oprnd = gimple_call_arg (call_stmt, 0);
1832 rhs_type = TREE_TYPE (rhs_oprnd);
1833 lhs_oprnd = gimple_call_lhs (call_stmt);
1834 if (!lhs_oprnd)
1835 return NULL;
1836 lhs_type = TREE_TYPE (lhs_oprnd);
1837 if (!INTEGRAL_TYPE_P (lhs_type)
1838 || !INTEGRAL_TYPE_P (rhs_type)
1839 || !type_has_mode_precision_p (rhs_type)
1840 || TREE_CODE (rhs_oprnd) != SSA_NAME)
1841 return NULL;
1843 switch (gimple_call_combined_fn (call_stmt))
1845 CASE_CFN_CTZ:
1846 ifn = IFN_CTZ;
1847 if (!gimple_call_internal_p (call_stmt)
1848 || CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (rhs_type),
1849 val) != 2)
1850 defined_at_zero = false;
1851 break;
1852 CASE_CFN_FFS:
1853 ifn = IFN_FFS;
1854 break;
1855 default:
1856 return NULL;
1859 prec = TYPE_PRECISION (rhs_type);
1860 loc = gimple_location (call_stmt);
1862 vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);
1863 if (!vec_type)
1864 return NULL;
1866 vec_rhs_type = get_vectype_for_scalar_type (vinfo, rhs_type);
1867 if (!vec_rhs_type)
1868 return NULL;
1870 /* Do it only if the backend doesn't have ctz<vector_mode>2 or
1871 ffs<vector_mode>2 pattern but does have clz<vector_mode>2 or
1872 popcount<vector_mode>2. */
1873 if (!vec_type
1874 || direct_internal_fn_supported_p (ifn, vec_rhs_type,
1875 OPTIMIZE_FOR_SPEED))
1876 return NULL;
1878 if (ifn == IFN_FFS
1879 && direct_internal_fn_supported_p (IFN_CTZ, vec_rhs_type,
1880 OPTIMIZE_FOR_SPEED))
1882 ifnnew = IFN_CTZ;
1883 defined_at_zero_new
1884 = CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (rhs_type),
1885 val_new) == 2;
1887 else if (direct_internal_fn_supported_p (IFN_CLZ, vec_rhs_type,
1888 OPTIMIZE_FOR_SPEED))
1890 ifnnew = IFN_CLZ;
1891 defined_at_zero_new
1892 = CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (rhs_type),
1893 val_new) == 2;
1895 if ((ifnnew == IFN_LAST
1896 || (defined_at_zero && !defined_at_zero_new))
1897 && direct_internal_fn_supported_p (IFN_POPCOUNT, vec_rhs_type,
1898 OPTIMIZE_FOR_SPEED))
1900 ifnnew = IFN_POPCOUNT;
1901 defined_at_zero_new = true;
1902 val_new = prec;
1904 if (ifnnew == IFN_LAST)
1905 return NULL;
1907 vect_pattern_detected ("vect_recog_ctz_ffs_pattern", call_stmt);
1909 if ((ifnnew == IFN_CLZ
1910 && defined_at_zero
1911 && defined_at_zero_new
1912 && val == prec
1913 && val_new == prec)
1914 || (ifnnew == IFN_POPCOUNT && ifn == IFN_CTZ))
1916 /* .CTZ (X) = PREC - .CLZ ((X - 1) & ~X)
1917 .CTZ (X) = .POPCOUNT ((X - 1) & ~X). */
1918 if (ifnnew == IFN_CLZ)
1919 sub = prec;
1920 val_new = prec;
1922 if (!TYPE_UNSIGNED (rhs_type))
1924 rhs_type = unsigned_type_for (rhs_type);
1925 vec_rhs_type = get_vectype_for_scalar_type (vinfo, rhs_type);
1926 new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
1927 pattern_stmt = gimple_build_assign (new_var, NOP_EXPR, rhs_oprnd);
1928 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt,
1929 vec_rhs_type);
1930 rhs_oprnd = new_var;
1933 tree m1 = vect_recog_temp_ssa_var (rhs_type, NULL);
1934 pattern_stmt = gimple_build_assign (m1, PLUS_EXPR, rhs_oprnd,
1935 build_int_cst (rhs_type, -1));
1936 gimple_set_location (pattern_stmt, loc);
1937 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1939 new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
1940 pattern_stmt = gimple_build_assign (new_var, BIT_NOT_EXPR, rhs_oprnd);
1941 gimple_set_location (pattern_stmt, loc);
1942 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1943 rhs_oprnd = new_var;
1945 new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
1946 pattern_stmt = gimple_build_assign (new_var, BIT_AND_EXPR,
1947 m1, rhs_oprnd);
1948 gimple_set_location (pattern_stmt, loc);
1949 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1950 rhs_oprnd = new_var;
1952 else if (ifnnew == IFN_CLZ)
1954 /* .CTZ (X) = (PREC - 1) - .CLZ (X & -X)
1955 .FFS (X) = PREC - .CLZ (X & -X). */
1956 sub = prec - (ifn == IFN_CTZ);
1957 val_new = sub - val_new;
1959 tree neg = vect_recog_temp_ssa_var (rhs_type, NULL);
1960 pattern_stmt = gimple_build_assign (neg, NEGATE_EXPR, rhs_oprnd);
1961 gimple_set_location (pattern_stmt, loc);
1962 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1964 new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
1965 pattern_stmt = gimple_build_assign (new_var, BIT_AND_EXPR,
1966 rhs_oprnd, neg);
1967 gimple_set_location (pattern_stmt, loc);
1968 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1969 rhs_oprnd = new_var;
1971 else if (ifnnew == IFN_POPCOUNT)
1973 /* .CTZ (X) = PREC - .POPCOUNT (X | -X)
1974 .FFS (X) = (PREC + 1) - .POPCOUNT (X | -X). */
1975 sub = prec + (ifn == IFN_FFS);
1976 val_new = sub;
1978 tree neg = vect_recog_temp_ssa_var (rhs_type, NULL);
1979 pattern_stmt = gimple_build_assign (neg, NEGATE_EXPR, rhs_oprnd);
1980 gimple_set_location (pattern_stmt, loc);
1981 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1983 new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
1984 pattern_stmt = gimple_build_assign (new_var, BIT_IOR_EXPR,
1985 rhs_oprnd, neg);
1986 gimple_set_location (pattern_stmt, loc);
1987 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1988 rhs_oprnd = new_var;
1990 else if (ifnnew == IFN_CTZ)
1992 /* .FFS (X) = .CTZ (X) + 1. */
1993 add = 1;
1994 val_new++;
1997 /* Create B = .IFNNEW (A). */
1998 new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
1999 pattern_stmt = gimple_build_call_internal (ifnnew, 1, rhs_oprnd);
2000 gimple_call_set_lhs (pattern_stmt, new_var);
2001 gimple_set_location (pattern_stmt, loc);
2002 *type_out = vec_type;
2004 if (sub)
2006 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
2007 tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2008 pattern_stmt = gimple_build_assign (ret_var, MINUS_EXPR,
2009 build_int_cst (lhs_type, sub),
2010 new_var);
2011 gimple_set_location (pattern_stmt, loc);
2012 new_var = ret_var;
2014 else if (add)
2016 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
2017 tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2018 pattern_stmt = gimple_build_assign (ret_var, PLUS_EXPR, new_var,
2019 build_int_cst (lhs_type, add));
2020 gimple_set_location (pattern_stmt, loc);
2021 new_var = ret_var;
2024 if (defined_at_zero
2025 && (!defined_at_zero_new || val != val_new))
2027 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
2028 tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2029 rhs_oprnd = gimple_call_arg (call_stmt, 0);
2030 rhs_type = TREE_TYPE (rhs_oprnd);
2031 tree cmp = build2_loc (loc, NE_EXPR, boolean_type_node,
2032 rhs_oprnd, build_zero_cst (rhs_type));
2033 pattern_stmt = gimple_build_assign (ret_var, COND_EXPR, cmp,
2034 new_var,
2035 build_int_cst (lhs_type, val));
2038 if (dump_enabled_p ())
2039 dump_printf_loc (MSG_NOTE, vect_location,
2040 "created pattern stmt: %G", pattern_stmt);
2042 return pattern_stmt;
2045 /* Function vect_recog_popcount_clz_ctz_ffs_pattern
2047 Try to find the following pattern:
2049 UTYPE1 A;
2050 TYPE1 B;
2051 UTYPE2 temp_in;
2052 TYPE3 temp_out;
2053 temp_in = (UTYPE2)A;
2055 temp_out = __builtin_popcount{,l,ll} (temp_in);
2056 B = (TYPE1) temp_out;
2058 UTYPE2 may or may not be equal to TYPE3,
2059 e.g. UTYPE2 is equal to TYPE3 for __builtin_popcount,
2060 but UTYPE2 is not equal to TYPE3 for __builtin_popcountll.
2062 Input:
2064 * STMT_VINFO: The stmt from which the pattern search begins.
2065 here it starts with B = (TYPE1) temp_out;
2067 Output:
2069 * TYPE_OUT: The vector type of the output of this pattern.
2071 * Return value: A new stmt that will be used to replace the sequence of
2072 stmts that constitute the pattern. In this case it will be:
2073 B = .POPCOUNT (A);
2075 Similarly for clz, ctz and ffs.
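/* For example (a sketch, assuming A is a 32-bit unsigned int):

     unsigned long long temp_in = (unsigned long long) A;
     int temp_out = __builtin_popcountll (temp_in);
     B = (int) temp_out;

   is replaced by

     B = .POPCOUNT (A);

   which is valid because zero-extending A cannot add set bits to the
   count.  */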
2078 static gimple *
2079 vect_recog_popcount_clz_ctz_ffs_pattern (vec_info *vinfo,
2080 stmt_vec_info stmt_vinfo,
2081 tree *type_out)
2083 gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
2084 gimple *call_stmt, *pattern_stmt;
2085 tree rhs_oprnd, rhs_origin, lhs_oprnd, lhs_type, vec_type, new_var;
2086 internal_fn ifn = IFN_LAST;
2087 int addend = 0;
2089 /* Find B = (TYPE1) temp_out. */
2090 if (!last_stmt)
2091 return NULL;
2092 tree_code code = gimple_assign_rhs_code (last_stmt);
2093 if (!CONVERT_EXPR_CODE_P (code))
2094 return NULL;
2096 lhs_oprnd = gimple_assign_lhs (last_stmt);
2097 lhs_type = TREE_TYPE (lhs_oprnd);
2098 if (!INTEGRAL_TYPE_P (lhs_type))
2099 return NULL;
2101 rhs_oprnd = gimple_assign_rhs1 (last_stmt);
2102 if (TREE_CODE (rhs_oprnd) != SSA_NAME
2103 || !has_single_use (rhs_oprnd))
2104 return NULL;
2105 call_stmt = SSA_NAME_DEF_STMT (rhs_oprnd);
2107 /* Find temp_out = __builtin_popcount{,l,ll} (temp_in); */
2108 if (!is_gimple_call (call_stmt))
2109 return NULL;
2110 switch (gimple_call_combined_fn (call_stmt))
2112 int val;
2113 CASE_CFN_POPCOUNT:
2114 ifn = IFN_POPCOUNT;
2115 break;
2116 CASE_CFN_CLZ:
2117 ifn = IFN_CLZ;
2118 /* Punt if call result is unsigned and defined value at zero
2119 is negative, as the negative value doesn't extend correctly. */
2120 if (TYPE_UNSIGNED (TREE_TYPE (rhs_oprnd))
2121 && gimple_call_internal_p (call_stmt)
2122 && CLZ_DEFINED_VALUE_AT_ZERO
2123 (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd)), val) == 2
2124 && val < 0)
2125 return NULL;
2126 break;
2127 CASE_CFN_CTZ:
2128 ifn = IFN_CTZ;
2129 /* Punt if call result is unsigned and defined value at zero
2130 is negative, as the negative value doesn't extend correctly. */
2131 if (TYPE_UNSIGNED (TREE_TYPE (rhs_oprnd))
2132 && gimple_call_internal_p (call_stmt)
2133 && CTZ_DEFINED_VALUE_AT_ZERO
2134 (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd)), val) == 2
2135 && val < 0)
2136 return NULL;
2137 break;
2138 CASE_CFN_FFS:
2139 ifn = IFN_FFS;
2140 break;
2141 default:
2142 return NULL;
2145 if (gimple_call_num_args (call_stmt) != 1)
2146 return NULL;
2148 rhs_oprnd = gimple_call_arg (call_stmt, 0);
2149 vect_unpromoted_value unprom_diff;
2150 rhs_origin
2151 = vect_look_through_possible_promotion (vinfo, rhs_oprnd, &unprom_diff);
2153 if (!rhs_origin)
2154 return NULL;
2156 /* Input and output of .POPCOUNT should be integers of the same precision. */
2157 if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (lhs_type))
2158 return NULL;
2160 /* Also A should be unsigned or same precision as temp_in, otherwise
2161 different builtins/internal functions have different behaviors. */
2162 if (TYPE_PRECISION (unprom_diff.type)
2163 != TYPE_PRECISION (TREE_TYPE (rhs_oprnd)))
2164 switch (ifn)
2166 case IFN_POPCOUNT:
2167 /* For popcount require zero extension, which doesn't add any
2168 further bits to the count. */
2169 if (!TYPE_UNSIGNED (unprom_diff.type))
2170 return NULL;
2171 break;
2172 case IFN_CLZ:
2173 /* clzll (x) == clz (x) + 32 for unsigned x != 0, so ok
2174 if it is undefined at zero or if it matches also for the
2175 defined value there. */
2176 if (!TYPE_UNSIGNED (unprom_diff.type))
2177 return NULL;
2178 if (!type_has_mode_precision_p (lhs_type)
2179 || !type_has_mode_precision_p (TREE_TYPE (rhs_oprnd)))
2180 return NULL;
2181 addend = (TYPE_PRECISION (TREE_TYPE (rhs_oprnd))
2182 - TYPE_PRECISION (lhs_type));
2183 if (gimple_call_internal_p (call_stmt))
2185 int val1, val2;
2186 int d1
2187 = CLZ_DEFINED_VALUE_AT_ZERO
2188 (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd)), val1);
2189 int d2
2190 = CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
2191 val2);
2192 if (d1 != 2)
2193 break;
2194 if (d2 != 2 || val1 != val2 + addend)
2195 return NULL;
2197 break;
2198 case IFN_CTZ:
2199 /* ctzll (x) == ctz (x) for unsigned or signed x != 0, so ok
2200 if it is undefined at zero or if it matches also for the
2201 defined value there. */
2202 if (gimple_call_internal_p (call_stmt))
2204 int val1, val2;
2205 int d1
2206 = CTZ_DEFINED_VALUE_AT_ZERO
2207 (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd)), val1);
2208 int d2
2209 = CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
2210 val2);
2211 if (d1 != 2)
2212 break;
2213 if (d2 != 2 || val1 != val2)
2214 return NULL;
2216 break;
2217 case IFN_FFS:
2218 /* ffsll (x) == ffs (x) for unsigned or signed x. */
2219 break;
2220 default:
2221 gcc_unreachable ();
2224 vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);
2225 /* Do it only if the backend has popcount<vector_mode>2 etc. pattern. */
2226 if (!vec_type)
2227 return NULL;
2229 bool supported
2230 = direct_internal_fn_supported_p (ifn, vec_type, OPTIMIZE_FOR_SPEED);
2231 if (!supported)
2232 switch (ifn)
2234 case IFN_POPCOUNT:
2235 case IFN_CLZ:
2236 return NULL;
2237 case IFN_FFS:
2238 /* vect_recog_ctz_ffs_pattern can implement ffs using ctz. */
2239 if (direct_internal_fn_supported_p (IFN_CTZ, vec_type,
2240 OPTIMIZE_FOR_SPEED))
2241 break;
2242 /* FALLTHRU */
2243 case IFN_CTZ:
2244 /* vect_recog_ctz_ffs_pattern can implement ffs or ctz using
2245 clz or popcount. */
2246 if (direct_internal_fn_supported_p (IFN_CLZ, vec_type,
2247 OPTIMIZE_FOR_SPEED))
2248 break;
2249 if (direct_internal_fn_supported_p (IFN_POPCOUNT, vec_type,
2250 OPTIMIZE_FOR_SPEED))
2251 break;
2252 return NULL;
2253 default:
2254 gcc_unreachable ();
2257 vect_pattern_detected ("vect_recog_popcount_clz_ctz_ffs_pattern",
2258 call_stmt);
2260 /* Create B = .POPCOUNT (A). */
2261 new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2262 pattern_stmt = gimple_build_call_internal (ifn, 1, unprom_diff.op);
2263 gimple_call_set_lhs (pattern_stmt, new_var);
2264 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
2265 *type_out = vec_type;
2267 if (dump_enabled_p ())
2268 dump_printf_loc (MSG_NOTE, vect_location,
2269 "created pattern stmt: %G", pattern_stmt);
2271 if (addend)
2273 gcc_assert (supported);
2274 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
2275 tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2276 pattern_stmt = gimple_build_assign (ret_var, PLUS_EXPR, new_var,
2277 build_int_cst (lhs_type, addend));
2279 else if (!supported)
2281 stmt_vec_info new_stmt_info = vinfo->add_stmt (pattern_stmt);
2282 STMT_VINFO_VECTYPE (new_stmt_info) = vec_type;
2283 pattern_stmt
2284 = vect_recog_ctz_ffs_pattern (vinfo, new_stmt_info, type_out);
2285 if (pattern_stmt == NULL)
2286 return NULL;
2287 if (gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (new_stmt_info))
2289 gimple_seq *pseq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
2290 gimple_seq_add_seq_without_update (pseq, seq);
2293 return pattern_stmt;
2296 /* Function vect_recog_pow_pattern
2298 Try to find the following pattern:
2300 x = POW (y, N);
2302 with POW being one of pow, powf, powi, powif and N being
2303 either 2 or 0.5.
2305 Input:
2307 * STMT_VINFO: The stmt from which the pattern search begins.
2309 Output:
2311 * TYPE_OUT: The type of the output of this pattern.
2313 * Return value: A new stmt that will be used to replace the sequence of
2314 stmts that constitute the pattern. In this case it will be:
2315 x = x * x
2316 or
2317 x = sqrt (x)
2320 static gimple *
2321 vect_recog_pow_pattern (vec_info *vinfo,
2322 stmt_vec_info stmt_vinfo, tree *type_out)
2324 gimple *last_stmt = stmt_vinfo->stmt;
2325 tree base, exp;
2326 gimple *stmt;
2327 tree var;
2329 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
2330 return NULL;
2332 switch (gimple_call_combined_fn (last_stmt))
2334 CASE_CFN_POW:
2335 CASE_CFN_POWI:
2336 break;
2338 default:
2339 return NULL;
2342 base = gimple_call_arg (last_stmt, 0);
2343 exp = gimple_call_arg (last_stmt, 1);
2344 if (TREE_CODE (exp) != REAL_CST
2345 && TREE_CODE (exp) != INTEGER_CST)
2347 if (flag_unsafe_math_optimizations
2348 && TREE_CODE (base) == REAL_CST
2349 && gimple_call_builtin_p (last_stmt, BUILT_IN_NORMAL))
2351 combined_fn log_cfn;
2352 built_in_function exp_bfn;
2353 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
2355 case BUILT_IN_POW:
2356 log_cfn = CFN_BUILT_IN_LOG;
2357 exp_bfn = BUILT_IN_EXP;
2358 break;
2359 case BUILT_IN_POWF:
2360 log_cfn = CFN_BUILT_IN_LOGF;
2361 exp_bfn = BUILT_IN_EXPF;
2362 break;
2363 case BUILT_IN_POWL:
2364 log_cfn = CFN_BUILT_IN_LOGL;
2365 exp_bfn = BUILT_IN_EXPL;
2366 break;
2367 default:
2368 return NULL;
2370 tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
2371 tree exp_decl = builtin_decl_implicit (exp_bfn);
2372 /* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd
2373 does that, but if C is a power of 2, we want to use
2374 exp2 (log2 (C) * x) in the non-vectorized version, but for
2375 vectorization we don't have vectorized exp2. */
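	 /* For instance (illustrative constants only): pow (2.0, x) is
	    rewritten here as exp (x * 0.6931471805599453), i.e.
	    exp (log (2.0) * x), so that the "omp declare simd" clone of exp
	    can be vectorized even though there is no vectorized exp2.  */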
2376 if (logc
2377 && TREE_CODE (logc) == REAL_CST
2378 && exp_decl
2379 && lookup_attribute ("omp declare simd",
2380 DECL_ATTRIBUTES (exp_decl)))
2382 cgraph_node *node = cgraph_node::get_create (exp_decl);
2383 if (node->simd_clones == NULL)
2385 if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
2386 || node->definition)
2387 return NULL;
2388 expand_simd_clones (node);
2389 if (node->simd_clones == NULL)
2390 return NULL;
2392 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
2393 if (!*type_out)
2394 return NULL;
2395 tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
2396 gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
2397 append_pattern_def_seq (vinfo, stmt_vinfo, g);
2398 tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
2399 g = gimple_build_call (exp_decl, 1, def);
2400 gimple_call_set_lhs (g, res);
2401 return g;
2405 return NULL;
2408 /* We now have a pow or powi builtin function call with a constant
2409 exponent. */
2411 /* Catch squaring. */
2412 if ((tree_fits_shwi_p (exp)
2413 && tree_to_shwi (exp) == 2)
2414 || (TREE_CODE (exp) == REAL_CST
2415 && real_equal (&TREE_REAL_CST (exp), &dconst2)))
2417 if (!vect_supportable_direct_optab_p (vinfo, TREE_TYPE (base), MULT_EXPR,
2418 TREE_TYPE (base), type_out))
2419 return NULL;
2421 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
2422 stmt = gimple_build_assign (var, MULT_EXPR, base, base);
2423 return stmt;
2426 /* Catch square root. */
2427 if (TREE_CODE (exp) == REAL_CST
2428 && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
2430 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
2431 if (*type_out
2432 && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
2433 OPTIMIZE_FOR_SPEED))
2435 gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
2436 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
2437 gimple_call_set_lhs (stmt, var);
2438 gimple_call_set_nothrow (stmt, true);
2439 return stmt;
2443 return NULL;
2447 /* Function vect_recog_widen_sum_pattern
2449 Try to find the following pattern:
2451 type x_t;
2452 TYPE x_T, sum = init;
2453 loop:
2454 sum_0 = phi <init, sum_1>
2455 S1 x_t = *p;
2456 S2 x_T = (TYPE) x_t;
2457 S3 sum_1 = x_T + sum_0;
2459 where type 'TYPE' is at least double the size of type 'type', i.e. we're
2460 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
2461 a special case of a reduction computation.
2463 Input:
2465 * STMT_VINFO: The stmt from which the pattern search begins. In the example,
2466 when this function is called with S3, the pattern {S2,S3} will be detected.
2468 Output:
2470 * TYPE_OUT: The type of the output of this pattern.
2472 * Return value: A new stmt that will be used to replace the sequence of
2473 stmts that constitute the pattern. In this case it will be:
2474 WIDEN_SUM <x_t, sum_0>
2476 Note: The widening-sum idiom is a widening reduction pattern that is
2477 vectorized without preserving all the intermediate results. It
2478 produces only N/2 (widened) results (by summing up pairs of
2479 intermediate results) rather than all N results. Therefore, we
2480 cannot allow this pattern when we want to get all the results and in
2481 the correct order (as is the case when this computation is in an
2482 inner-loop nested in an outer-loop that is being vectorized). */
2484 static gimple *
2485 vect_recog_widen_sum_pattern (vec_info *vinfo,
2486 stmt_vec_info stmt_vinfo, tree *type_out)
2488 gimple *last_stmt = stmt_vinfo->stmt;
2489 tree oprnd0, oprnd1;
2490 tree type;
2491 gimple *pattern_stmt;
2492 tree var;
2494 /* Look for the following pattern
2495 DX = (TYPE) X;
2496 sum_1 = DX + sum_0;
2497 In which DX is at least double the size of X, and sum_1 has been
2498 recognized as a reduction variable.
2501 /* Starting from LAST_STMT, follow the defs of its uses in search
2502 of the above pattern. */
2504 if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
2505 &oprnd0, &oprnd1)
2506 || TREE_CODE (oprnd0) != SSA_NAME
2507 || !vinfo->lookup_def (oprnd0))
2508 return NULL;
2510 type = TREE_TYPE (gimple_get_lhs (last_stmt));
2512 /* So far so good. Since last_stmt was detected as a (summation) reduction,
2513 we know that oprnd1 is the reduction variable (defined by a loop-header
2514 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
2515 Left to check that oprnd0 is defined by a cast from type 'type' to type
2516 'TYPE'. */
2518 vect_unpromoted_value unprom0;
2519 if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
2520 || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
2521 return NULL;
2523 vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
2525 if (!vect_supportable_direct_optab_p (vinfo, type, WIDEN_SUM_EXPR,
2526 unprom0.type, type_out))
2527 return NULL;
2529 var = vect_recog_temp_ssa_var (type, NULL);
2530 pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);
2532 return pattern_stmt;
2535 /* Function vect_recog_bitfield_ref_pattern
2537 Try to find the following pattern:
2539 bf_value = BIT_FIELD_REF (container, bitsize, bitpos);
2540 result = (type_out) bf_value;
2542 where type_out is a non-bitfield type, that is to say, its precision matches
2543 2^(TYPE_SIZE(type_out) - (TYPE_UNSIGNED (type_out) ? 1 : 2)).
2545 Input:
2547 * STMT_VINFO: The stmt from which the pattern search begins.
2548 here it starts with:
2549 result = (type_out) bf_value;
2551 Output:
2553 * TYPE_OUT: The vector type of the output of this pattern.
2555 * Return value: A new stmt that will be used to replace the sequence of
2556 stmts that constitute the pattern. If the precision of type_out is bigger
2557 than the precision of the container's type we perform the widening before the shifting,
2558 since the new precision will be large enough to shift the value and moving
2559 widening operations up the statement chain enables the generation of
2560 widening loads. If we are widening and the operation after the pattern is
2561 an addition then we mask first and shift later, to enable the generation of
2562 shifting adds. In the case of narrowing we will always mask first, shift
2563 last and then perform a narrowing operation. This will enable the
2564 generation of narrowing shifts.
2566 Widening with mask first, shift later:
2567 container = (type_out) container;
2568 masked = container & (((1 << bitsize) - 1) << bitpos);
2569 result = masked >> bitpos;
2571 Widening with shift first, mask last:
2572 container = (type_out) container;
2573 shifted = container >> bitpos;
2574 result = shifted & ((1 << bitsize) - 1);
2576 Narrowing:
2577 masked = container & (((1 << bitsize) - 1) << bitpos);
2578 result = masked >> bitpos;
2579 result = (type_out) result;
2581 If the bitfield is signed and narrower than type_out, we need to
2582 keep the result sign-extended:
2583 container = (type) container;
2584 masked = container << (prec - bitsize - bitpos);
2585 result = (type_out) (masked >> (prec - bitsize));
2587 Here type is the signed variant of the wider of type_out and the type
2588 of container.
2590 The shifting is optional, depending on whether bitpos != 0.
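/* A concrete example (illustrative only): for a 32-bit unsigned container C,
   bitsize 5, bitpos 3 and an unsigned result with no widening, the mask-first
   form is

     masked = C & (((1 << 5) - 1) << 3);   // C & 0xf8
     result = masked >> 3;

   and the shift-first form is

     shifted = C >> 3;
     result = shifted & ((1 << 5) - 1);    // shifted & 0x1f  */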
2594 static gimple *
2595 vect_recog_bitfield_ref_pattern (vec_info *vinfo, stmt_vec_info stmt_info,
2596 tree *type_out)
2598 gassign *first_stmt = dyn_cast <gassign *> (stmt_info->stmt);
2600 if (!first_stmt)
2601 return NULL;
2603 gassign *bf_stmt;
2604 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (first_stmt))
2605 && TREE_CODE (gimple_assign_rhs1 (first_stmt)) == SSA_NAME)
2607 gimple *second_stmt
2608 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (first_stmt));
2609 bf_stmt = dyn_cast <gassign *> (second_stmt);
2610 if (!bf_stmt
2611 || gimple_assign_rhs_code (bf_stmt) != BIT_FIELD_REF)
2612 return NULL;
2614 else
2615 return NULL;
2617 tree bf_ref = gimple_assign_rhs1 (bf_stmt);
2618 tree container = TREE_OPERAND (bf_ref, 0);
2620 if (!bit_field_offset (bf_ref).is_constant ()
2621 || !bit_field_size (bf_ref).is_constant ()
2622 || !tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (container))))
2623 return NULL;
2625 if (!INTEGRAL_TYPE_P (TREE_TYPE (bf_ref))
2626 || !INTEGRAL_TYPE_P (TREE_TYPE (container))
2627 || TYPE_MODE (TREE_TYPE (container)) == E_BLKmode)
2628 return NULL;
2630 gimple *use_stmt, *pattern_stmt;
2631 use_operand_p use_p;
2632 tree ret = gimple_assign_lhs (first_stmt);
2633 tree ret_type = TREE_TYPE (ret);
2634 bool shift_first = true;
2635 tree container_type = TREE_TYPE (container);
2636 tree vectype = get_vectype_for_scalar_type (vinfo, container_type);
2638 /* Calculate shift_n before the adjustments for widening loads; otherwise
2639 the container may change and we would have to account for the offset
2640 change on big-endian targets. The shift_n calculated here can be
2641 used independently of widening. */
2642 unsigned HOST_WIDE_INT shift_n = bit_field_offset (bf_ref).to_constant ();
2643 unsigned HOST_WIDE_INT mask_width = bit_field_size (bf_ref).to_constant ();
2644 unsigned HOST_WIDE_INT prec = tree_to_uhwi (TYPE_SIZE (container_type));
2645 if (BYTES_BIG_ENDIAN)
2646 shift_n = prec - shift_n - mask_width;
2648 bool ref_sext = (!TYPE_UNSIGNED (TREE_TYPE (bf_ref))
2649 && TYPE_PRECISION (ret_type) > mask_width);
2650 bool load_widen = (TYPE_PRECISION (TREE_TYPE (container))
2651 < TYPE_PRECISION (ret_type));
2653 /* We move the conversion earlier if the loaded type is smaller than the
2654 return type to enable the use of widening loads. And if we need a
2655 sign extension, we need to convert the loaded value early to a signed
2656 type as well. */
2657 if (ref_sext || load_widen)
2659 tree type = load_widen ? ret_type : container_type;
2660 if (ref_sext)
2661 type = gimple_signed_type (type);
2662 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type),
2663 NOP_EXPR, container);
2664 container = gimple_get_lhs (pattern_stmt);
2665 container_type = TREE_TYPE (container);
2666 prec = tree_to_uhwi (TYPE_SIZE (container_type));
2667 vectype = get_vectype_for_scalar_type (vinfo, container_type);
2668 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
2670 else if (!useless_type_conversion_p (TREE_TYPE (container), ret_type))
2671 /* If we are doing the conversion last then also delay the shift as we may
2672 be able to combine the shift and conversion in certain cases. */
2673 shift_first = false;
2675 /* If the only use of the result of this BIT_FIELD_REF + CONVERT is a
2676 PLUS_EXPR then do the shift last as some targets can combine the shift and
2677 add into a single instruction. */
2678 if (single_imm_use (gimple_assign_lhs (first_stmt), &use_p, &use_stmt))
2680 if (gimple_code (use_stmt) == GIMPLE_ASSIGN
2681 && gimple_assign_rhs_code (use_stmt) == PLUS_EXPR)
2682 shift_first = false;
2685 /* If we don't have to shift we only generate the mask, so just fix the
2686 code-path to shift_first. */
2687 if (shift_n == 0)
2688 shift_first = true;
2690 tree result;
2691 if (shift_first && !ref_sext)
2693 tree shifted = container;
2694 if (shift_n)
2696 pattern_stmt
2697 = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2698 RSHIFT_EXPR, container,
2699 build_int_cst (sizetype, shift_n));
2700 shifted = gimple_assign_lhs (pattern_stmt);
2701 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
2704 tree mask = wide_int_to_tree (container_type,
2705 wi::mask (mask_width, false, prec));
2707 pattern_stmt
2708 = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2709 BIT_AND_EXPR, shifted, mask);
2710 result = gimple_assign_lhs (pattern_stmt);
2712 else
2714 tree temp = vect_recog_temp_ssa_var (container_type);
2715 if (!ref_sext)
2717 tree mask = wide_int_to_tree (container_type,
2718 wi::shifted_mask (shift_n,
2719 mask_width,
2720 false, prec));
2721 pattern_stmt = gimple_build_assign (temp, BIT_AND_EXPR,
2722 container, mask);
2724 else
2726 HOST_WIDE_INT shl = prec - shift_n - mask_width;
2727 shift_n += shl;
2728 pattern_stmt = gimple_build_assign (temp, LSHIFT_EXPR,
2729 container,
2730 build_int_cst (sizetype,
2731 shl));
2734 tree masked = gimple_assign_lhs (pattern_stmt);
2735 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
2736 pattern_stmt
2737 = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2738 RSHIFT_EXPR, masked,
2739 build_int_cst (sizetype, shift_n));
2740 result = gimple_assign_lhs (pattern_stmt);
2743 if (!useless_type_conversion_p (TREE_TYPE (result), ret_type))
2745 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
2746 pattern_stmt
2747 = gimple_build_assign (vect_recog_temp_ssa_var (ret_type),
2748 NOP_EXPR, result);
2751 *type_out = STMT_VINFO_VECTYPE (stmt_info);
2752 vect_pattern_detected ("bitfield_ref pattern", stmt_info->stmt);
2754 return pattern_stmt;
2757 /* Function vect_recog_bit_insert_pattern
2759 Try to find the following pattern:
2761 written = BIT_INSERT_EXPR (container, value, bitpos);
2763 Input:
2765 * STMT_VINFO: The stmt we want to replace.
2767 Output:
2769 * TYPE_OUT: The vector type of the output of this pattern.
2771 * Return value: A new stmt that will be used to replace the sequence of
2772 stmts that constitute the pattern. In this case it will be:
2773 value = (container_type) value; // Make sure value has the container's type.
2774 shifted = value << bitpos; // Shift value into place
2775 masked = shifted & (mask << bitpos); // Mask off the non-relevant bits in
2776 // the 'to-write value'.
2777 cleared = container & ~(mask << bitpos); // Clear the bits in the container
2778 // that we are going to
2779 // write to.
2780 written = cleared | masked; // Write bits.
2783 where mask = ((1 << TYPE_PRECISION (value)) - 1), a mask that keeps only the
2784 bits corresponding to the real size of the bitfield value we are writing to.
2785 The shifting is optional, depending on whether bitpos != 0.
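/* A concrete example (illustrative only): writing a 5-bit value V at
   bitpos 3 of a 32-bit container C expands to

     value   = (unsigned int) V;
     shifted = value << 3;
     masked  = shifted & 0xf8;        // (((1 << 5) - 1) << 3)
     cleared = C & ~0xf8;
     written = cleared | masked;  */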
2789 static gimple *
2790 vect_recog_bit_insert_pattern (vec_info *vinfo, stmt_vec_info stmt_info,
2791 tree *type_out)
2793 gassign *bf_stmt = dyn_cast <gassign *> (stmt_info->stmt);
2794 if (!bf_stmt || gimple_assign_rhs_code (bf_stmt) != BIT_INSERT_EXPR)
2795 return NULL;
2797 tree container = gimple_assign_rhs1 (bf_stmt);
2798 tree value = gimple_assign_rhs2 (bf_stmt);
2799 tree shift = gimple_assign_rhs3 (bf_stmt);
2801 tree bf_type = TREE_TYPE (value);
2802 tree container_type = TREE_TYPE (container);
2804 if (!INTEGRAL_TYPE_P (container_type)
2805 || !tree_fits_uhwi_p (TYPE_SIZE (container_type)))
2806 return NULL;
2808 gimple *pattern_stmt;
2810 vect_unpromoted_value unprom;
2811 unprom.set_op (value, vect_internal_def);
2812 value = vect_convert_input (vinfo, stmt_info, container_type, &unprom,
2813 get_vectype_for_scalar_type (vinfo,
2814 container_type));
2816 unsigned HOST_WIDE_INT mask_width = TYPE_PRECISION (bf_type);
2817 unsigned HOST_WIDE_INT prec = tree_to_uhwi (TYPE_SIZE (container_type));
2818 unsigned HOST_WIDE_INT shift_n = tree_to_uhwi (shift);
2819 if (BYTES_BIG_ENDIAN)
2821 shift_n = prec - shift_n - mask_width;
2822 shift = build_int_cst (TREE_TYPE (shift), shift_n);
2825 if (!useless_type_conversion_p (TREE_TYPE (value), container_type))
2827 pattern_stmt =
2828 gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2829 NOP_EXPR, value);
2830 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
2831 value = gimple_get_lhs (pattern_stmt);
2834 /* Shift VALUE into place. */
2835 tree shifted = value;
2836 if (shift_n)
2838 gimple_seq stmts = NULL;
2839 shifted
2840 = gimple_build (&stmts, LSHIFT_EXPR, container_type, value, shift);
2841 if (!gimple_seq_empty_p (stmts))
2842 append_pattern_def_seq (vinfo, stmt_info,
2843 gimple_seq_first_stmt (stmts));
2846 tree mask_t
2847 = wide_int_to_tree (container_type,
2848 wi::shifted_mask (shift_n, mask_width, false, prec));
2850 /* Clear bits we don't want to write back from SHIFTED. */
2851 gimple_seq stmts = NULL;
2852 tree masked = gimple_build (&stmts, BIT_AND_EXPR, container_type, shifted,
2853 mask_t);
2854 if (!gimple_seq_empty_p (stmts))
2856 pattern_stmt = gimple_seq_first_stmt (stmts);
2857 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
2860 /* Mask off the bits in the container that we are to write to. */
2861 mask_t = wide_int_to_tree (container_type,
2862 wi::shifted_mask (shift_n, mask_width, true, prec));
2863 tree cleared = vect_recog_temp_ssa_var (container_type);
2864 pattern_stmt = gimple_build_assign (cleared, BIT_AND_EXPR, container, mask_t);
2865 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
2867 /* Write MASKED into CLEARED. */
2868 pattern_stmt
2869 = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2870 BIT_IOR_EXPR, cleared, masked);
2872 *type_out = STMT_VINFO_VECTYPE (stmt_info);
2873 vect_pattern_detected ("bit_insert pattern", stmt_info->stmt);
2875 return pattern_stmt;
2879 /* Recognize cases in which an operation is performed in one type WTYPE
2880 but could be done more efficiently in a narrower type NTYPE. For example,
2881 if we have:
2883 ATYPE a; // narrower than NTYPE
2884 BTYPE b; // narrower than NTYPE
2885 WTYPE aw = (WTYPE) a;
2886 WTYPE bw = (WTYPE) b;
2887 WTYPE res = aw + bw; // only uses of aw and bw
2889 then it would be more efficient to do:
2891 NTYPE an = (NTYPE) a;
2892 NTYPE bn = (NTYPE) b;
2893 NTYPE resn = an + bn;
2894 WTYPE res = (WTYPE) resn;
2896 Other situations include things like:
2898 ATYPE a; // NTYPE or narrower
2899 WTYPE aw = (WTYPE) a;
2900 WTYPE res = aw + b;
2902 when only "(NTYPE) res" is significant. In that case it's more efficient
2903 to truncate "b" and do the operation on NTYPE instead:
2905 NTYPE an = (NTYPE) a;
2906 NTYPE bn = (NTYPE) b; // truncation
2907 NTYPE resn = an + bn;
2908 WTYPE res = (WTYPE) resn;
2910 All users of "res" should then use "resn" instead, making the final
2911 statement dead (not marked as relevant). The final statement is still
2912 needed to maintain the type correctness of the IR.
2914 vect_determine_precisions has already determined the minimum
2915 precision of the operation and the minimum precision required
2916 by users of the result. */
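/* A concrete instance (illustrative; the actual narrowing is decided by the
   precision analysis described above): with ATYPE = BTYPE = unsigned char,
   WTYPE = int, and users of res only needing 16 bits,

     int res = (int) a + (int) b;

   is better done as

     unsigned short an = (unsigned short) a;
     unsigned short bn = (unsigned short) b;
     unsigned short resn = an + bn;
     int res = (int) resn;

   so each vector operation processes twice as many elements.  */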
2918 static gimple *
2919 vect_recog_over_widening_pattern (vec_info *vinfo,
2920 stmt_vec_info last_stmt_info, tree *type_out)
2922 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
2923 if (!last_stmt)
2924 return NULL;
2926 /* See whether we have found that this operation can be done on a
2927 narrower type without changing its semantics. */
2928 unsigned int new_precision = last_stmt_info->operation_precision;
2929 if (!new_precision)
2930 return NULL;
2932 tree lhs = gimple_assign_lhs (last_stmt);
2933 tree type = TREE_TYPE (lhs);
2934 tree_code code = gimple_assign_rhs_code (last_stmt);
2936 /* Punt for reductions where we don't handle the type conversions. */
2937 if (STMT_VINFO_DEF_TYPE (last_stmt_info) == vect_reduction_def)
2938 return NULL;
2940 /* Keep the first operand of a COND_EXPR as-is: only the other two
2941 operands are interesting. */
2942 unsigned int first_op = (code == COND_EXPR ? 2 : 1);
2944 /* Check the operands. */
2945 unsigned int nops = gimple_num_ops (last_stmt) - first_op;
2946 auto_vec <vect_unpromoted_value, 3> unprom (nops);
2947 unprom.quick_grow (nops);
2948 unsigned int min_precision = 0;
2949 bool single_use_p = false;
2950 for (unsigned int i = 0; i < nops; ++i)
2952 tree op = gimple_op (last_stmt, first_op + i);
2953 if (TREE_CODE (op) == INTEGER_CST)
2954 unprom[i].set_op (op, vect_constant_def);
2955 else if (TREE_CODE (op) == SSA_NAME)
2957 bool op_single_use_p = true;
2958 if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
2959 &op_single_use_p))
2960 return NULL;
2961 /* If:
2963 (1) N bits of the result are needed;
2964 (2) all inputs are widened from M<N bits; and
2965 (3) one operand OP is a single-use SSA name
2967 we can shift the M->N widening from OP to the output
2968 without changing the number or type of extensions involved.
2969 This then reduces the number of copies of STMT_INFO.
2971 If instead of (3) more than one operand is a single-use SSA name,
2972 shifting the extension to the output is even more of a win.
2974 If instead:
2976 (1) N bits of the result are needed;
2977 (2) one operand OP2 is widened from M2<N bits;
2978 (3) another operand OP1 is widened from M1<M2 bits; and
2979 (4) both OP1 and OP2 are single-use
2981 the choice is between:
2983 (a) truncating OP2 to M1, doing the operation on M1,
2984 and then widening the result to N
2986 (b) widening OP1 to M2, doing the operation on M2, and then
2987 widening the result to N
2989 Both shift the M2->N widening of the inputs to the output.
2990 (a) additionally shifts the M1->M2 widening to the output;
2991 it requires fewer copies of STMT_INFO but requires an extra
2992 M2->M1 truncation.
2994 Which is better will depend on the complexity and cost of
2995 STMT_INFO, which is hard to predict at this stage. However,
2996 a clear tie-breaker in favor of (b) is the fact that the
2997 truncation in (a) increases the length of the operation chain.
2999 If instead of (4) only one of OP1 or OP2 is single-use,
3000 (b) is still a win over doing the operation in N bits:
3001 it still shifts the M2->N widening on the single-use operand
3002 to the output and reduces the number of STMT_INFO copies.
3004 If neither operand is single-use then operating on fewer than
3005 N bits might lead to more extensions overall. Whether it does
3006 or not depends on global information about the vectorization
3007 region, and whether that's a good trade-off would again
3008 depend on the complexity and cost of the statements involved,
3009 as well as things like register pressure that are not normally
3010 modelled at this stage. We therefore ignore these cases
3011 and just optimize the clear single-use wins above.
3013 Thus we take the maximum precision of the unpromoted operands
3014 and record whether any operand is single-use. */
3015 if (unprom[i].dt == vect_internal_def)
3017 min_precision = MAX (min_precision,
3018 TYPE_PRECISION (unprom[i].type));
3019 single_use_p |= op_single_use_p;
3022 else
3023 return NULL;
3026 /* Although the operation could be done in operation_precision, we have
3027 to balance that against introducing extra truncations or extensions.
3028 Calculate the minimum precision that can be handled efficiently.
3030 The loop above determined that the operation could be handled
3031 efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
3032 extension from the inputs to the output without introducing more
3033 instructions, and would reduce the number of instructions required
3034 for STMT_INFO itself.
3036 vect_determine_precisions has also determined that the result only
3037 needs min_output_precision bits. Truncating by a factor of N times
3038 requires a tree of N - 1 instructions, so if TYPE is N times wider
3039 than min_output_precision, doing the operation in TYPE and truncating
3040 the result requires N + (N - 1) = 2N - 1 instructions per output vector.
3041 In contrast:
3043 - truncating the input to a unary operation and doing the operation
3044 in the new type requires at most N - 1 + 1 = N instructions per
3045 output vector
3047 - doing the same for a binary operation requires at most
3048 (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
3050 Both unary and binary operations require fewer instructions than
3051 this if the operands were extended from a suitable truncated form.
3052 Thus there is usually nothing to lose by doing operations in
3053 min_output_precision bits, but there can be something to gain. */
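/* Worked numbers for the count above (illustrative): if TYPE is 64 bits wide
   and min_output_precision is 16, then N = 4.  Doing the operation in TYPE
   and truncating costs 4 + 3 = 7 instructions per output vector, versus at
   most 4 for a unary operation and at most 7 for a binary operation done
   directly in the 16-bit type, so operating in min_output_precision bits
   never costs more here.  */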
3054 if (!single_use_p)
3055 min_precision = last_stmt_info->min_output_precision;
3056 else
3057 min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
3059 /* Apply the minimum efficient precision we just calculated. */
3060 if (new_precision < min_precision)
3061 new_precision = min_precision;
3062 new_precision = vect_element_precision (new_precision);
3063 if (new_precision >= TYPE_PRECISION (type))
3064 return NULL;
3066 vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);
3068 *type_out = get_vectype_for_scalar_type (vinfo, type);
3069 if (!*type_out)
3070 return NULL;
3072 /* We've found a viable pattern. Get the new type of the operation. */
3073 bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
3074 tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
3076 /* If we're truncating an operation, we need to make sure that we
3077 don't introduce new undefined overflow. The codes tested here are
3078 a subset of those accepted by vect_truncatable_operation_p. */
3079 tree op_type = new_type;
3080 if (TYPE_OVERFLOW_UNDEFINED (new_type)
3081 && (code == PLUS_EXPR || code == MINUS_EXPR || code == MULT_EXPR))
3082 op_type = build_nonstandard_integer_type (new_precision, true);
3084 /* We specifically don't check here whether the target supports the
3085 new operation, since it might be something that a later pattern
3086 wants to rewrite anyway. If targets have a minimum element size
3087 for some optabs, we should pattern-match smaller ops to larger ops
3088 where beneficial. */
3089 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
3090 tree op_vectype = get_vectype_for_scalar_type (vinfo, op_type);
3091 if (!new_vectype || !op_vectype)
3092 return NULL;
3094 if (dump_enabled_p ())
3095 dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
3096 type, new_type);
3098 /* Calculate the rhs operands for an operation on OP_TYPE. */
3099 tree ops[3] = {};
3100 for (unsigned int i = 1; i < first_op; ++i)
3101 ops[i - 1] = gimple_op (last_stmt, i);
3102 /* For right shifts limit the shift operand. */
3103 vect_convert_inputs (vinfo, last_stmt_info, nops, &ops[first_op - 1],
3104 op_type, &unprom[0], op_vectype);
3106 /* Limit shift operands. */
3107 if (code == RSHIFT_EXPR)
3109 wide_int min_value, max_value;
3110 if (TREE_CODE (ops[1]) == INTEGER_CST)
3111 ops[1] = wide_int_to_tree (op_type,
3112 wi::umin (wi::to_wide (ops[1]),
3113 new_precision - 1));
3114 else if (!vect_get_range_info (ops[1], &min_value, &max_value)
3115 || wi::ge_p (max_value, new_precision, TYPE_SIGN (op_type)))
3117 /* ??? Note the following is bad for SLP as that only supports
3118 same-argument widened shifts and it un-CSEs same arguments. */
3119 tree new_var = vect_recog_temp_ssa_var (op_type, NULL);
3120 gimple *pattern_stmt
3121 = gimple_build_assign (new_var, MIN_EXPR, ops[1],
3122 build_int_cst (op_type, new_precision - 1));
3123 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
3124 if (ops[1] == unprom[1].op && unprom[1].dt == vect_external_def)
3126 if (edge e = vect_get_external_def_edge (vinfo, ops[1]))
3128 basic_block new_bb
3129 = gsi_insert_on_edge_immediate (e, pattern_stmt);
3130 gcc_assert (!new_bb);
3132 else
3133 return NULL;
3135 else
3136 append_pattern_def_seq (vinfo, last_stmt_info, pattern_stmt,
3137 op_vectype);
3138 ops[1] = new_var;
3142 /* Use the operation to produce a result of type OP_TYPE. */
3143 tree new_var = vect_recog_temp_ssa_var (op_type, NULL);
3144 gimple *pattern_stmt = gimple_build_assign (new_var, code,
3145 ops[0], ops[1], ops[2]);
3146 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
3148 if (dump_enabled_p ())
3149 dump_printf_loc (MSG_NOTE, vect_location,
3150 "created pattern stmt: %G", pattern_stmt);
3152 /* Convert back to the original signedness, if OP_TYPE is different
3153 from NEW_TYPE. */
3154 if (op_type != new_type)
3155 pattern_stmt = vect_convert_output (vinfo, last_stmt_info, new_type,
3156 pattern_stmt, op_vectype);
3158 /* Promote the result to the original type. */
3159 pattern_stmt = vect_convert_output (vinfo, last_stmt_info, type,
3160 pattern_stmt, new_vectype);
3162 return pattern_stmt;
3165 /* Recognize the following patterns:
3167 ATYPE a; // narrower than TYPE
3168 BTYPE b; // narrower than TYPE
3170 1) Multiply high with scaling
3171 TYPE res = ((TYPE) a * (TYPE) b) >> c;
3172 Here, c is bitsize (TYPE) / 2 - 1.
3174 2) ... or also with rounding
3175 TYPE res = ((((TYPE) a * (TYPE) b) >> d) + 1) >> 1;
3176 Here, d is bitsize (TYPE) / 2 - 2.
3178 3) Normal multiply high
3179 TYPE res = ((TYPE) a * (TYPE) b) >> e;
3180 Here, e is bitsize (TYPE) / 2.
3182 where only the bottom half of res is used. */
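/* For example (a sketch with 16-bit a and b and 32-bit TYPE):

     TYPE res = ((TYPE) a * (TYPE) b) >> 15;    // pattern 1)

   becomes .MULHS (a, b), and

     TYPE res = ((TYPE) a * (TYPE) b) >> 16;    // pattern 3)

   becomes .MULH (a, b), provided only the low 16 bits of res are used.  */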
3184 static gimple *
3185 vect_recog_mulhs_pattern (vec_info *vinfo,
3186 stmt_vec_info last_stmt_info, tree *type_out)
3188 /* Check for a right shift. */
3189 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
3190 if (!last_stmt
3191 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR)
3192 return NULL;
3194 /* Check that the shift result is wider than the users of the
3195 result need (i.e. that narrowing would be a natural choice). */
3196 tree lhs_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
3197 unsigned int target_precision
3198 = vect_element_precision (last_stmt_info->min_output_precision);
3199 if (!INTEGRAL_TYPE_P (lhs_type)
3200 || target_precision >= TYPE_PRECISION (lhs_type))
3201 return NULL;
3203 /* Look through any change in sign on the outer shift input. */
3204 vect_unpromoted_value unprom_rshift_input;
3205 tree rshift_input = vect_look_through_possible_promotion
3206 (vinfo, gimple_assign_rhs1 (last_stmt), &unprom_rshift_input);
3207 if (!rshift_input
3208 || TYPE_PRECISION (TREE_TYPE (rshift_input))
3209 != TYPE_PRECISION (lhs_type))
3210 return NULL;
3212 /* Get the definition of the shift input. */
3213 stmt_vec_info rshift_input_stmt_info
3214 = vect_get_internal_def (vinfo, rshift_input);
3215 if (!rshift_input_stmt_info)
3216 return NULL;
3217 gassign *rshift_input_stmt
3218 = dyn_cast <gassign *> (rshift_input_stmt_info->stmt);
3219 if (!rshift_input_stmt)
3220 return NULL;
3222 stmt_vec_info mulh_stmt_info;
3223 tree scale_term;
3224 bool rounding_p = false;
3226 /* Check for the presence of the rounding term. */
3227 if (gimple_assign_rhs_code (rshift_input_stmt) == PLUS_EXPR)
3229 /* Check that the outer shift was by 1. */
3230 if (!integer_onep (gimple_assign_rhs2 (last_stmt)))
3231 return NULL;
3233 /* Check that the second operand of the PLUS_EXPR is 1. */
3234 if (!integer_onep (gimple_assign_rhs2 (rshift_input_stmt)))
3235 return NULL;
3237 /* Look through any change in sign on the addition input. */
3238 vect_unpromoted_value unprom_plus_input;
3239 tree plus_input = vect_look_through_possible_promotion
3240 (vinfo, gimple_assign_rhs1 (rshift_input_stmt), &unprom_plus_input);
3241 if (!plus_input
3242 || TYPE_PRECISION (TREE_TYPE (plus_input))
3243 != TYPE_PRECISION (TREE_TYPE (rshift_input)))
3244 return NULL;
3246 /* Get the definition of the multiply-high-scale part. */
3247 stmt_vec_info plus_input_stmt_info
3248 = vect_get_internal_def (vinfo, plus_input);
3249 if (!plus_input_stmt_info)
3250 return NULL;
3251 gassign *plus_input_stmt
3252 = dyn_cast <gassign *> (plus_input_stmt_info->stmt);
3253 if (!plus_input_stmt
3254 || gimple_assign_rhs_code (plus_input_stmt) != RSHIFT_EXPR)
3255 return NULL;
3257 /* Look through any change in sign on the scaling input. */
3258 vect_unpromoted_value unprom_scale_input;
3259 tree scale_input = vect_look_through_possible_promotion
3260 (vinfo, gimple_assign_rhs1 (plus_input_stmt), &unprom_scale_input);
3261 if (!scale_input
3262 || TYPE_PRECISION (TREE_TYPE (scale_input))
3263 != TYPE_PRECISION (TREE_TYPE (plus_input)))
3264 return NULL;
3266 /* Get the definition of the multiply-high part. */
3267 mulh_stmt_info = vect_get_internal_def (vinfo, scale_input);
3268 if (!mulh_stmt_info)
3269 return NULL;
3271 /* Get the scaling term. */
3272 scale_term = gimple_assign_rhs2 (plus_input_stmt);
3273 rounding_p = true;
3275 else
3277 mulh_stmt_info = rshift_input_stmt_info;
3278 scale_term = gimple_assign_rhs2 (last_stmt);
3281 /* Check that the scaling factor is constant. */
3282 if (TREE_CODE (scale_term) != INTEGER_CST)
3283 return NULL;
3285 /* Check whether the scaling input term can be seen as two widened
3286 inputs multiplied together. */
3287 vect_unpromoted_value unprom_mult[2];
3288 tree new_type;
3289 unsigned int nops
3290 = vect_widened_op_tree (vinfo, mulh_stmt_info, MULT_EXPR, WIDEN_MULT_EXPR,
3291 false, 2, unprom_mult, &new_type);
3292 if (nops != 2)
3293 return NULL;
3295 /* Adjust output precision. */
3296 if (TYPE_PRECISION (new_type) < target_precision)
3297 new_type = build_nonstandard_integer_type
3298 (target_precision, TYPE_UNSIGNED (new_type));
3300 unsigned mult_precision = TYPE_PRECISION (new_type);
3301 internal_fn ifn;
3302 /* Check that the scaling factor is as expected. Instead of
3303 target_precision, we should use the precision that we actually
3304 use for the internal function. */
3305 if (rounding_p)
3307 /* Check pattern 2). */
3308 if (wi::to_widest (scale_term) + mult_precision + 2
3309 != TYPE_PRECISION (lhs_type))
3310 return NULL;
3312 ifn = IFN_MULHRS;
3314 else
3316 /* Check for pattern 1). */
3317 if (wi::to_widest (scale_term) + mult_precision + 1
3318 == TYPE_PRECISION (lhs_type))
3319 ifn = IFN_MULHS;
3320 /* Check for pattern 3). */
3321 else if (wi::to_widest (scale_term) + mult_precision
3322 == TYPE_PRECISION (lhs_type))
3323 ifn = IFN_MULH;
3324 else
3325 return NULL;
3328 vect_pattern_detected ("vect_recog_mulhs_pattern", last_stmt);
3330 /* Check for target support. */
3331 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
3332 if (!new_vectype
3333 || !direct_internal_fn_supported_p
3334 (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
3335 return NULL;
3337 /* The IR requires a valid vector type for the cast result, even though
3338 it's likely to be discarded. */
3339 *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
3340 if (!*type_out)
3341 return NULL;
3343 /* Generate the IFN_MULHRS call. */
3344 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
3345 tree new_ops[2];
3346 vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
3347 unprom_mult, new_vectype);
3348 gcall *mulhrs_stmt
3349 = gimple_build_call_internal (ifn, 2, new_ops[0], new_ops[1]);
3350 gimple_call_set_lhs (mulhrs_stmt, new_var);
3351 gimple_set_location (mulhrs_stmt, gimple_location (last_stmt));
3353 if (dump_enabled_p ())
3354 dump_printf_loc (MSG_NOTE, vect_location,
3355 "created pattern stmt: %G", (gimple *) mulhrs_stmt);
3357 return vect_convert_output (vinfo, last_stmt_info, lhs_type,
3358 mulhrs_stmt, new_vectype);
3361 /* Recognize the patterns:
3363 ATYPE a; // narrower than TYPE
3364 BTYPE b; // narrower than TYPE
3365 (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
3366 or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
3368 where only the bottom half of avg is used. Try to transform them into:
3370 (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
3371 or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
3373 followed by:
3375 TYPE avg = (TYPE) avg';
3377 where NTYPE is no wider than half of TYPE. Since only the bottom half
3378 of avg is used, all or part of the cast of avg' should become redundant.
3380 If there is no target support available, generate code to distribute rshift
3381 over plus and add a carry. */
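/* For the fallback path, the identities used are (for unsigned values; they
   avoid the wider intermediate sum):

     .AVG_FLOOR (a, b) == (a >> 1) + (b >> 1) + ((a & b) & 1)
     .AVG_CEIL  (a, b) == (a >> 1) + (b >> 1) + ((a | b) & 1)

   e.g. a = 5, b = 6 gives 2 + 3 + 0 = 5 = (5 + 6) >> 1 and
   2 + 3 + 1 = 6 = (5 + 6 + 1) >> 1 respectively.  */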
3383 static gimple *
3384 vect_recog_average_pattern (vec_info *vinfo,
3385 stmt_vec_info last_stmt_info, tree *type_out)
3387 /* Check for a shift right by one bit. */
3388 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
3389 if (!last_stmt
3390 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
3391 || !integer_onep (gimple_assign_rhs2 (last_stmt)))
3392 return NULL;
3394 /* Check that the shift result is wider than the users of the
3395 result need (i.e. that narrowing would be a natural choice). */
3396 tree lhs = gimple_assign_lhs (last_stmt);
3397 tree type = TREE_TYPE (lhs);
3398 unsigned int target_precision
3399 = vect_element_precision (last_stmt_info->min_output_precision);
3400 if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
3401 return NULL;
3403 /* Look through any change in sign on the shift input. */
3404 tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
3405 vect_unpromoted_value unprom_plus;
3406 rshift_rhs = vect_look_through_possible_promotion (vinfo, rshift_rhs,
3407 &unprom_plus);
3408 if (!rshift_rhs
3409 || TYPE_PRECISION (TREE_TYPE (rshift_rhs)) != TYPE_PRECISION (type))
3410 return NULL;
3412 /* Get the definition of the shift input. */
3413 stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
3414 if (!plus_stmt_info)
3415 return NULL;
3417 /* Check whether the shift input can be seen as a tree of additions on
3418 2 or 3 widened inputs.
3420 Note that the pattern should be a win even if the result of one or
3421 more additions is reused elsewhere: if the pattern matches, we'd be
3422 replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s. */
3423 internal_fn ifn = IFN_AVG_FLOOR;
3424 vect_unpromoted_value unprom[3];
3425 tree new_type;
3426 unsigned int nops = vect_widened_op_tree (vinfo, plus_stmt_info, PLUS_EXPR,
3427 IFN_VEC_WIDEN_PLUS, false, 3,
3428 unprom, &new_type);
3429 if (nops == 0)
3430 return NULL;
3431 if (nops == 3)
3433 /* Check that one operand is 1. */
3434 unsigned int i;
3435 for (i = 0; i < 3; ++i)
3436 if (integer_onep (unprom[i].op))
3437 break;
3438 if (i == 3)
3439 return NULL;
3440 /* Throw away the 1 operand and keep the other two. */
3441 if (i < 2)
3442 unprom[i] = unprom[2];
3443 ifn = IFN_AVG_CEIL;
3446 vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
3448 /* We know that:
3450 (a) the operation can be viewed as:
3452 TYPE widened0 = (TYPE) UNPROM[0];
3453 TYPE widened1 = (TYPE) UNPROM[1];
3454 TYPE tmp1 = widened0 + widened1 {+ 1};
3455 TYPE tmp2 = tmp1 >> 1; // LAST_STMT_INFO
3457 (b) the first two statements are equivalent to:
3459 TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
3460 TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
3462 (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
3463 where sensible;
3465 (d) all the operations can be performed correctly at twice the width of
3466 NEW_TYPE, due to the nature of the average operation; and
3468 (e) users of the result of the right shift need only TARGET_PRECISION
3469 bits, where TARGET_PRECISION is no more than half of TYPE's
3470 precision.
3472 Under these circumstances, the only situation in which NEW_TYPE
3473 could be narrower than TARGET_PRECISION is if widened0, widened1
3474 and an addition result are all used more than once. Thus we can
3475 treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
3476 as "free", whereas widening the result of the average instruction
3477 from NEW_TYPE to TARGET_PRECISION would be a new operation. It's
3478 therefore better not to go narrower than TARGET_PRECISION. */
3479 if (TYPE_PRECISION (new_type) < target_precision)
3480 new_type = build_nonstandard_integer_type (target_precision,
3481 TYPE_UNSIGNED (new_type));
3483 /* Check for target support. */
3484 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
3485 if (!new_vectype)
3486 return NULL;
3488 bool fallback_p = false;
3490 if (direct_internal_fn_supported_p (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
3492 else if (TYPE_UNSIGNED (new_type)
3493 && optab_for_tree_code (RSHIFT_EXPR, new_vectype, optab_scalar)
3494 && optab_for_tree_code (PLUS_EXPR, new_vectype, optab_default)
3495 && optab_for_tree_code (BIT_IOR_EXPR, new_vectype, optab_default)
3496 && optab_for_tree_code (BIT_AND_EXPR, new_vectype, optab_default))
3497 fallback_p = true;
3498 else
3499 return NULL;
3501 /* The IR requires a valid vector type for the cast result, even though
3502 it's likely to be discarded. */
3503 *type_out = get_vectype_for_scalar_type (vinfo, type);
3504 if (!*type_out)
3505 return NULL;
3507 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
3508 tree new_ops[2];
3509 vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
3510 unprom, new_vectype);
3512 if (fallback_p)
3514 /* As a fallback, generate code for the following sequence:
3516 shifted_op0 = new_ops[0] >> 1;
3517 shifted_op1 = new_ops[1] >> 1;
3518 sum_of_shifted = shifted_op0 + shifted_op1;
3519 unmasked_carry = new_ops[0] and/or new_ops[1];
3520 carry = unmasked_carry & 1;
3521 new_var = sum_of_shifted + carry;
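   A short justification (writing each input as 2*hi + lo, with lo the
   low bit): (a + b) >> 1 == a_hi + b_hi + (a_lo & b_lo), while
   (a + b + 1) >> 1 == a_hi + b_hi + (a_lo | b_lo), which is why the
   floor variant forms the carry with BIT_AND_EXPR and the ceiling
   variant with BIT_IOR_EXPR below.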
3524 tree one_cst = build_one_cst (new_type);
3525 gassign *g;
3527 tree shifted_op0 = vect_recog_temp_ssa_var (new_type, NULL);
3528 g = gimple_build_assign (shifted_op0, RSHIFT_EXPR, new_ops[0], one_cst);
3529 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
3531 tree shifted_op1 = vect_recog_temp_ssa_var (new_type, NULL);
3532 g = gimple_build_assign (shifted_op1, RSHIFT_EXPR, new_ops[1], one_cst);
3533 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
3535 tree sum_of_shifted = vect_recog_temp_ssa_var (new_type, NULL);
3536 g = gimple_build_assign (sum_of_shifted, PLUS_EXPR,
3537 shifted_op0, shifted_op1);
3538 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
3540 tree unmasked_carry = vect_recog_temp_ssa_var (new_type, NULL);
3541 tree_code c = (ifn == IFN_AVG_CEIL) ? BIT_IOR_EXPR : BIT_AND_EXPR;
3542 g = gimple_build_assign (unmasked_carry, c, new_ops[0], new_ops[1]);
3543 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
3545 tree carry = vect_recog_temp_ssa_var (new_type, NULL);
3546 g = gimple_build_assign (carry, BIT_AND_EXPR, unmasked_carry, one_cst);
3547 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
3549 g = gimple_build_assign (new_var, PLUS_EXPR, sum_of_shifted, carry);
3550 return vect_convert_output (vinfo, last_stmt_info, type, g, new_vectype);
3553 /* Generate the IFN_AVG* call. */
3554 gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
3555 new_ops[1]);
3556 gimple_call_set_lhs (average_stmt, new_var);
3557 gimple_set_location (average_stmt, gimple_location (last_stmt));
3559 if (dump_enabled_p ())
3560 dump_printf_loc (MSG_NOTE, vect_location,
3561 "created pattern stmt: %G", (gimple *) average_stmt);
3563 return vect_convert_output (vinfo, last_stmt_info,
3564 type, average_stmt, new_vectype);
3567 /* Recognize cases in which the input to a cast is wider than its
3568 output, and the input is fed by a widening operation. Fold this
3569 by removing the unnecessary intermediate widening. E.g.:
3571 unsigned char a;
3572 unsigned int b = (unsigned int) a;
3573 unsigned short c = (unsigned short) b;
is equivalent to:

3577 unsigned short c = (unsigned short) a;
3579 Although this is rare in input IR, it is an expected side-effect
3580 of the over-widening pattern above.
3582 This is beneficial also for integer-to-float conversions, if the
3583 widened integer has more bits than the float, and if the unwidened
3584 input doesn't. */
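/* A sketch of the integer-to-float case mentioned above (hypothetical
   types, same idea):

     unsigned char a;
     unsigned long long b = a;
     float f = (float) b;

   can become f = (float) a, because unsigned char is already exactly
   representable in float even though unsigned long long is not.  */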
3586 static gimple *
3587 vect_recog_cast_forwprop_pattern (vec_info *vinfo,
3588 stmt_vec_info last_stmt_info, tree *type_out)
3590 /* Check for a cast, including an integer-to-float conversion. */
3591 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
3592 if (!last_stmt)
3593 return NULL;
3594 tree_code code = gimple_assign_rhs_code (last_stmt);
3595 if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
3596 return NULL;
3598 /* Make sure that the rhs is a scalar with a natural bitsize. */
3599 tree lhs = gimple_assign_lhs (last_stmt);
3600 if (!lhs)
3601 return NULL;
3602 tree lhs_type = TREE_TYPE (lhs);
3603 scalar_mode lhs_mode;
3604 if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
3605 || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
3606 return NULL;
3608 /* Check for a narrowing operation (from a vector point of view). */
3609 tree rhs = gimple_assign_rhs1 (last_stmt);
3610 tree rhs_type = TREE_TYPE (rhs);
3611 if (!INTEGRAL_TYPE_P (rhs_type)
3612 || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
3613 || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
3614 return NULL;
3616 /* Try to find an unpromoted input. */
3617 vect_unpromoted_value unprom;
3618 if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
3619 || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
3620 return NULL;
3622 /* If the bits above RHS_TYPE matter, make sure that they're the
3623 same when extending from UNPROM as they are when extending from RHS. */
3624 if (!INTEGRAL_TYPE_P (lhs_type)
3625 && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
3626 return NULL;
3628 /* We can get the same result by casting UNPROM directly, to avoid
3629 the unnecessary widening and narrowing. */
3630 vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
3632 *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
3633 if (!*type_out)
3634 return NULL;
3636 tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
3637 gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
3638 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
3640 return pattern_stmt;
3643 /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
3644 to WIDEN_LSHIFT_EXPR. See vect_recog_widen_op_pattern for details. */
3646 static gimple *
3647 vect_recog_widen_shift_pattern (vec_info *vinfo,
3648 stmt_vec_info last_stmt_info, tree *type_out)
3650 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
3651 LSHIFT_EXPR, WIDEN_LSHIFT_EXPR, true,
3652 "vect_recog_widen_shift_pattern");
3655 /* Detect a rotate pattern that wouldn't otherwise be vectorized:
3657 type a_t, b_t, c_t;
3659 S0 a_t = b_t r<< c_t;
3661 Input/Output:
3663 * STMT_VINFO: The stmt from which the pattern search begins,
3664 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
3665 with a sequence:
3667 S1 d_t = -c_t;
3668 S2 e_t = d_t & (B - 1);
3669 S3 f_t = b_t << c_t;
3670 S4 g_t = b_t >> e_t;
3671 S0 a_t = f_t | g_t;
3673 where B is the element bitsize of type.
3675 Output:
3677 * TYPE_OUT: The type of the output of this pattern.
3679 * Return value: A new stmt that will be used to replace the rotate
3680 S0 stmt. */
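/* A worked instance of the expansion (a sketch, assuming a 32-bit
   unsigned type, so B == 32):

     a_t = b_t r<< c_t

   becomes

     e_t = (-c_t) & 31;
     a_t = (b_t << c_t) | (b_t >> e_t);

   For c_t == 0 both shifts leave b_t unchanged, so the IOR still gives
   the right result; for any other count the two shifted halves are
   disjoint and recombine into the rotated value.  */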
3682 static gimple *
3683 vect_recog_rotate_pattern (vec_info *vinfo,
3684 stmt_vec_info stmt_vinfo, tree *type_out)
3686 gimple *last_stmt = stmt_vinfo->stmt;
3687 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
3688 gimple *pattern_stmt, *def_stmt;
3689 enum tree_code rhs_code;
3690 enum vect_def_type dt;
3691 optab optab1, optab2;
3692 edge ext_def = NULL;
3693 bool bswap16_p = false;
3695 if (is_gimple_assign (last_stmt))
3697 rhs_code = gimple_assign_rhs_code (last_stmt);
3698 switch (rhs_code)
3700 case LROTATE_EXPR:
3701 case RROTATE_EXPR:
3702 break;
3703 default:
3704 return NULL;
3707 lhs = gimple_assign_lhs (last_stmt);
3708 oprnd0 = gimple_assign_rhs1 (last_stmt);
3709 type = TREE_TYPE (oprnd0);
3710 oprnd1 = gimple_assign_rhs2 (last_stmt);
3712 else if (gimple_call_builtin_p (last_stmt, BUILT_IN_BSWAP16))
3714 /* __builtin_bswap16 (x) is another form of x r>> 8.
3715 The vectorizer has bswap support, but only if the argument isn't
3716 promoted. */
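/* E.g. __builtin_bswap16 (0xa1b2) == 0xb2a1, which is exactly 0xa1b2
   rotated right by 8 in a 16-bit type.  */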
3717 lhs = gimple_call_lhs (last_stmt);
3718 oprnd0 = gimple_call_arg (last_stmt, 0);
3719 type = TREE_TYPE (oprnd0);
3720 if (!lhs
3721 || TYPE_PRECISION (TREE_TYPE (lhs)) != 16
3722 || TYPE_PRECISION (type) <= 16
3723 || TREE_CODE (oprnd0) != SSA_NAME
3724 || BITS_PER_UNIT != 8)
3725 return NULL;
3727 stmt_vec_info def_stmt_info;
3728 if (!vect_is_simple_use (oprnd0, vinfo, &dt, &def_stmt_info, &def_stmt))
3729 return NULL;
3731 if (dt != vect_internal_def)
3732 return NULL;
3734 if (gimple_assign_cast_p (def_stmt))
3736 def = gimple_assign_rhs1 (def_stmt);
3737 if (INTEGRAL_TYPE_P (TREE_TYPE (def))
3738 && TYPE_PRECISION (TREE_TYPE (def)) == 16)
3739 oprnd0 = def;
3742 type = TREE_TYPE (lhs);
3743 vectype = get_vectype_for_scalar_type (vinfo, type);
3744 if (vectype == NULL_TREE)
3745 return NULL;
3747 if (tree char_vectype = get_same_sized_vectype (char_type_node, vectype))
3749 /* The encoding uses one stepped pattern for each byte in the
3750 16-bit word. */
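/* I.e. the selected series is { 1, 0, 3, 2, 5, 4, ... }, which swaps
   each adjacent pair of bytes and therefore byte-swaps every 16-bit
   element of the vector.  */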
3751 vec_perm_builder elts (TYPE_VECTOR_SUBPARTS (char_vectype), 2, 3);
3752 for (unsigned i = 0; i < 3; ++i)
3753 for (unsigned j = 0; j < 2; ++j)
3754 elts.quick_push ((i + 1) * 2 - j - 1);
3756 vec_perm_indices indices (elts, 1,
3757 TYPE_VECTOR_SUBPARTS (char_vectype));
3758 machine_mode vmode = TYPE_MODE (char_vectype);
3759 if (can_vec_perm_const_p (vmode, vmode, indices))
3761 /* vectorizable_bswap can handle the __builtin_bswap16 if we
3762 undo the argument promotion. */
3763 if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
3765 def = vect_recog_temp_ssa_var (type, NULL);
3766 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
3767 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3768 oprnd0 = def;
3771 /* Pattern detected. */
3772 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
3774 *type_out = vectype;
3776 /* Pattern supported. Create a stmt to be used to replace the
3777 pattern, with the unpromoted argument. */
3778 var = vect_recog_temp_ssa_var (type, NULL);
3779 pattern_stmt = gimple_build_call (gimple_call_fndecl (last_stmt),
3780 1, oprnd0);
3781 gimple_call_set_lhs (pattern_stmt, var);
3782 gimple_call_set_fntype (as_a <gcall *> (pattern_stmt),
3783 gimple_call_fntype (last_stmt));
3784 return pattern_stmt;
3788 oprnd1 = build_int_cst (integer_type_node, 8);
3789 rhs_code = LROTATE_EXPR;
3790 bswap16_p = true;
3792 else
3793 return NULL;
3795 if (TREE_CODE (oprnd0) != SSA_NAME
3796 || !INTEGRAL_TYPE_P (type)
3797 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type))
3798 return NULL;
3800 stmt_vec_info def_stmt_info;
3801 if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt))
3802 return NULL;
3804 if (dt != vect_internal_def
3805 && dt != vect_constant_def
3806 && dt != vect_external_def)
3807 return NULL;
3809 vectype = get_vectype_for_scalar_type (vinfo, type);
3810 if (vectype == NULL_TREE)
3811 return NULL;
3813 /* If vector/vector or vector/scalar rotate is supported by the target,
3814 don't do anything here. */
3815 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
3816 if (optab1
3817 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
3819 use_rotate:
3820 if (bswap16_p)
3822 if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
3824 def = vect_recog_temp_ssa_var (type, NULL);
3825 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
3826 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3827 oprnd0 = def;
3830 /* Pattern detected. */
3831 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
3833 *type_out = vectype;
3835 /* Pattern supported. Create a stmt to be used to replace the
3836 pattern. */
3837 var = vect_recog_temp_ssa_var (type, NULL);
3838 pattern_stmt = gimple_build_assign (var, LROTATE_EXPR, oprnd0,
3839 oprnd1);
3840 return pattern_stmt;
3842 return NULL;
3845 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
3847 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
3848 if (optab2
3849 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
3850 goto use_rotate;
3853 tree utype = unsigned_type_for (type);
3854 tree uvectype = get_vectype_for_scalar_type (vinfo, utype);
3855 if (!uvectype)
3856 return NULL;
3858 /* If vector/vector or vector/scalar shifts aren't supported by the target,
3859 don't do anything here either. */
3860 optab1 = optab_for_tree_code (LSHIFT_EXPR, uvectype, optab_vector);
3861 optab2 = optab_for_tree_code (RSHIFT_EXPR, uvectype, optab_vector);
3862 if (!optab1
3863 || optab_handler (optab1, TYPE_MODE (uvectype)) == CODE_FOR_nothing
3864 || !optab2
3865 || optab_handler (optab2, TYPE_MODE (uvectype)) == CODE_FOR_nothing)
3867 if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
3868 return NULL;
3869 optab1 = optab_for_tree_code (LSHIFT_EXPR, uvectype, optab_scalar);
3870 optab2 = optab_for_tree_code (RSHIFT_EXPR, uvectype, optab_scalar);
3871 if (!optab1
3872 || optab_handler (optab1, TYPE_MODE (uvectype)) == CODE_FOR_nothing
3873 || !optab2
3874 || optab_handler (optab2, TYPE_MODE (uvectype)) == CODE_FOR_nothing)
3875 return NULL;
3878 *type_out = vectype;
3880 if (!useless_type_conversion_p (utype, TREE_TYPE (oprnd0)))
3882 def = vect_recog_temp_ssa_var (utype, NULL);
3883 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
3884 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
3885 oprnd0 = def;
3888 if (dt == vect_external_def && TREE_CODE (oprnd1) == SSA_NAME)
3889 ext_def = vect_get_external_def_edge (vinfo, oprnd1);
3891 def = NULL_TREE;
3892 scalar_int_mode mode = SCALAR_INT_TYPE_MODE (utype);
3893 if (dt != vect_internal_def || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
3894 def = oprnd1;
3895 else if (def_stmt && gimple_assign_cast_p (def_stmt))
3897 tree rhs1 = gimple_assign_rhs1 (def_stmt);
3898 if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
3899 && TYPE_PRECISION (TREE_TYPE (rhs1))
3900 == TYPE_PRECISION (type))
3901 def = rhs1;
3904 if (def == NULL_TREE)
3906 def = vect_recog_temp_ssa_var (utype, NULL);
3907 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
3908 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
3910 stype = TREE_TYPE (def);
3912 if (TREE_CODE (def) == INTEGER_CST)
3914 if (!tree_fits_uhwi_p (def)
3915 || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
3916 || integer_zerop (def))
3917 return NULL;
3918 def2 = build_int_cst (stype,
3919 GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
3921 else
3923 tree vecstype = get_vectype_for_scalar_type (vinfo, stype);
3925 if (vecstype == NULL_TREE)
3926 return NULL;
3927 def2 = vect_recog_temp_ssa_var (stype, NULL);
3928 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
3929 if (ext_def)
3931 basic_block new_bb
3932 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
3933 gcc_assert (!new_bb);
3935 else
3936 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
3938 def2 = vect_recog_temp_ssa_var (stype, NULL);
3939 tree mask = build_int_cst (stype, GET_MODE_PRECISION (mode) - 1);
3940 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
3941 gimple_assign_lhs (def_stmt), mask);
3942 if (ext_def)
3944 basic_block new_bb
3945 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
3946 gcc_assert (!new_bb);
3948 else
3949 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
3952 var1 = vect_recog_temp_ssa_var (utype, NULL);
3953 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
3954 ? LSHIFT_EXPR : RSHIFT_EXPR,
3955 oprnd0, def);
3956 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
3958 var2 = vect_recog_temp_ssa_var (utype, NULL);
3959 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
3960 ? RSHIFT_EXPR : LSHIFT_EXPR,
3961 oprnd0, def2);
3962 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
3964 /* Pattern detected. */
3965 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
3967 /* Pattern supported. Create a stmt to be used to replace the pattern. */
3968 var = vect_recog_temp_ssa_var (utype, NULL);
3969 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
3971 if (!useless_type_conversion_p (type, utype))
3973 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, uvectype);
3974 tree result = vect_recog_temp_ssa_var (type, NULL);
3975 pattern_stmt = gimple_build_assign (result, NOP_EXPR, var);
3977 return pattern_stmt;
3980 /* Detect a vector by vector shift pattern that wouldn't be otherwise
3981 vectorized:
3983 type a_t;
3984 TYPE b_T, res_T;
3986 S1 a_t = ;
3987 S2 b_T = ;
3988 S3 res_T = b_T op a_t;
3990 where type 'TYPE' is a type with a different size than 'type',
3991 and op is <<, >> or rotate.
3993 Also detect cases:
3995 type a_t;
3996 TYPE b_T, c_T, res_T;
3998 S0 c_T = ;
3999 S1 a_t = (type) c_T;
4000 S2 b_T = ;
4001 S3 res_T = b_T op a_t;
4003 Input/Output:
4005 * STMT_VINFO: The stmt from which the pattern search begins,
4006 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
4007 with a shift/rotate which has the same type on both operands: in the
4008 second case just b_T op c_T, in the first case with an added cast
4009 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
4011 Output:
4013 * TYPE_OUT: The type of the output of this pattern.
4015 * Return value: A new stmt that will be used to replace the shift/rotate
4016 S3 stmt. */
4018 static gimple *
4019 vect_recog_vector_vector_shift_pattern (vec_info *vinfo,
4020 stmt_vec_info stmt_vinfo,
4021 tree *type_out)
4023 gimple *last_stmt = stmt_vinfo->stmt;
4024 tree oprnd0, oprnd1, lhs, var;
4025 gimple *pattern_stmt;
4026 enum tree_code rhs_code;
4028 if (!is_gimple_assign (last_stmt))
4029 return NULL;
4031 rhs_code = gimple_assign_rhs_code (last_stmt);
4032 switch (rhs_code)
4034 case LSHIFT_EXPR:
4035 case RSHIFT_EXPR:
4036 case LROTATE_EXPR:
4037 case RROTATE_EXPR:
4038 break;
4039 default:
4040 return NULL;
4043 lhs = gimple_assign_lhs (last_stmt);
4044 oprnd0 = gimple_assign_rhs1 (last_stmt);
4045 oprnd1 = gimple_assign_rhs2 (last_stmt);
4046 if (TREE_CODE (oprnd0) != SSA_NAME
4047 || TREE_CODE (oprnd1) != SSA_NAME
4048 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
4049 || !INTEGRAL_TYPE_P (TREE_TYPE (oprnd0))
4050 || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
4051 || TYPE_PRECISION (TREE_TYPE (lhs))
4052 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
4053 return NULL;
4055 stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
4056 if (!def_vinfo)
4057 return NULL;
4059 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (oprnd0));
4060 if (*type_out == NULL_TREE)
4061 return NULL;
4063 tree def = NULL_TREE;
4064 gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
4065 if (def_stmt && gimple_assign_cast_p (def_stmt))
4067 tree rhs1 = gimple_assign_rhs1 (def_stmt);
4068 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
4069 && TYPE_PRECISION (TREE_TYPE (rhs1))
4070 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
4072 if (TYPE_PRECISION (TREE_TYPE (oprnd1))
4073 >= TYPE_PRECISION (TREE_TYPE (rhs1)))
4074 def = rhs1;
4075 else
4077 tree mask
4078 = build_low_bits_mask (TREE_TYPE (rhs1),
4079 TYPE_PRECISION (TREE_TYPE (oprnd1)));
4080 def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
4081 def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
4082 tree vecstype = get_vectype_for_scalar_type (vinfo,
4083 TREE_TYPE (rhs1));
4084 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
4089 if (def == NULL_TREE)
4091 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
4092 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
4093 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4096 /* Pattern detected. */
4097 vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
4099 /* Pattern supported. Create a stmt to be used to replace the pattern. */
4100 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
4101 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
4103 return pattern_stmt;
4106 /* Return true iff the target has a vector optab implementing the operation
4107 CODE on type VECTYPE. */
4109 static bool
4110 target_has_vecop_for_code (tree_code code, tree vectype)
4112 optab voptab = optab_for_tree_code (code, vectype, optab_vector);
4113 return voptab
4114 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
4117 /* Verify that the target has optabs of VECTYPE to perform all the steps
4118 needed by the multiplication-by-immediate synthesis algorithm described by
4119 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
4120 present. Return true iff the target supports all the steps. */
4122 static bool
4123 target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
4124 tree vectype, bool synth_shift_p)
4126 if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
4127 return false;
4129 bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
4130 bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
4132 if (var == negate_variant
4133 && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
4134 return false;
4136 /* If we must synthesize shifts with additions make sure that vector
4137 addition is available. */
4138 if ((var == add_variant || synth_shift_p) && !supports_vplus)
4139 return false;
4141 for (int i = 1; i < alg->ops; i++)
4143 switch (alg->op[i])
4145 case alg_shift:
4146 break;
4147 case alg_add_t_m2:
4148 case alg_add_t2_m:
4149 case alg_add_factor:
4150 if (!supports_vplus)
4151 return false;
4152 break;
4153 case alg_sub_t_m2:
4154 case alg_sub_t2_m:
4155 case alg_sub_factor:
4156 if (!supports_vminus)
4157 return false;
4158 break;
4159 case alg_unknown:
4160 case alg_m:
4161 case alg_zero:
4162 case alg_impossible:
4163 return false;
4164 default:
4165 gcc_unreachable ();
4169 return true;
4172 /* Synthesize a left shift of OP by AMNT bits using a series of additions,
4173 putting the final result in DEST. Append all statements but the last into
4174 VINFO. Return the last statement. */
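/* For instance, a left shift by 3 is emitted as three doublings
   (a sketch in scalar terms): t1 = op + op (op << 1), t2 = t1 + t1
   (op << 2), dest = t2 + t2 (op << 3).  */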
4176 static gimple *
4177 synth_lshift_by_additions (vec_info *vinfo,
4178 tree dest, tree op, HOST_WIDE_INT amnt,
4179 stmt_vec_info stmt_info)
4181 HOST_WIDE_INT i;
4182 tree itype = TREE_TYPE (op);
4183 tree prev_res = op;
4184 gcc_assert (amnt >= 0);
4185 for (i = 0; i < amnt; i++)
4187 tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
4188 : dest;
4189 gimple *stmt
4190 = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
4191 prev_res = tmp_var;
4192 if (i < amnt - 1)
4193 append_pattern_def_seq (vinfo, stmt_info, stmt);
4194 else
4195 return stmt;
4197 gcc_unreachable ();
4198 return NULL;
4201 /* Helper for vect_synth_mult_by_constant. Apply a binary operation
4202 CODE to operands OP1 and OP2, creating a new temporary SSA var in
4203 the process if necessary. Append the resulting assignment statements
4204 to the sequence in STMT_VINFO. Return the SSA variable that holds the
4205 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
4206 left shifts using additions. */
4208 static tree
4209 apply_binop_and_append_stmt (vec_info *vinfo,
4210 tree_code code, tree op1, tree op2,
4211 stmt_vec_info stmt_vinfo, bool synth_shift_p)
4213 if (integer_zerop (op2)
4214 && (code == LSHIFT_EXPR
4215 || code == PLUS_EXPR))
4217 gcc_assert (TREE_CODE (op1) == SSA_NAME);
4218 return op1;
4221 gimple *stmt;
4222 tree itype = TREE_TYPE (op1);
4223 tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
4225 if (code == LSHIFT_EXPR
4226 && synth_shift_p)
4228 stmt = synth_lshift_by_additions (vinfo, tmp_var, op1,
4229 TREE_INT_CST_LOW (op2), stmt_vinfo);
4230 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
4231 return tmp_var;
4234 stmt = gimple_build_assign (tmp_var, code, op1, op2);
4235 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
4236 return tmp_var;
4239 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
4240 and simple arithmetic operations to be vectorized. Record the statements
4241 produced in STMT_VINFO and return the last statement in the sequence or
4242 NULL if it's not possible to synthesize such a multiplication.
4243 This function mirrors the behavior of expand_mult_const in expmed.cc but
4244 works on tree-ssa form. */
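/* For example, a multiplication by 10 might be synthesized (depending on
   the algorithm choose_mult_variant picks for the target) as

     t1 = op << 2;        op * 4
     t2 = op + t1;        op * 5   (alg_add_t_m2, log 2)
     result = t2 << 1;    op * 10  (alg_shift, log 1)

   with the whole sequence carried out in an unsigned copy of the type
   when the original type does not wrap on overflow.  */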
4246 static gimple *
4247 vect_synth_mult_by_constant (vec_info *vinfo, tree op, tree val,
4248 stmt_vec_info stmt_vinfo)
4250 tree itype = TREE_TYPE (op);
4251 machine_mode mode = TYPE_MODE (itype);
4252 struct algorithm alg;
4253 mult_variant variant;
4254 if (!tree_fits_shwi_p (val))
4255 return NULL;
4257 /* Multiplication synthesis by shifts, adds and subs can introduce
4258 signed overflow where the original operation didn't. Perform the
4259 operations on an unsigned type and cast back to avoid this.
4260 In the future we may want to relax this for synthesis algorithms
4261 that we can prove do not cause unexpected overflow. */
4262 bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
4264 tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
4265 tree vectype = get_vectype_for_scalar_type (vinfo, multtype);
4266 if (!vectype)
4267 return NULL;
4269 /* Targets that don't support vector shifts but support vector additions
4270 can synthesize shifts that way. */
4271 bool synth_shift_p = !vect_supportable_shift (vinfo, LSHIFT_EXPR, multtype);
4273 HOST_WIDE_INT hwval = tree_to_shwi (val);
4274 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
4275 The vectorizer's benefit analysis will decide whether it's beneficial
4276 to do this. */
4277 bool possible = choose_mult_variant (VECTOR_MODE_P (TYPE_MODE (vectype))
4278 ? TYPE_MODE (vectype) : mode,
4279 hwval, &alg, &variant, MAX_COST);
4280 if (!possible)
4281 return NULL;
4283 if (!target_supports_mult_synth_alg (&alg, variant, vectype, synth_shift_p))
4284 return NULL;
4286 tree accumulator;
4288 /* Clear out the sequence of statements so we can populate it below. */
4289 gimple *stmt = NULL;
4291 if (cast_to_unsigned_p)
4293 tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
4294 stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
4295 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
4296 op = tmp_op;
4299 if (alg.op[0] == alg_zero)
4300 accumulator = build_int_cst (multtype, 0);
4301 else
4302 accumulator = op;
4304 bool needs_fixup = (variant == negate_variant)
4305 || (variant == add_variant);
4307 for (int i = 1; i < alg.ops; i++)
4309 tree shft_log = build_int_cst (multtype, alg.log[i]);
4310 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
4311 tree tmp_var = NULL_TREE;
4313 switch (alg.op[i])
4315 case alg_shift:
4316 if (synth_shift_p)
4317 stmt
4318 = synth_lshift_by_additions (vinfo, accum_tmp, accumulator,
4319 alg.log[i], stmt_vinfo);
4320 else
4321 stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
4322 shft_log);
4323 break;
4324 case alg_add_t_m2:
4325 tmp_var
4326 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op, shft_log,
4327 stmt_vinfo, synth_shift_p);
4328 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
4329 tmp_var);
4330 break;
4331 case alg_sub_t_m2:
4332 tmp_var = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op,
4333 shft_log, stmt_vinfo,
4334 synth_shift_p);
4335 /* In some algorithms the first step involves zeroing the
4336 accumulator. If subtracting from such an accumulator
4337 just emit the negation directly. */
4338 if (integer_zerop (accumulator))
4339 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
4340 else
4341 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
4342 tmp_var);
4343 break;
4344 case alg_add_t2_m:
4345 tmp_var
4346 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
4347 shft_log, stmt_vinfo, synth_shift_p);
4348 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
4349 break;
4350 case alg_sub_t2_m:
4351 tmp_var
4352 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
4353 shft_log, stmt_vinfo, synth_shift_p);
4354 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
4355 break;
4356 case alg_add_factor:
4357 tmp_var
4358 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
4359 shft_log, stmt_vinfo, synth_shift_p);
4360 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
4361 tmp_var);
4362 break;
4363 case alg_sub_factor:
4364 tmp_var
4365 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
4366 shft_log, stmt_vinfo, synth_shift_p);
4367 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
4368 accumulator);
4369 break;
4370 default:
4371 gcc_unreachable ();
4373 /* We don't want to append the last stmt in the sequence to stmt_vinfo
4374 but rather return it directly. */
4376 if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
4377 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
4378 accumulator = accum_tmp;
4380 if (variant == negate_variant)
4382 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
4383 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
4384 accumulator = accum_tmp;
4385 if (cast_to_unsigned_p)
4386 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
4388 else if (variant == add_variant)
4390 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
4391 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
4392 accumulator = accum_tmp;
4393 if (cast_to_unsigned_p)
4394 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
4396 /* Move back to a signed type if needed. */
4397 if (cast_to_unsigned_p)
4399 tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
4400 stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
4403 return stmt;
4406 /* Detect multiplication by a constant and convert it into a sequence of
4407 shifts, additions, subtractions and negations. We reuse the
4408 choose_mult_variant algorithms from expmed.cc.
4410 Input/Output:
4412 STMT_VINFO: The stmt from which the pattern search begins,
4413 i.e. the mult stmt.
4415 Output:
4417 * TYPE_OUT: The type of the output of this pattern.
4419 * Return value: A new stmt that will be used to replace
4420 the multiplication. */
4422 static gimple *
4423 vect_recog_mult_pattern (vec_info *vinfo,
4424 stmt_vec_info stmt_vinfo, tree *type_out)
4426 gimple *last_stmt = stmt_vinfo->stmt;
4427 tree oprnd0, oprnd1, vectype, itype;
4428 gimple *pattern_stmt;
4430 if (!is_gimple_assign (last_stmt))
4431 return NULL;
4433 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
4434 return NULL;
4436 oprnd0 = gimple_assign_rhs1 (last_stmt);
4437 oprnd1 = gimple_assign_rhs2 (last_stmt);
4438 itype = TREE_TYPE (oprnd0);
4440 if (TREE_CODE (oprnd0) != SSA_NAME
4441 || TREE_CODE (oprnd1) != INTEGER_CST
4442 || !INTEGRAL_TYPE_P (itype)
4443 || !type_has_mode_precision_p (itype))
4444 return NULL;
4446 vectype = get_vectype_for_scalar_type (vinfo, itype);
4447 if (vectype == NULL_TREE)
4448 return NULL;
4450 /* If the target can handle vectorized multiplication natively,
4451 don't attempt to optimize this. */
4452 optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
4453 if (mul_optab != unknown_optab)
4455 machine_mode vec_mode = TYPE_MODE (vectype);
4456 int icode = (int) optab_handler (mul_optab, vec_mode);
4457 if (icode != CODE_FOR_nothing)
4458 return NULL;
4461 pattern_stmt = vect_synth_mult_by_constant (vinfo,
4462 oprnd0, oprnd1, stmt_vinfo);
4463 if (!pattern_stmt)
4464 return NULL;
4466 /* Pattern detected. */
4467 vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
4469 *type_out = vectype;
4471 return pattern_stmt;
4474 /* Detect a signed division by a constant that wouldn't be
4475 otherwise vectorized:
4477 type a_t, b_t;
4479 S1 a_t = b_t / N;
4481 where type 'type' is an integral type and N is a constant.
4483 Similarly handle modulo by a constant:
4485 S4 a_t = b_t % N;
4487 Input/Output:
4489 * STMT_VINFO: The stmt from which the pattern search begins,
4490 i.e. the division stmt. S1 is replaced, if N is a power
4491 of two constant and type is signed, by:
4492 S3 y_t = b_t < 0 ? N - 1 : 0;
4493 S2 x_t = b_t + y_t;
4494 S1' a_t = x_t >> log2 (N);
4496 S4 is replaced, if N is a power of two constant and
4497 type is signed, by (where *_T temporaries have unsigned type):
4498 S9 y_T = b_t < 0 ? -1U : 0U;
4499 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
4500 S7 z_t = (type) z_T;
4501 S6 w_t = b_t + z_t;
4502 S5 x_t = w_t & (N - 1);
4503 S4' a_t = x_t - z_t;
4505 Output:
4507 * TYPE_OUT: The type of the output of this pattern.
4509 * Return value: A new stmt that will be used to replace the division
4510 S1 or modulo S4 stmt. */
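/* A worked instance of the division case (a sketch, N == 8, signed
   'type'):

     S3 y_t = b_t < 0 ? 7 : 0;
     S2 x_t = b_t + y_t;
     S1' a_t = x_t >> 3;

   e.g. b_t == -1 gives (-1 + 7) >> 3 == 0 instead of the -1 a plain
   arithmetic shift would produce, matching C's truncation toward
   zero.  */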
4512 static gimple *
4513 vect_recog_divmod_pattern (vec_info *vinfo,
4514 stmt_vec_info stmt_vinfo, tree *type_out)
4516 gimple *last_stmt = stmt_vinfo->stmt;
4517 tree oprnd0, oprnd1, vectype, itype, cond;
4518 gimple *pattern_stmt, *def_stmt;
4519 enum tree_code rhs_code;
4520 optab optab;
4521 tree q, cst;
4522 int dummy_int, prec;
4524 if (!is_gimple_assign (last_stmt))
4525 return NULL;
4527 rhs_code = gimple_assign_rhs_code (last_stmt);
4528 switch (rhs_code)
4530 case TRUNC_DIV_EXPR:
4531 case EXACT_DIV_EXPR:
4532 case TRUNC_MOD_EXPR:
4533 break;
4534 default:
4535 return NULL;
4538 oprnd0 = gimple_assign_rhs1 (last_stmt);
4539 oprnd1 = gimple_assign_rhs2 (last_stmt);
4540 itype = TREE_TYPE (oprnd0);
4541 if (TREE_CODE (oprnd0) != SSA_NAME
4542 || TREE_CODE (oprnd1) != INTEGER_CST
4543 || TREE_CODE (itype) != INTEGER_TYPE
4544 || !type_has_mode_precision_p (itype))
4545 return NULL;
4547 scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
4548 vectype = get_vectype_for_scalar_type (vinfo, itype);
4549 if (vectype == NULL_TREE)
4550 return NULL;
4552 if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
4554 /* If the target can handle vectorized division or modulo natively,
4555 don't attempt to optimize this, since native division is likely
4556 to give smaller code. */
4557 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
4558 if (optab != unknown_optab)
4560 machine_mode vec_mode = TYPE_MODE (vectype);
4561 int icode = (int) optab_handler (optab, vec_mode);
4562 if (icode != CODE_FOR_nothing)
4563 return NULL;
4567 prec = TYPE_PRECISION (itype);
4568 if (integer_pow2p (oprnd1))
4570 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
4571 return NULL;
4573 /* Pattern detected. */
4574 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
4576 *type_out = vectype;
4578 /* Check if the target supports this internal function. */
4579 internal_fn ifn = IFN_DIV_POW2;
4580 if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
4582 tree shift = build_int_cst (itype, tree_log2 (oprnd1));
4584 tree var_div = vect_recog_temp_ssa_var (itype, NULL);
4585 gimple *div_stmt = gimple_build_call_internal (ifn, 2, oprnd0, shift);
4586 gimple_call_set_lhs (div_stmt, var_div);
4588 if (rhs_code == TRUNC_MOD_EXPR)
4590 append_pattern_def_seq (vinfo, stmt_vinfo, div_stmt);
4591 def_stmt
4592 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4593 LSHIFT_EXPR, var_div, shift);
4594 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4595 pattern_stmt
4596 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4597 MINUS_EXPR, oprnd0,
4598 gimple_assign_lhs (def_stmt));
4600 else
4601 pattern_stmt = div_stmt;
4602 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
4604 return pattern_stmt;
4607 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
4608 build_int_cst (itype, 0));
4609 if (rhs_code == TRUNC_DIV_EXPR
4610 || rhs_code == EXACT_DIV_EXPR)
4612 tree var = vect_recog_temp_ssa_var (itype, NULL);
4613 tree shift;
4614 def_stmt
4615 = gimple_build_assign (var, COND_EXPR, cond,
4616 fold_build2 (MINUS_EXPR, itype, oprnd1,
4617 build_int_cst (itype, 1)),
4618 build_int_cst (itype, 0));
4619 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4620 var = vect_recog_temp_ssa_var (itype, NULL);
4621 def_stmt
4622 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
4623 gimple_assign_lhs (def_stmt));
4624 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4626 shift = build_int_cst (itype, tree_log2 (oprnd1));
4627 pattern_stmt
4628 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4629 RSHIFT_EXPR, var, shift);
4631 else
4633 tree signmask;
4634 if (compare_tree_int (oprnd1, 2) == 0)
4636 signmask = vect_recog_temp_ssa_var (itype, NULL);
4637 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
4638 build_int_cst (itype, 1),
4639 build_int_cst (itype, 0));
4640 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4642 else
4644 tree utype
4645 = build_nonstandard_integer_type (prec, 1);
4646 tree vecutype = get_vectype_for_scalar_type (vinfo, utype);
4647 tree shift
4648 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
4649 - tree_log2 (oprnd1));
4650 tree var = vect_recog_temp_ssa_var (utype, NULL);
4652 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
4653 build_int_cst (utype, -1),
4654 build_int_cst (utype, 0));
4655 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
4656 var = vect_recog_temp_ssa_var (utype, NULL);
4657 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
4658 gimple_assign_lhs (def_stmt),
4659 shift);
4660 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
4661 signmask = vect_recog_temp_ssa_var (itype, NULL);
4662 def_stmt
4663 = gimple_build_assign (signmask, NOP_EXPR, var);
4664 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4666 def_stmt
4667 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4668 PLUS_EXPR, oprnd0, signmask);
4669 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4670 def_stmt
4671 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4672 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
4673 fold_build2 (MINUS_EXPR, itype, oprnd1,
4674 build_int_cst (itype, 1)));
4675 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4677 pattern_stmt
4678 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4679 MINUS_EXPR, gimple_assign_lhs (def_stmt),
4680 signmask);
4683 return pattern_stmt;
4686 if ((cst = uniform_integer_cst_p (oprnd1))
4687 && TYPE_UNSIGNED (itype)
4688 && rhs_code == TRUNC_DIV_EXPR
4689 && vectype
4690 && targetm.vectorize.preferred_div_as_shifts_over_mult (vectype))
4692 /* We can use the relationship:
4694 x // N == ((x+N+2) // (N+1) + x) // (N+1) for 0 <= x < N(N+3)
4696 to optimize cases where N+1 is a power of 2, and where // (N+1)
4697 is therefore a shift right. When operating in modes that are
4698 multiples of a byte in size, there are two cases:
4700 (1) N(N+3) is not representable, in which case the question
4701 becomes whether the replacement expression overflows.
4702 It is enough to test that x+N+2 does not overflow,
4703 i.e. that x < MAX-(N+1).
4705 (2) N(N+3) is representable, in which case it is the (only)
4706 bound that we need to check.
4708 ??? For now we just handle the case where // (N+1) is a shift
4709 right by half the precision, since some architectures can
4710 optimize the associated addition and shift combinations
4711 into single instructions. */
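/* A worked instance of that case (a sketch, 16-bit unsigned type,
   N == 255, so N + 1 == 256 and pow == 8):

     t1 = x + 257;      x + N + 2
     t2 = t1 >> 8;
     t3 = t2 + x;
     q  = t3 >> 8;      == x / 255

   valid as long as x + 257 cannot overflow, which is what the range
   check below establishes.  */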
4713 auto wcst = wi::to_wide (cst);
4714 int pow = wi::exact_log2 (wcst + 1);
4715 if (pow == prec / 2)
4717 gimple *stmt = SSA_NAME_DEF_STMT (oprnd0);
4719 gimple_ranger ranger;
4720 int_range_max r;
4722 /* Check that no overflow will occur. If we don't have range
4723 information we can't perform the optimization. */
4725 if (ranger.range_of_expr (r, oprnd0, stmt) && !r.undefined_p ())
4727 wide_int max = r.upper_bound ();
4728 wide_int one = wi::shwi (1, prec);
4729 wide_int adder = wi::add (one, wi::lshift (one, pow));
4730 wi::overflow_type ovf;
4731 wi::add (max, adder, UNSIGNED, &ovf);
4732 if (ovf == wi::OVF_NONE)
4734 *type_out = vectype;
4735 tree tadder = wide_int_to_tree (itype, adder);
4736 tree rshift = wide_int_to_tree (itype, pow);
4738 tree new_lhs1 = vect_recog_temp_ssa_var (itype, NULL);
4739 gassign *patt1
4740 = gimple_build_assign (new_lhs1, PLUS_EXPR, oprnd0, tadder);
4741 append_pattern_def_seq (vinfo, stmt_vinfo, patt1, vectype);
4743 tree new_lhs2 = vect_recog_temp_ssa_var (itype, NULL);
4744 patt1 = gimple_build_assign (new_lhs2, RSHIFT_EXPR, new_lhs1,
4745 rshift);
4746 append_pattern_def_seq (vinfo, stmt_vinfo, patt1, vectype);
4748 tree new_lhs3 = vect_recog_temp_ssa_var (itype, NULL);
4749 patt1 = gimple_build_assign (new_lhs3, PLUS_EXPR, new_lhs2,
4750 oprnd0);
4751 append_pattern_def_seq (vinfo, stmt_vinfo, patt1, vectype);
4753 tree new_lhs4 = vect_recog_temp_ssa_var (itype, NULL);
4754 pattern_stmt = gimple_build_assign (new_lhs4, RSHIFT_EXPR,
4755 new_lhs3, rshift);
4757 return pattern_stmt;
4763 if (prec > HOST_BITS_PER_WIDE_INT
4764 || integer_zerop (oprnd1))
4765 return NULL;
4767 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
4768 return NULL;
4770 if (TYPE_UNSIGNED (itype))
4772 unsigned HOST_WIDE_INT mh, ml;
4773 int pre_shift, post_shift;
4774 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
4775 & GET_MODE_MASK (itype_mode));
4776 tree t1, t2, t3, t4;
4778 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
4779 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
4780 return NULL;
4782 /* Find a suitable multiplier and right shift count
4783 instead of multiplying with D. */
4784 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
4786 /* If the suggested multiplier is more than SIZE bits, we can do better
4787 for even divisors, using an initial right shift. */
4788 if (mh != 0 && (d & 1) == 0)
4790 pre_shift = ctz_or_zero (d);
4791 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
4792 &ml, &post_shift, &dummy_int);
4793 gcc_assert (!mh);
4795 else
4796 pre_shift = 0;
4798 if (mh != 0)
4800 if (post_shift - 1 >= prec)
4801 return NULL;
4803 /* t1 = oprnd0 h* ml;
4804 t2 = oprnd0 - t1;
4805 t3 = t2 >> 1;
4806 t4 = t1 + t3;
4807 q = t4 >> (post_shift - 1); */
4808 t1 = vect_recog_temp_ssa_var (itype, NULL);
4809 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
4810 build_int_cst (itype, ml));
4811 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4813 t2 = vect_recog_temp_ssa_var (itype, NULL);
4814 def_stmt
4815 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
4816 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4818 t3 = vect_recog_temp_ssa_var (itype, NULL);
4819 def_stmt
4820 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
4821 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4823 t4 = vect_recog_temp_ssa_var (itype, NULL);
4824 def_stmt
4825 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
4827 if (post_shift != 1)
4829 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4831 q = vect_recog_temp_ssa_var (itype, NULL);
4832 pattern_stmt
4833 = gimple_build_assign (q, RSHIFT_EXPR, t4,
4834 build_int_cst (itype, post_shift - 1));
4836 else
4838 q = t4;
4839 pattern_stmt = def_stmt;
4842 else
4844 if (pre_shift >= prec || post_shift >= prec)
4845 return NULL;
4847 /* t1 = oprnd0 >> pre_shift;
4848 t2 = t1 h* ml;
4849 q = t2 >> post_shift; */
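/* E.g. for 32-bit unsigned division by 3 (a well-known instance, shown
   here as a sketch), choose_multiplier yields ml == 0xaaaaaaab with
   pre_shift == 0 and post_shift == 1, so the quotient is
   (oprnd0 h* 0xaaaaaaab) >> 1, i.e. the high half of the 64-bit
   product shifted right once.  */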
4850 if (pre_shift)
4852 t1 = vect_recog_temp_ssa_var (itype, NULL);
4853 def_stmt
4854 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
4855 build_int_cst (NULL, pre_shift));
4856 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4858 else
4859 t1 = oprnd0;
4861 t2 = vect_recog_temp_ssa_var (itype, NULL);
4862 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
4863 build_int_cst (itype, ml));
4865 if (post_shift)
4867 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4869 q = vect_recog_temp_ssa_var (itype, NULL);
4870 def_stmt
4871 = gimple_build_assign (q, RSHIFT_EXPR, t2,
4872 build_int_cst (itype, post_shift));
4874 else
4875 q = t2;
4877 pattern_stmt = def_stmt;
4880 else
4882 unsigned HOST_WIDE_INT ml;
4883 int post_shift;
4884 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
4885 unsigned HOST_WIDE_INT abs_d;
4886 bool add = false;
4887 tree t1, t2, t3, t4;
4889 /* Give up for -1. */
4890 if (d == -1)
4891 return NULL;
4893 /* Since d might be INT_MIN, we have to cast to
4894 unsigned HOST_WIDE_INT before negating to avoid
4895 undefined signed overflow. */
4896 abs_d = (d >= 0
4897 ? (unsigned HOST_WIDE_INT) d
4898 : - (unsigned HOST_WIDE_INT) d);
4900 /* n rem d = n rem -d */
4901 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
4903 d = abs_d;
4904 oprnd1 = build_int_cst (itype, abs_d);
4906 if (HOST_BITS_PER_WIDE_INT >= prec
4907 && abs_d == HOST_WIDE_INT_1U << (prec - 1))
4908 /* This case is not handled correctly below. */
4909 return NULL;
4911 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
4912 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
4914 add = true;
4915 ml |= HOST_WIDE_INT_M1U << (prec - 1);
4917 if (post_shift >= prec)
4918 return NULL;
4920 /* t1 = oprnd0 h* ml; */
4921 t1 = vect_recog_temp_ssa_var (itype, NULL);
4922 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
4923 build_int_cst (itype, ml));
4925 if (add)
4927 /* t2 = t1 + oprnd0; */
4928 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4929 t2 = vect_recog_temp_ssa_var (itype, NULL);
4930 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
4932 else
4933 t2 = t1;
4935 if (post_shift)
4937 /* t3 = t2 >> post_shift; */
4938 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4939 t3 = vect_recog_temp_ssa_var (itype, NULL);
4940 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
4941 build_int_cst (itype, post_shift));
4943 else
4944 t3 = t2;
4946 int msb = 1;
4947 value_range r;
4948 get_range_query (cfun)->range_of_expr (r, oprnd0);
4949 if (!r.varying_p () && !r.undefined_p ())
4951 if (!wi::neg_p (r.lower_bound (), TYPE_SIGN (itype)))
4952 msb = 0;
4953 else if (wi::neg_p (r.upper_bound (), TYPE_SIGN (itype)))
4954 msb = -1;
4957 if (msb == 0 && d >= 0)
4959 /* q = t3; */
4960 q = t3;
4961 pattern_stmt = def_stmt;
4963 else
4965 /* t4 = oprnd0 >> (prec - 1);
4966 or if we know from VRP that oprnd0 >= 0
4967 t4 = 0;
4968 or if we know from VRP that oprnd0 < 0
4969 t4 = -1; */
4970 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4971 t4 = vect_recog_temp_ssa_var (itype, NULL);
4972 if (msb != 1)
4973 def_stmt = gimple_build_assign (t4, INTEGER_CST,
4974 build_int_cst (itype, msb));
4975 else
4976 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
4977 build_int_cst (itype, prec - 1));
4978 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4980 /* q = t3 - t4; or q = t4 - t3; */
4981 q = vect_recog_temp_ssa_var (itype, NULL);
4982 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
4983 d < 0 ? t3 : t4);
4987 if (rhs_code == TRUNC_MOD_EXPR)
4989 tree r, t1;
4991 /* We divided. Now finish by:
4992 t1 = q * oprnd1;
4993 r = oprnd0 - t1; */
4994 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
4996 t1 = vect_recog_temp_ssa_var (itype, NULL);
4997 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
4998 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5000 r = vect_recog_temp_ssa_var (itype, NULL);
5001 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
5004 /* Pattern detected. */
5005 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
5007 *type_out = vectype;
5008 return pattern_stmt;
5011 /* Function vect_recog_mixed_size_cond_pattern
5013 Try to find the following pattern:
5015 type x_t, y_t;
5016 TYPE a_T, b_T, c_T;
5017 loop:
5018 S1 a_T = x_t CMP y_t ? b_T : c_T;
5020 where type 'TYPE' is an integral type which has a different size
5021 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
5022 than 'type', the constants need to fit into an integer type
5023 with the same width as 'type') or results of conversion from 'type'.
5025 Input:
5027 * STMT_VINFO: The stmt from which the pattern search begins.
5029 Output:
5031 * TYPE_OUT: The type of the output of this pattern.
5033 * Return value: A new stmt that will be used to replace the pattern.
5034 Additionally a def_stmt is added.
5036 a_it = x_t CMP y_t ? b_it : c_it;
5037 a_T = (TYPE) a_it; */
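/* For instance (a sketch of the case where 'TYPE' is wider than 'type'):

     short x_t, y_t;
     long long a_T = x_t < y_t ? 3 : 5;

   becomes

     a_it = x_t < y_t ? 3 : 5;   in a 16-bit integer type
     a_T = (long long) a_it;

   so the COND_EXPR operates on vectors with the same number of elements
   as the comparison operands.  */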
5039 static gimple *
5040 vect_recog_mixed_size_cond_pattern (vec_info *vinfo,
5041 stmt_vec_info stmt_vinfo, tree *type_out)
5043 gimple *last_stmt = stmt_vinfo->stmt;
5044 tree cond_expr, then_clause, else_clause;
5045 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
5046 gimple *pattern_stmt, *def_stmt;
5047 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
5048 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
5049 bool promotion;
5050 tree comp_scalar_type;
5052 if (!is_gimple_assign (last_stmt)
5053 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
5054 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
5055 return NULL;
5057 cond_expr = gimple_assign_rhs1 (last_stmt);
5058 then_clause = gimple_assign_rhs2 (last_stmt);
5059 else_clause = gimple_assign_rhs3 (last_stmt);
5061 if (!COMPARISON_CLASS_P (cond_expr))
5062 return NULL;
5064 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
5065 comp_vectype = get_vectype_for_scalar_type (vinfo, comp_scalar_type);
5066 if (comp_vectype == NULL_TREE)
5067 return NULL;
5069 type = TREE_TYPE (gimple_assign_lhs (last_stmt));
5070 if (types_compatible_p (type, comp_scalar_type)
5071 || ((TREE_CODE (then_clause) != INTEGER_CST
5072 || TREE_CODE (else_clause) != INTEGER_CST)
5073 && !INTEGRAL_TYPE_P (comp_scalar_type))
5074 || !INTEGRAL_TYPE_P (type))
5075 return NULL;
5077 if ((TREE_CODE (then_clause) != INTEGER_CST
5078 && !type_conversion_p (vinfo, then_clause, false,
5079 &orig_type0, &def_stmt0, &promotion))
5080 || (TREE_CODE (else_clause) != INTEGER_CST
5081 && !type_conversion_p (vinfo, else_clause, false,
5082 &orig_type1, &def_stmt1, &promotion)))
5083 return NULL;
5085 if (orig_type0 && orig_type1
5086 && !types_compatible_p (orig_type0, orig_type1))
5087 return NULL;
5089 if (orig_type0)
5091 if (!types_compatible_p (orig_type0, comp_scalar_type))
5092 return NULL;
5093 then_clause = gimple_assign_rhs1 (def_stmt0);
5094 itype = orig_type0;
5097 if (orig_type1)
5099 if (!types_compatible_p (orig_type1, comp_scalar_type))
5100 return NULL;
5101 else_clause = gimple_assign_rhs1 (def_stmt1);
5102 itype = orig_type1;
5106 HOST_WIDE_INT cmp_mode_size
5107 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
5109 scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
5110 if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
5111 return NULL;
5113 vectype = get_vectype_for_scalar_type (vinfo, type);
5114 if (vectype == NULL_TREE)
5115 return NULL;
5117 if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
5118 return NULL;
5120 if (itype == NULL_TREE)
5121 itype = build_nonstandard_integer_type (cmp_mode_size,
5122 TYPE_UNSIGNED (type));
5124 if (itype == NULL_TREE
5125 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
5126 return NULL;
5128 vecitype = get_vectype_for_scalar_type (vinfo, itype);
5129 if (vecitype == NULL_TREE)
5130 return NULL;
5132 if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
5133 return NULL;
5135 if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
5137 if ((TREE_CODE (then_clause) == INTEGER_CST
5138 && !int_fits_type_p (then_clause, itype))
5139 || (TREE_CODE (else_clause) == INTEGER_CST
5140 && !int_fits_type_p (else_clause, itype)))
5141 return NULL;
5144 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
5145 COND_EXPR, unshare_expr (cond_expr),
5146 fold_convert (itype, then_clause),
5147 fold_convert (itype, else_clause));
5148 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
5149 NOP_EXPR, gimple_assign_lhs (def_stmt));
5151 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecitype);
5152 *type_out = vectype;
5154 vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt);
5156 return pattern_stmt;
5160 /* Helper function of vect_recog_bool_pattern. Called recursively, return
5161 true if bool VAR can and should be optimized that way. Assume it shouldn't
5162 in case it's a result of a comparison which can be directly vectorized into
5163 a vector comparison. Fills in STMTS with all stmts visited during the
5164 walk. */
5166 static bool
5167 check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
5169 tree rhs1;
5170 enum tree_code rhs_code;
5172 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
5173 if (!def_stmt_info)
5174 return false;
5176 gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
5177 if (!def_stmt)
5178 return false;
5180 if (stmts.contains (def_stmt))
5181 return true;
5183 rhs1 = gimple_assign_rhs1 (def_stmt);
5184 rhs_code = gimple_assign_rhs_code (def_stmt);
5185 switch (rhs_code)
5187 case SSA_NAME:
5188 if (! check_bool_pattern (rhs1, vinfo, stmts))
5189 return false;
5190 break;
5192 CASE_CONVERT:
5193 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
5194 return false;
5195 if (! check_bool_pattern (rhs1, vinfo, stmts))
5196 return false;
5197 break;
5199 case BIT_NOT_EXPR:
5200 if (! check_bool_pattern (rhs1, vinfo, stmts))
5201 return false;
5202 break;
5204 case BIT_AND_EXPR:
5205 case BIT_IOR_EXPR:
5206 case BIT_XOR_EXPR:
5207 if (! check_bool_pattern (rhs1, vinfo, stmts)
5208 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
5209 return false;
5210 break;
5212 default:
5213 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
5215 tree vecitype, comp_vectype;
5217 /* If the comparison can throw, then is_gimple_condexpr will be
5218 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
5219 if (stmt_could_throw_p (cfun, def_stmt))
5220 return false;
5222 comp_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1));
5223 if (comp_vectype == NULL_TREE)
5224 return false;
5226 tree mask_type = get_mask_type_for_scalar_type (vinfo,
5227 TREE_TYPE (rhs1));
5228 if (mask_type
5229 && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
5230 return false;
5232 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
5234 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
5235 tree itype
5236 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
5237 vecitype = get_vectype_for_scalar_type (vinfo, itype);
5238 if (vecitype == NULL_TREE)
5239 return false;
5241 else
5242 vecitype = comp_vectype;
5243 if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
5244 return false;
5246 else
5247 return false;
5248 break;
5251 bool res = stmts.add (def_stmt);
5252 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
5253 gcc_assert (!res);
5255 return true;
5259 /* Helper function of adjust_bool_pattern. Build a cast of VAR to TYPE,
5260 append the cast stmt to STMT_INFO's pattern sequence and return
5261 the result of the cast. */
5263 static tree
5264 adjust_bool_pattern_cast (vec_info *vinfo,
5265 tree type, tree var, stmt_vec_info stmt_info)
5267 gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
5268 NOP_EXPR, var);
5269 append_pattern_def_seq (vinfo, stmt_info, cast_stmt,
5270 get_vectype_for_scalar_type (vinfo, type));
5271 return gimple_assign_lhs (cast_stmt);
5274 /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
5275 VAR is an SSA_NAME that should be transformed from bool to a wider integer
5276 type, OUT_TYPE is the desired final integer type of the whole pattern.
5277 STMT_INFO is the info of the pattern root and is where pattern stmts should
5278 be associated with. DEFS is a map of pattern defs. */
5280 static void
5281 adjust_bool_pattern (vec_info *vinfo, tree var, tree out_type,
5282 stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
5284 gimple *stmt = SSA_NAME_DEF_STMT (var);
5285 enum tree_code rhs_code, def_rhs_code;
5286 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
5287 location_t loc;
5288 gimple *pattern_stmt, *def_stmt;
5289 tree trueval = NULL_TREE;
5291 rhs1 = gimple_assign_rhs1 (stmt);
5292 rhs2 = gimple_assign_rhs2 (stmt);
5293 rhs_code = gimple_assign_rhs_code (stmt);
5294 loc = gimple_location (stmt);
5295 switch (rhs_code)
5297 case SSA_NAME:
5298 CASE_CONVERT:
5299 irhs1 = *defs.get (rhs1);
5300 itype = TREE_TYPE (irhs1);
5301 pattern_stmt
5302 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
5303 SSA_NAME, irhs1);
5304 break;
5306 case BIT_NOT_EXPR:
5307 irhs1 = *defs.get (rhs1);
5308 itype = TREE_TYPE (irhs1);
5309 pattern_stmt
5310 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
5311 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
5312 break;
5314 case BIT_AND_EXPR:
5315 /* Try to optimize x = y & (a < b ? 1 : 0); into
5316 x = (a < b ? y : 0);
5318 E.g. for:
5319 bool a_b, b_b, c_b;
5320 TYPE d_T;
5322 S1 a_b = x1 CMP1 y1;
5323 S2 b_b = x2 CMP2 y2;
5324 S3 c_b = a_b & b_b;
5325 S4 d_T = (TYPE) c_b;
5327 we would normally emit:
5329 S1' a_T = x1 CMP1 y1 ? 1 : 0;
5330 S2' b_T = x2 CMP2 y2 ? 1 : 0;
5331 S3' c_T = a_T & b_T;
5332 S4' d_T = c_T;
5334 but we can save one stmt by using the
5335 result of one of the COND_EXPRs in the other COND_EXPR and leave
5336 BIT_AND_EXPR stmt out:
5338 S1' a_T = x1 CMP1 y1 ? 1 : 0;
5339 S3' c_T = x2 CMP2 y2 ? a_T : 0;
5340 S4' d_T = c_T;
5342 At least when VEC_COND_EXPR is implemented using masks
5343 cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases it
5344 computes the comparison mask and ANDs it, in one case with an
5345 all-ones vector, in the other case with a vector register.
5346 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
5347 often more expensive. */
5348 def_stmt = SSA_NAME_DEF_STMT (rhs2);
5349 def_rhs_code = gimple_assign_rhs_code (def_stmt);
5350 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
5352 irhs1 = *defs.get (rhs1);
5353 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
5354 if (TYPE_PRECISION (TREE_TYPE (irhs1))
5355 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
5357 rhs_code = def_rhs_code;
5358 rhs1 = def_rhs1;
5359 rhs2 = gimple_assign_rhs2 (def_stmt);
5360 trueval = irhs1;
5361 goto do_compare;
5363 else
5364 irhs2 = *defs.get (rhs2);
5365 goto and_ior_xor;
5367 def_stmt = SSA_NAME_DEF_STMT (rhs1);
5368 def_rhs_code = gimple_assign_rhs_code (def_stmt);
5369 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
5371 irhs2 = *defs.get (rhs2);
5372 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
5373 if (TYPE_PRECISION (TREE_TYPE (irhs2))
5374 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
5376 rhs_code = def_rhs_code;
5377 rhs1 = def_rhs1;
5378 rhs2 = gimple_assign_rhs2 (def_stmt);
5379 trueval = irhs2;
5380 goto do_compare;
5382 else
5383 irhs1 = *defs.get (rhs1);
5384 goto and_ior_xor;
5386 /* FALLTHRU */
5387 case BIT_IOR_EXPR:
5388 case BIT_XOR_EXPR:
5389 irhs1 = *defs.get (rhs1);
5390 irhs2 = *defs.get (rhs2);
5391 and_ior_xor:
5392 if (TYPE_PRECISION (TREE_TYPE (irhs1))
5393 != TYPE_PRECISION (TREE_TYPE (irhs2)))
5395 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
5396 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
5397 int out_prec = TYPE_PRECISION (out_type);
5398 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
5399 irhs2 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs1), irhs2,
5400 stmt_info);
5401 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
5402 irhs1 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs2), irhs1,
5403 stmt_info);
5404 else
5406 irhs1 = adjust_bool_pattern_cast (vinfo,
5407 out_type, irhs1, stmt_info);
5408 irhs2 = adjust_bool_pattern_cast (vinfo,
5409 out_type, irhs2, stmt_info);
5412 itype = TREE_TYPE (irhs1);
5413 pattern_stmt
5414 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
5415 rhs_code, irhs1, irhs2);
5416 break;
5418 default:
5419 do_compare:
5420 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
5421 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
5422 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
5423 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
5424 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
5426 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
5427 itype
5428 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
5430 else
5431 itype = TREE_TYPE (rhs1);
5432 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
5433 if (trueval == NULL_TREE)
5434 trueval = build_int_cst (itype, 1);
5435 else
5436 gcc_checking_assert (useless_type_conversion_p (itype,
5437 TREE_TYPE (trueval)));
5438 pattern_stmt
5439 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
5440 COND_EXPR, cond_expr, trueval,
5441 build_int_cst (itype, 0));
5442 break;
5445 gimple_set_location (pattern_stmt, loc);
5446 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt,
5447 get_vectype_for_scalar_type (vinfo, itype));
5448 defs.put (var, gimple_assign_lhs (pattern_stmt));
5451 /* Comparison function to qsort a vector of gimple stmts after UID. */
5453 static int
5454 sort_after_uid (const void *p1, const void *p2)
5456 const gimple *stmt1 = *(const gimple * const *)p1;
5457 const gimple *stmt2 = *(const gimple * const *)p2;
5458 return gimple_uid (stmt1) - gimple_uid (stmt2);
5461 /* Create pattern stmts for all stmts participating in the bool pattern
5462 specified by BOOL_STMT_SET and its root STMT_INFO with the desired type
5463 OUT_TYPE. Return the def of the pattern root. */
5465 static tree
5466 adjust_bool_stmts (vec_info *vinfo, hash_set <gimple *> &bool_stmt_set,
5467 tree out_type, stmt_vec_info stmt_info)
5469 /* Gather original stmts in the bool pattern in their order of appearance
5470 in the IL. */
5471 auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
5472 for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
5473 i != bool_stmt_set.end (); ++i)
5474 bool_stmts.quick_push (*i);
5475 bool_stmts.qsort (sort_after_uid);
5477 /* Now process them in that order, producing pattern stmts. */
5478 hash_map <tree, tree> defs;
5479 for (unsigned i = 0; i < bool_stmts.length (); ++i)
5480 adjust_bool_pattern (vinfo, gimple_assign_lhs (bool_stmts[i]),
5481 out_type, stmt_info, defs);
5483 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
5484 gimple *pattern_stmt
5485 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
5486 return gimple_assign_lhs (pattern_stmt);
5489 /* Return the proper type for converting bool VAR into
5490 an integer value or NULL_TREE if no such type exists.
5491 The type is chosen so that the converted value has the
5492 same number of elements as VAR's vector type. */
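/* For instance (illustrative only): if VAR is a boolean defined by a
   comparison of two 32-bit integers and its recorded mask precision is
   32, the returned type is a 32-bit unsigned integer type, so that a
   vector of the converted values has as many elements as a vector of
   the comparison operands.  */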
5494 static tree
5495 integer_type_for_mask (tree var, vec_info *vinfo)
5497 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
5498 return NULL_TREE;
5500 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
5501 if (!def_stmt_info || !vect_use_mask_type_p (def_stmt_info))
5502 return NULL_TREE;
5504 return build_nonstandard_integer_type (def_stmt_info->mask_precision, 1);
5507 /* Function vect_recog_bool_pattern
5509 Try to find pattern like following:
5511 bool a_b, b_b, c_b, d_b, e_b;
5512 TYPE f_T;
5513 loop:
5514 S1 a_b = x1 CMP1 y1;
5515 S2 b_b = x2 CMP2 y2;
5516 S3 c_b = a_b & b_b;
5517 S4 d_b = x3 CMP3 y3;
5518 S5 e_b = c_b | d_b;
5519 S6 f_T = (TYPE) e_b;
5521 where type 'TYPE' is an integral type. Or a similar pattern
5522 ending in
5524 S6 f_Y = e_b ? r_Y : s_Y;
5526 as results from if-conversion of a complex condition.
5528 Input:
5530 * STMT_VINFO: The stmt at the end from which the pattern
5531 search begins, i.e. cast of a bool to
5532 an integer type.
5534 Output:
5536 * TYPE_OUT: The type of the output of this pattern.
5538 * Return value: A new stmt that will be used to replace the pattern.
5540 Assuming size of TYPE is the same as size of all comparisons
5541 (otherwise some casts would be added where needed), for the above
5542 sequence we create the related pattern stmts:
5543 S1' a_T = x1 CMP1 y1 ? 1 : 0;
5544 S3' c_T = x2 CMP2 y2 ? a_T : 0;
5545 S4' d_T = x3 CMP3 y3 ? 1 : 0;
5546 S5' e_T = c_T | d_T;
5547 S6' f_T = e_T;
5549 Instead of the above S3' we could emit:
5550 S2' b_T = x2 CMP2 y2 ? 1 : 0;
5551 S3' c_T = a_T & b_T;
5552 but the above is more efficient. */
5554 static gimple *
5555 vect_recog_bool_pattern (vec_info *vinfo,
5556 stmt_vec_info stmt_vinfo, tree *type_out)
5558 gimple *last_stmt = stmt_vinfo->stmt;
5559 enum tree_code rhs_code;
5560 tree var, lhs, rhs, vectype;
5561 gimple *pattern_stmt;
5563 if (!is_gimple_assign (last_stmt))
5564 return NULL;
5566 var = gimple_assign_rhs1 (last_stmt);
5567 lhs = gimple_assign_lhs (last_stmt);
5568 rhs_code = gimple_assign_rhs_code (last_stmt);
5570 if (rhs_code == VIEW_CONVERT_EXPR)
5571 var = TREE_OPERAND (var, 0);
5573 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
5574 return NULL;
5576 hash_set<gimple *> bool_stmts;
5578 if (CONVERT_EXPR_CODE_P (rhs_code)
5579 || rhs_code == VIEW_CONVERT_EXPR)
5581 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
5582 || VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
5583 return NULL;
5584 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
5586 if (check_bool_pattern (var, vinfo, bool_stmts))
5588 rhs = adjust_bool_stmts (vinfo, bool_stmts,
5589 TREE_TYPE (lhs), stmt_vinfo);
5590 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
5591 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
5592 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
5593 else
5594 pattern_stmt
5595 = gimple_build_assign (lhs, NOP_EXPR, rhs);
5597 else
5599 tree type = integer_type_for_mask (var, vinfo);
5600 tree cst0, cst1, tmp;
5602 if (!type)
5603 return NULL;
5605 /* We may use the cond directly with a narrowed type, to avoid
5606 multiple cond exprs followed by result packing, and instead
5607 perform a single cond with a packed mask. In the widening
5608 case it is better to do the cond first and then extract the
5609 results. */
5610 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
5611 type = TREE_TYPE (lhs);
5613 cst0 = build_int_cst (type, 0);
5614 cst1 = build_int_cst (type, 1);
5615 tmp = vect_recog_temp_ssa_var (type, NULL);
5616 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
5618 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
5620 tree new_vectype = get_vectype_for_scalar_type (vinfo, type);
5621 append_pattern_def_seq (vinfo, stmt_vinfo,
5622 pattern_stmt, new_vectype);
5624 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
5625 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
5629 *type_out = vectype;
5630 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
5632 return pattern_stmt;
5634 else if (rhs_code == COND_EXPR
5635 && TREE_CODE (var) == SSA_NAME)
5637 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
5638 if (vectype == NULL_TREE)
5639 return NULL;
5641 /* Build a scalar type for the boolean result that when
5642 vectorized matches the vector type of the result in
5643 size and number of elements. */
5644 unsigned prec
5645 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
5646 TYPE_VECTOR_SUBPARTS (vectype));
5648 tree type
5649 = build_nonstandard_integer_type (prec,
5650 TYPE_UNSIGNED (TREE_TYPE (var)));
5651 if (get_vectype_for_scalar_type (vinfo, type) == NULL_TREE)
5652 return NULL;
5654 if (check_bool_pattern (var, vinfo, bool_stmts))
5655 var = adjust_bool_stmts (vinfo, bool_stmts, type, stmt_vinfo);
5656 else if (integer_type_for_mask (var, vinfo))
5657 return NULL;
5659 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
5660 pattern_stmt
5661 = gimple_build_assign (lhs, COND_EXPR,
5662 build2 (NE_EXPR, boolean_type_node,
5663 var, build_int_cst (TREE_TYPE (var), 0)),
5664 gimple_assign_rhs2 (last_stmt),
5665 gimple_assign_rhs3 (last_stmt));
5666 *type_out = vectype;
5667 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
5669 return pattern_stmt;
5671 else if (rhs_code == SSA_NAME
5672 && STMT_VINFO_DATA_REF (stmt_vinfo))
5674 stmt_vec_info pattern_stmt_info;
5675 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
5676 if (!vectype || !VECTOR_MODE_P (TYPE_MODE (vectype)))
5677 return NULL;
5679 if (check_bool_pattern (var, vinfo, bool_stmts))
5680 rhs = adjust_bool_stmts (vinfo, bool_stmts,
5681 TREE_TYPE (vectype), stmt_vinfo);
5682 else
5684 tree type = integer_type_for_mask (var, vinfo);
5685 tree cst0, cst1, new_vectype;
5687 if (!type)
5688 return NULL;
5690 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
5691 type = TREE_TYPE (vectype);
5693 cst0 = build_int_cst (type, 0);
5694 cst1 = build_int_cst (type, 1);
5695 new_vectype = get_vectype_for_scalar_type (vinfo, type);
5697 rhs = vect_recog_temp_ssa_var (type, NULL);
5698 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
5699 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, new_vectype);
5702 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
5703 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
5705 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
5706 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
5707 append_pattern_def_seq (vinfo, stmt_vinfo, cast_stmt);
5708 rhs = rhs2;
5710 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
5711 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
5712 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
5713 *type_out = vectype;
5714 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
5716 return pattern_stmt;
5718 else
5719 return NULL;
5723 /* A helper for vect_recog_mask_conversion_pattern. Build
5724 conversion of MASK to a type suitable for masking VECTYPE.
5725 The built statement gets the required vectype and is appended to
5726 STMT_VINFO's pattern def sequence.
5728 Return the converted mask. */
5730 static tree
5731 build_mask_conversion (vec_info *vinfo,
5732 tree mask, tree vectype, stmt_vec_info stmt_vinfo)
5734 gimple *stmt;
5735 tree masktype, tmp;
5737 masktype = truth_type_for (vectype);
5738 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
5739 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
5740 append_pattern_def_seq (vinfo, stmt_vinfo,
5741 stmt, masktype, TREE_TYPE (vectype));
5743 return tmp;
5747 /* Function vect_recog_mask_conversion_pattern
5749 Try to find statements which require boolean type
5750 conversion. Additional conversion statements are
5751 added to handle such cases. For example:
5753 bool m_1, m_2, m_3;
5754 int i_4, i_5;
5755 double d_6, d_7;
5756 char c_1, c_2, c_3;
5758 S1 m_1 = i_4 > i_5;
5759 S2 m_2 = d_6 < d_7;
5760 S3 m_3 = m_1 & m_2;
5761 S4 c_1 = m_3 ? c_2 : c_3;
5763 Will be transformed into:
5765 S1 m_1 = i_4 > i_5;
5766 S2 m_2 = d_6 < d_7;
5767 S3'' m_2' = (_Bool[bitsize=32])m_2
5768 S3' m_3' = m_1 & m_2';
5769 S4'' m_3'' = (_Bool[bitsize=8])m_3'
5770 S4' c_1' = m_3'' ? c_2 : c_3; */
5772 static gimple *
5773 vect_recog_mask_conversion_pattern (vec_info *vinfo,
5774 stmt_vec_info stmt_vinfo, tree *type_out)
5776 gimple *last_stmt = stmt_vinfo->stmt;
5777 enum tree_code rhs_code;
5778 tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
5779 tree vectype1, vectype2;
5780 stmt_vec_info pattern_stmt_info;
5781 tree rhs1_op0 = NULL_TREE, rhs1_op1 = NULL_TREE;
5782 tree rhs1_op0_type = NULL_TREE, rhs1_op1_type = NULL_TREE;
5784 /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion. */
5785 if (is_gimple_call (last_stmt)
5786 && gimple_call_internal_p (last_stmt))
5788 gcall *pattern_stmt;
5790 internal_fn ifn = gimple_call_internal_fn (last_stmt);
5791 int mask_argno = internal_fn_mask_index (ifn);
5792 if (mask_argno < 0)
5793 return NULL;
5795 bool store_p = internal_store_fn_p (ifn);
5796 if (store_p)
5798 int rhs_index = internal_fn_stored_value_index (ifn);
5799 tree rhs = gimple_call_arg (last_stmt, rhs_index);
5800 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs));
5802 else
5804 lhs = gimple_call_lhs (last_stmt);
5805 if (!lhs)
5806 return NULL;
5807 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
5810 tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
5811 tree mask_arg_type = integer_type_for_mask (mask_arg, vinfo);
5812 if (!mask_arg_type)
5813 return NULL;
5814 vectype2 = get_mask_type_for_scalar_type (vinfo, mask_arg_type);
5816 if (!vectype1 || !vectype2
5817 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
5818 TYPE_VECTOR_SUBPARTS (vectype2)))
5819 return NULL;
5821 tmp = build_mask_conversion (vinfo, mask_arg, vectype1, stmt_vinfo);
5823 auto_vec<tree, 8> args;
5824 unsigned int nargs = gimple_call_num_args (last_stmt);
5825 args.safe_grow (nargs, true);
5826 for (unsigned int i = 0; i < nargs; ++i)
5827 args[i] = ((int) i == mask_argno
5828 ? tmp
5829 : gimple_call_arg (last_stmt, i));
5830 pattern_stmt = gimple_build_call_internal_vec (ifn, args);
5832 if (!store_p)
5834 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
5835 gimple_call_set_lhs (pattern_stmt, lhs);
5837 gimple_call_set_nothrow (pattern_stmt, true);
5839 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
5840 if (STMT_VINFO_DATA_REF (stmt_vinfo))
5841 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
5843 *type_out = vectype1;
5844 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
5846 return pattern_stmt;
5849 if (!is_gimple_assign (last_stmt))
5850 return NULL;
5852 gimple *pattern_stmt;
5853 lhs = gimple_assign_lhs (last_stmt);
5854 rhs1 = gimple_assign_rhs1 (last_stmt);
5855 rhs_code = gimple_assign_rhs_code (last_stmt);
5857 /* Check for cond expression requiring mask conversion. */
5858 if (rhs_code == COND_EXPR)
5860 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
5862 if (TREE_CODE (rhs1) == SSA_NAME)
5864 rhs1_type = integer_type_for_mask (rhs1, vinfo);
5865 if (!rhs1_type)
5866 return NULL;
5868 else if (COMPARISON_CLASS_P (rhs1))
5870 /* Check whether we're comparing scalar booleans and (if so)
5871 whether a better mask type exists than the mask associated
5872 with boolean-sized elements. This avoids unnecessary packs
5873 and unpacks if the booleans are set from comparisons of
5874 wider types. E.g. in:
5876 int x1, x2, x3, x4, y1, y2;
5878 bool b1 = (x1 == x2);
5879 bool b2 = (x3 == x4);
5880 ... = b1 == b2 ? y1 : y2;
5882 it is better for b1 and b2 to use the mask type associated
5883 with int elements rather than bool (byte) elements. */
5884 rhs1_op0 = TREE_OPERAND (rhs1, 0);
5885 rhs1_op1 = TREE_OPERAND (rhs1, 1);
5886 if (!rhs1_op0 || !rhs1_op1)
5887 return NULL;
5888 rhs1_op0_type = integer_type_for_mask (rhs1_op0, vinfo);
5889 rhs1_op1_type = integer_type_for_mask (rhs1_op1, vinfo);
5891 if (!rhs1_op0_type)
5892 rhs1_type = TREE_TYPE (rhs1_op0);
5893 else if (!rhs1_op1_type)
5894 rhs1_type = TREE_TYPE (rhs1_op1);
5895 else if (TYPE_PRECISION (rhs1_op0_type)
5896 != TYPE_PRECISION (rhs1_op1_type))
5898 int tmp0 = (int) TYPE_PRECISION (rhs1_op0_type)
5899 - (int) TYPE_PRECISION (TREE_TYPE (lhs));
5900 int tmp1 = (int) TYPE_PRECISION (rhs1_op1_type)
5901 - (int) TYPE_PRECISION (TREE_TYPE (lhs));
5902 if ((tmp0 > 0 && tmp1 > 0) || (tmp0 < 0 && tmp1 < 0))
5904 if (abs (tmp0) > abs (tmp1))
5905 rhs1_type = rhs1_op1_type;
5906 else
5907 rhs1_type = rhs1_op0_type;
5909 else
5910 rhs1_type = build_nonstandard_integer_type
5911 (TYPE_PRECISION (TREE_TYPE (lhs)), 1);
5913 else
5914 rhs1_type = rhs1_op0_type;
5916 else
5917 return NULL;
5919 vectype2 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
5921 if (!vectype1 || !vectype2)
5922 return NULL;
5924 /* Continue if a conversion is needed. Also continue if we have
5925 a comparison whose vector type would normally be different from
5926 VECTYPE2 when considered in isolation. In that case we'll
5927 replace the comparison with an SSA name (so that we can record
5928 its vector type) and behave as though the comparison was an SSA
5929 name from the outset. */
5930 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
5931 TYPE_VECTOR_SUBPARTS (vectype2))
5932 && !rhs1_op0_type
5933 && !rhs1_op1_type)
5934 return NULL;
5936 /* If rhs1 is invariant and we can promote it, leave the COND_EXPR
5937 in place; we can handle it in vectorizable_condition. This avoids
5938 unnecessary promotion stmts and increased vectorization factor. */
5939 if (COMPARISON_CLASS_P (rhs1)
5940 && INTEGRAL_TYPE_P (rhs1_type)
5941 && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
5942 TYPE_VECTOR_SUBPARTS (vectype2)))
5944 enum vect_def_type dt;
5945 if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
5946 && dt == vect_external_def
5947 && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
5948 && (dt == vect_external_def
5949 || dt == vect_constant_def))
5951 tree wide_scalar_type = build_nonstandard_integer_type
5952 (vector_element_bits (vectype1), TYPE_UNSIGNED (rhs1_type));
5953 tree vectype3 = get_vectype_for_scalar_type (vinfo,
5954 wide_scalar_type);
5955 if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
5956 return NULL;
5960 /* If rhs1 is a comparison we need to move it into a
5961 separate statement. */
5962 if (TREE_CODE (rhs1) != SSA_NAME)
5964 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
5965 if (rhs1_op0_type
5966 && TYPE_PRECISION (rhs1_op0_type) != TYPE_PRECISION (rhs1_type))
5967 rhs1_op0 = build_mask_conversion (vinfo, rhs1_op0,
5968 vectype2, stmt_vinfo);
5969 if (rhs1_op1_type
5970 && TYPE_PRECISION (rhs1_op1_type) != TYPE_PRECISION (rhs1_type))
5971 rhs1_op1 = build_mask_conversion (vinfo, rhs1_op1,
5972 vectype2, stmt_vinfo);
5973 pattern_stmt = gimple_build_assign (tmp, TREE_CODE (rhs1),
5974 rhs1_op0, rhs1_op1);
5975 rhs1 = tmp;
5976 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vectype2,
5977 rhs1_type);
5980 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
5981 TYPE_VECTOR_SUBPARTS (vectype2)))
5982 tmp = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
5983 else
5984 tmp = rhs1;
5986 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
5987 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
5988 gimple_assign_rhs2 (last_stmt),
5989 gimple_assign_rhs3 (last_stmt));
5991 *type_out = vectype1;
5992 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
5994 return pattern_stmt;
5997 /* Now check for binary boolean operations requiring conversion for
5998 one of the operands. */
5999 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
6000 return NULL;
6002 if (rhs_code != BIT_IOR_EXPR
6003 && rhs_code != BIT_XOR_EXPR
6004 && rhs_code != BIT_AND_EXPR
6005 && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
6006 return NULL;
6008 rhs2 = gimple_assign_rhs2 (last_stmt);
6010 rhs1_type = integer_type_for_mask (rhs1, vinfo);
6011 rhs2_type = integer_type_for_mask (rhs2, vinfo);
6013 if (!rhs1_type || !rhs2_type
6014 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
6015 return NULL;
6017 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
6019 vectype1 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
6020 if (!vectype1)
6021 return NULL;
6022 rhs2 = build_mask_conversion (vinfo, rhs2, vectype1, stmt_vinfo);
6024 else
6026 vectype1 = get_mask_type_for_scalar_type (vinfo, rhs2_type);
6027 if (!vectype1)
6028 return NULL;
6029 rhs1 = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
6032 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
6033 pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
6035 *type_out = vectype1;
6036 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
6038 return pattern_stmt;
6041 /* STMT_INFO is a load or store. If the load or store is conditional, return
6042 the boolean condition under which it occurs; otherwise return NULL_TREE. */
6044 static tree
6045 vect_get_load_store_mask (stmt_vec_info stmt_info)
6047 if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt))
6049 gcc_assert (gimple_assign_single_p (def_assign));
6050 return NULL_TREE;
6053 if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt))
6055 internal_fn ifn = gimple_call_internal_fn (def_call);
6056 int mask_index = internal_fn_mask_index (ifn);
6057 return gimple_call_arg (def_call, mask_index);
6060 gcc_unreachable ();
6063 /* Return MASK if MASK is suitable for masking an operation on vectors
6064 of type VECTYPE, otherwise convert it into such a form and return
6065 the result. Associate any conversion statements with STMT_INFO's
6066 pattern. */
6068 static tree
6069 vect_convert_mask_for_vectype (tree mask, tree vectype,
6070 stmt_vec_info stmt_info, vec_info *vinfo)
6072 tree mask_type = integer_type_for_mask (mask, vinfo);
6073 if (mask_type)
6075 tree mask_vectype = get_mask_type_for_scalar_type (vinfo, mask_type);
6076 if (mask_vectype
6077 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
6078 TYPE_VECTOR_SUBPARTS (mask_vectype)))
6079 mask = build_mask_conversion (vinfo, mask, vectype, stmt_info);
6081 return mask;
6084 /* Return the equivalent of:
6086 fold_convert (TYPE, VALUE)
6088 with the expectation that the operation will be vectorized.
6089 If new statements are needed, add them as pattern statements
6090 to STMT_INFO. */
6092 static tree
6093 vect_add_conversion_to_pattern (vec_info *vinfo,
6094 tree type, tree value, stmt_vec_info stmt_info)
6096 if (useless_type_conversion_p (type, TREE_TYPE (value)))
6097 return value;
6099 tree new_value = vect_recog_temp_ssa_var (type, NULL);
6100 gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
6101 append_pattern_def_seq (vinfo, stmt_info, conversion,
6102 get_vectype_for_scalar_type (vinfo, type));
6103 return new_value;
6106 /* Try to convert STMT_INFO into a call to a gather load or scatter store
6107 internal function. Return the final statement on success and set
6108 *TYPE_OUT to the vector type being loaded or stored.
6110 This function only handles gathers and scatters that were recognized
6111 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
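/* As an illustrative sketch (not taken from a testcase), a conditional
   gather load such as:

     if (m[i])
       x[i] = a[idx[i]];

   is replaced by a pattern call along the lines of:

     .MASK_GATHER_LOAD (base, offset, scale, 0, mask)

   where OFFSET is idx[i] converted to the offset type and MASK is the
   condition converted by vect_convert_mask_for_vectype.  */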
6113 static gimple *
6114 vect_recog_gather_scatter_pattern (vec_info *vinfo,
6115 stmt_vec_info stmt_info, tree *type_out)
6117 /* Currently we only support this for loop vectorization. */
6118 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6119 if (!loop_vinfo)
6120 return NULL;
6122 /* Make sure that we're looking at a gather load or scatter store. */
6123 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
6124 if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6125 return NULL;
6127 /* Get the boolean that controls whether the load or store happens.
6128 This is null if the operation is unconditional. */
6129 tree mask = vect_get_load_store_mask (stmt_info);
6131 /* Make sure that the target supports an appropriate internal
6132 function for the gather/scatter operation. */
6133 gather_scatter_info gs_info;
6134 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info)
6135 || gs_info.ifn == IFN_LAST)
6136 return NULL;
6138 /* Convert the mask to the right form. */
6139 tree gs_vectype = get_vectype_for_scalar_type (loop_vinfo,
6140 gs_info.element_type);
6141 if (mask)
6142 mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
6143 loop_vinfo);
6144 else if (gs_info.ifn == IFN_MASK_SCATTER_STORE
6145 || gs_info.ifn == IFN_MASK_GATHER_LOAD
6146 || gs_info.ifn == IFN_MASK_LEN_SCATTER_STORE
6147 || gs_info.ifn == IFN_MASK_LEN_GATHER_LOAD)
6148 mask = build_int_cst (TREE_TYPE (truth_type_for (gs_vectype)), -1);
6150 /* Get the invariant base and non-invariant offset, converting the
6151 latter to the same width as the vector elements. */
6152 tree base = gs_info.base;
6153 tree offset_type = TREE_TYPE (gs_info.offset_vectype);
6154 tree offset = vect_add_conversion_to_pattern (vinfo, offset_type,
6155 gs_info.offset, stmt_info);
6157 /* Build the new pattern statement. */
6158 tree scale = size_int (gs_info.scale);
6159 gcall *pattern_stmt;
6160 if (DR_IS_READ (dr))
6162 tree zero = build_zero_cst (gs_info.element_type);
6163 if (mask != NULL)
6164 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base,
6165 offset, scale, zero, mask);
6166 else
6167 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
6168 offset, scale, zero);
6169 tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
6170 gimple_call_set_lhs (pattern_stmt, load_lhs);
6172 else
6174 tree rhs = vect_get_store_rhs (stmt_info);
6175 if (mask != NULL)
6176 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5,
6177 base, offset, scale, rhs,
6178 mask);
6179 else
6180 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4,
6181 base, offset, scale, rhs);
6183 gimple_call_set_nothrow (pattern_stmt, true);
6185 /* Copy across relevant vectorization info and associate DR with the
6186 new pattern statement instead of the original statement. */
6187 stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
6188 loop_vinfo->move_dr (pattern_stmt_info, stmt_info);
6190 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6191 *type_out = vectype;
6192 vect_pattern_detected ("gather/scatter pattern", stmt_info->stmt);
6194 return pattern_stmt;
6197 /* Return true if TYPE is a non-boolean integer type. These are the types
6198 that we want to consider for narrowing. */
6200 static bool
6201 vect_narrowable_type_p (tree type)
6203 return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
6206 /* Return true if the operation given by CODE can be truncated to N bits
6207 when only N bits of the output are needed. This is only true if bit N+1
6208 of the inputs has no effect on the low N bits of the result. */
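/* For example, in (a + b) & 0xff only the low 8 bits of A and B affect
   the low 8 bits of the result, so PLUS_EXPR is truncatable; for a >> 1
   or a / b the low bits of the result do depend on higher input bits,
   so right shifts and divisions are not in the list below.  */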
6210 static bool
6211 vect_truncatable_operation_p (tree_code code)
6213 switch (code)
6215 case PLUS_EXPR:
6216 case MINUS_EXPR:
6217 case MULT_EXPR:
6218 case BIT_AND_EXPR:
6219 case BIT_IOR_EXPR:
6220 case BIT_XOR_EXPR:
6221 case COND_EXPR:
6222 return true;
6224 default:
6225 return false;
6229 /* Record that STMT_INFO could be changed from operating on TYPE to
6230 operating on a type with the precision and sign given by PRECISION
6231 and SIGN respectively. PRECISION is an arbitrary bit precision;
6232 it might not be a whole number of bytes. */
6234 static void
6235 vect_set_operation_type (stmt_vec_info stmt_info, tree type,
6236 unsigned int precision, signop sign)
6238 /* Round the precision up to a whole number of bytes. */
6239 precision = vect_element_precision (precision);
6240 if (precision < TYPE_PRECISION (type)
6241 && (!stmt_info->operation_precision
6242 || stmt_info->operation_precision > precision))
6244 stmt_info->operation_precision = precision;
6245 stmt_info->operation_sign = sign;
6249 /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
6250 non-boolean inputs, all of which have type TYPE. MIN_INPUT_PRECISION
6251 is an arbitrary bit precision; it might not be a whole number of bytes. */
6253 static void
6254 vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
6255 unsigned int min_input_precision)
6257 /* This operation in isolation only requires the inputs to have
6258 MIN_INPUT_PRECISION of precision. However, that doesn't mean
6259 that MIN_INPUT_PRECISION is a natural precision for the chain
6260 as a whole. E.g. consider something like:
6262 unsigned short *x, *y;
6263 *y = ((*x & 0xf0) >> 4) | (*y << 4);
6265 The right shift can be done on unsigned chars, and only requires the
6266 result of "*x & 0xf0" to be done on unsigned chars. But taking that
6267 approach would mean turning a natural chain of single-vector unsigned
6268 short operations into one that truncates "*x" and then extends
6269 "(*x & 0xf0) >> 4", with two vectors for each unsigned short
6270 operation and one vector for each unsigned char operation.
6271 This would be a significant pessimization.
6273 Instead only propagate the maximum of this precision and the precision
6274 required by the users of the result. This means that we don't pessimize
6275 the case above but continue to optimize things like:
6277 unsigned char *y;
6278 unsigned short *x;
6279 *y = ((*x & 0xf0) >> 4) | (*y << 4);
6281 Here we would truncate two vectors of *x to a single vector of
6282 unsigned chars and use single-vector unsigned char operations for
6283 everything else, rather than doing two unsigned short copies of
6284 "(*x & 0xf0) >> 4" and then truncating the result. */
6285 min_input_precision = MAX (min_input_precision,
6286 stmt_info->min_output_precision);
6288 if (min_input_precision < TYPE_PRECISION (type)
6289 && (!stmt_info->min_input_precision
6290 || stmt_info->min_input_precision > min_input_precision))
6291 stmt_info->min_input_precision = min_input_precision;
6294 /* Subroutine of vect_determine_min_output_precision. Return true if
6295 we can calculate a reduced number of output bits for STMT_INFO,
6296 whose result is LHS. */
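/* For instance (illustrative only): if LHS has two users whose recorded
   min_input_precision is 8 and 16 respectively, only the low 16 bits of
   LHS are significant and min_output_precision becomes 16.  */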
6298 static bool
6299 vect_determine_min_output_precision_1 (vec_info *vinfo,
6300 stmt_vec_info stmt_info, tree lhs)
6302 /* Take the maximum precision required by users of the result. */
6303 unsigned int precision = 0;
6304 imm_use_iterator iter;
6305 use_operand_p use;
6306 FOR_EACH_IMM_USE_FAST (use, iter, lhs)
6308 gimple *use_stmt = USE_STMT (use);
6309 if (is_gimple_debug (use_stmt))
6310 continue;
6311 stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
6312 if (!use_stmt_info || !use_stmt_info->min_input_precision)
6313 return false;
6314 /* The input precision recorded for COND_EXPRs applies only to the
6315 "then" and "else" values. */
6316 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
6317 if (assign
6318 && gimple_assign_rhs_code (assign) == COND_EXPR
6319 && use->use != gimple_assign_rhs2_ptr (assign)
6320 && use->use != gimple_assign_rhs3_ptr (assign))
6321 return false;
6322 precision = MAX (precision, use_stmt_info->min_input_precision);
6325 if (dump_enabled_p ())
6326 dump_printf_loc (MSG_NOTE, vect_location,
6327 "only the low %d bits of %T are significant\n",
6328 precision, lhs);
6329 stmt_info->min_output_precision = precision;
6330 return true;
6333 /* Calculate min_output_precision for STMT_INFO. */
6335 static void
6336 vect_determine_min_output_precision (vec_info *vinfo, stmt_vec_info stmt_info)
6338 /* We're only interested in statements with a narrowable result. */
6339 tree lhs = gimple_get_lhs (stmt_info->stmt);
6340 if (!lhs
6341 || TREE_CODE (lhs) != SSA_NAME
6342 || !vect_narrowable_type_p (TREE_TYPE (lhs)))
6343 return;
6345 if (!vect_determine_min_output_precision_1 (vinfo, stmt_info, lhs))
6346 stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
6349 /* Use range information to decide whether STMT (described by STMT_INFO)
6350 could be done in a narrower type. This is effectively a forward
6351 propagation, since it uses context-independent information that applies
6352 to all users of an SSA name. */
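/* A sketch of the idea (not from a testcase): given

     unsigned char *a, *b;
     int sum = (int) a[i] + (int) b[i];

   the result is known to lie in [0, 510], so the addition needs only
   9 bits and, after rounding the precision up to a whole number of
   bytes, can be carried out in a 16-bit unsigned type.  */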
6354 static void
6355 vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
6357 tree lhs = gimple_assign_lhs (stmt);
6358 if (!lhs || TREE_CODE (lhs) != SSA_NAME)
6359 return;
6361 tree type = TREE_TYPE (lhs);
6362 if (!vect_narrowable_type_p (type))
6363 return;
6365 /* First see whether we have any useful range information for the result. */
6366 unsigned int precision = TYPE_PRECISION (type);
6367 signop sign = TYPE_SIGN (type);
6368 wide_int min_value, max_value;
6369 if (!vect_get_range_info (lhs, &min_value, &max_value))
6370 return;
6372 tree_code code = gimple_assign_rhs_code (stmt);
6373 unsigned int nops = gimple_num_ops (stmt);
6375 if (!vect_truncatable_operation_p (code))
6376 /* Check that all relevant input operands are compatible, and update
6377 [MIN_VALUE, MAX_VALUE] to include their ranges. */
6378 for (unsigned int i = 1; i < nops; ++i)
6380 tree op = gimple_op (stmt, i);
6381 if (TREE_CODE (op) == INTEGER_CST)
6383 /* Don't require the integer to have RHS_TYPE (which it might
6384 not for things like shift amounts, etc.), but do require it
6385 to fit the type. */
6386 if (!int_fits_type_p (op, type))
6387 return;
6389 min_value = wi::min (min_value, wi::to_wide (op, precision), sign);
6390 max_value = wi::max (max_value, wi::to_wide (op, precision), sign);
6392 else if (TREE_CODE (op) == SSA_NAME)
6394 /* Ignore codes that don't take uniform arguments. */
6395 if (!types_compatible_p (TREE_TYPE (op), type))
6396 return;
6398 wide_int op_min_value, op_max_value;
6399 if (!vect_get_range_info (op, &op_min_value, &op_max_value))
6400 return;
6402 min_value = wi::min (min_value, op_min_value, sign);
6403 max_value = wi::max (max_value, op_max_value, sign);
6405 else
6406 return;
6409 /* Try to switch signed types for unsigned types if we can.
6410 This is better for two reasons. First, unsigned ops tend
6411 to be cheaper than signed ops. Second, it means that we can
6412 handle things like:
6414 signed char c;
6415 int res = (int) c & 0xff00; // range [0x0000, 0xff00]
as:
6419 signed char c;
6420 unsigned short res_1 = (unsigned short) c & 0xff00;
6421 int res = (int) res_1;
6423 where the intermediate result res_1 has unsigned rather than
6424 signed type. */
6425 if (sign == SIGNED && !wi::neg_p (min_value))
6426 sign = UNSIGNED;
6428 /* See what precision is required for MIN_VALUE and MAX_VALUE. */
6429 unsigned int precision1 = wi::min_precision (min_value, sign);
6430 unsigned int precision2 = wi::min_precision (max_value, sign);
6431 unsigned int value_precision = MAX (precision1, precision2);
6432 if (value_precision >= precision)
6433 return;
6435 if (dump_enabled_p ())
6436 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
6437 " without loss of precision: %G",
6438 sign == SIGNED ? "signed" : "unsigned",
6439 value_precision, (gimple *) stmt);
6441 vect_set_operation_type (stmt_info, type, value_precision, sign);
6442 vect_set_min_input_precision (stmt_info, type, value_precision);
6445 /* Use information about the users of STMT's result to decide whether
6446 STMT (described by STMT_INFO) could be done in a narrower type.
6447 This is effectively a backward propagation. */
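/* As an illustrative sketch: if only the low 8 bits of

     a = b << 3;

   are needed by A's users, the shift itself is done in
   MAX (8, 3 + 1) = 8 bits (avoiding an undefined shift), while only
   the low MAX (8, 3) - 3 = 5 bits of B are needed, per the
   LSHIFT_EXPR case below.  */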
6449 static void
6450 vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
6452 tree_code code = gimple_assign_rhs_code (stmt);
6453 unsigned int opno = (code == COND_EXPR ? 2 : 1);
6454 tree type = TREE_TYPE (gimple_op (stmt, opno));
6455 if (!vect_narrowable_type_p (type))
6456 return;
6458 unsigned int precision = TYPE_PRECISION (type);
6459 unsigned int operation_precision, min_input_precision;
6460 switch (code)
6462 CASE_CONVERT:
6463 /* Only the bits that contribute to the output matter. Don't change
6464 the precision of the operation itself. */
6465 operation_precision = precision;
6466 min_input_precision = stmt_info->min_output_precision;
6467 break;
6469 case LSHIFT_EXPR:
6470 case RSHIFT_EXPR:
6472 tree shift = gimple_assign_rhs2 (stmt);
6473 if (TREE_CODE (shift) != INTEGER_CST
6474 || !wi::ltu_p (wi::to_widest (shift), precision))
6475 return;
6476 unsigned int const_shift = TREE_INT_CST_LOW (shift);
6477 if (code == LSHIFT_EXPR)
6479 /* Avoid creating an undefined shift.
6481 ??? We could instead use min_output_precision as-is and
6482 optimize out-of-range shifts to zero. However, only
6483 degenerate testcases shift away all their useful input data,
6484 and it isn't natural to drop input operations in the middle
6485 of vectorization. This sort of thing should really be
6486 handled before vectorization. */
6487 operation_precision = MAX (stmt_info->min_output_precision,
6488 const_shift + 1);
6489 /* We need CONST_SHIFT fewer bits of the input. */
6490 min_input_precision = (MAX (operation_precision, const_shift)
6491 - const_shift);
6493 else
6495 /* We need CONST_SHIFT extra bits to do the operation. */
6496 operation_precision = (stmt_info->min_output_precision
6497 + const_shift);
6498 min_input_precision = operation_precision;
6500 break;
6503 default:
6504 if (vect_truncatable_operation_p (code))
6506 /* Input bit N has no effect on output bits N-1 and lower. */
6507 operation_precision = stmt_info->min_output_precision;
6508 min_input_precision = operation_precision;
6509 break;
6511 return;
6514 if (operation_precision < precision)
6516 if (dump_enabled_p ())
6517 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
6518 " without affecting users: %G",
6519 TYPE_UNSIGNED (type) ? "unsigned" : "signed",
6520 operation_precision, (gimple *) stmt);
6521 vect_set_operation_type (stmt_info, type, operation_precision,
6522 TYPE_SIGN (type));
6524 vect_set_min_input_precision (stmt_info, type, min_input_precision);
6527 /* Return true if the statement described by STMT_INFO sets a boolean
6528 SSA_NAME and if we know how to vectorize this kind of statement using
6529 vector mask types. */
6531 static bool
6532 possible_vector_mask_operation_p (stmt_vec_info stmt_info)
6534 tree lhs = gimple_get_lhs (stmt_info->stmt);
6535 if (!lhs
6536 || TREE_CODE (lhs) != SSA_NAME
6537 || !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
6538 return false;
6540 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
6542 tree_code rhs_code = gimple_assign_rhs_code (assign);
6543 switch (rhs_code)
6545 CASE_CONVERT:
6546 case SSA_NAME:
6547 case BIT_NOT_EXPR:
6548 case BIT_IOR_EXPR:
6549 case BIT_XOR_EXPR:
6550 case BIT_AND_EXPR:
6551 return true;
6553 default:
6554 return TREE_CODE_CLASS (rhs_code) == tcc_comparison;
6557 else if (is_a <gphi *> (stmt_info->stmt))
6558 return true;
6559 return false;
6562 /* If STMT_INFO sets a boolean SSA_NAME, see whether we should use
6563 a vector mask type instead of a normal vector type. Record the
6564 result in STMT_INFO->mask_precision. */
6566 static void
6567 vect_determine_mask_precision (vec_info *vinfo, stmt_vec_info stmt_info)
6569 if (!possible_vector_mask_operation_p (stmt_info))
6570 return;
6572 /* If at least one boolean input uses a vector mask type,
6573 pick the mask type with the narrowest elements.
6575 ??? This is the traditional behavior. It should always produce
6576 the smallest number of operations, but isn't necessarily the
6577 optimal choice. For example, if we have:
6579 a = b & c
6581 where:
6583 - the user of a wants it to have a mask type for 16-bit elements (M16)
6584 - b also uses M16
6585 - c uses a mask type for 8-bit elements (M8)
6587 then picking M8 gives:
6589 - 1 M16->M8 pack for b
6590 - 1 M8 AND for a
6591 - 2 M8->M16 unpacks for the user of a
6593 whereas picking M16 would have given:
6595 - 2 M8->M16 unpacks for c
6596 - 2 M16 ANDs for a
6598 The number of operations are equal, but M16 would have given
6599 a shorter dependency chain and allowed more ILP. */
6600 unsigned int precision = ~0U;
6601 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
6603 unsigned int nops = gimple_num_ops (assign);
6604 for (unsigned int i = 1; i < nops; ++i)
6606 tree rhs = gimple_op (assign, i);
6607 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs)))
6608 continue;
6610 stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
6611 if (!def_stmt_info)
6612 /* Don't let external or constant operands influence the choice.
6613 We can convert them to whichever vector type we pick. */
6614 continue;
6616 if (def_stmt_info->mask_precision)
6618 if (precision > def_stmt_info->mask_precision)
6619 precision = def_stmt_info->mask_precision;
6623 /* If the statement compares two values that shouldn't use vector masks,
6624 try comparing the values as normal scalars instead. */
6625 tree_code rhs_code = gimple_assign_rhs_code (assign);
6626 if (precision == ~0U
6627 && TREE_CODE_CLASS (rhs_code) == tcc_comparison)
6629 tree rhs1_type = TREE_TYPE (gimple_assign_rhs1 (assign));
6630 scalar_mode mode;
6631 tree vectype, mask_type;
6632 if (is_a <scalar_mode> (TYPE_MODE (rhs1_type), &mode)
6633 && (vectype = get_vectype_for_scalar_type (vinfo, rhs1_type))
6634 && (mask_type = get_mask_type_for_scalar_type (vinfo, rhs1_type))
6635 && expand_vec_cmp_expr_p (vectype, mask_type, rhs_code))
6636 precision = GET_MODE_BITSIZE (mode);
6639 else
6641 gphi *phi = as_a <gphi *> (stmt_info->stmt);
6642 for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
6644 tree rhs = gimple_phi_arg_def (phi, i);
6646 stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
6647 if (!def_stmt_info)
6648 /* Don't let external or constant operands influence the choice.
6649 We can convert them to whichever vector type we pick. */
6650 continue;
6652 if (def_stmt_info->mask_precision)
6654 if (precision > def_stmt_info->mask_precision)
6655 precision = def_stmt_info->mask_precision;
6660 if (dump_enabled_p ())
6662 if (precision == ~0U)
6663 dump_printf_loc (MSG_NOTE, vect_location,
6664 "using normal nonmask vectors for %G",
6665 stmt_info->stmt);
6666 else
6667 dump_printf_loc (MSG_NOTE, vect_location,
6668 "using boolean precision %d for %G",
6669 precision, stmt_info->stmt);
6672 stmt_info->mask_precision = precision;
6675 /* Handle vect_determine_precisions for STMT_INFO, given that we
6676 have already done so for the users of its result. */
6678 void
6679 vect_determine_stmt_precisions (vec_info *vinfo, stmt_vec_info stmt_info)
6681 vect_determine_min_output_precision (vinfo, stmt_info);
6682 if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
6684 vect_determine_precisions_from_range (stmt_info, stmt);
6685 vect_determine_precisions_from_users (stmt_info, stmt);
6689 /* Walk backwards through the vectorizable region to determine the
6690 values of these fields:
6692 - min_output_precision
6693 - min_input_precision
6694 - operation_precision
6695 - operation_sign. */
6697 void
6698 vect_determine_precisions (vec_info *vinfo)
6700 DUMP_VECT_SCOPE ("vect_determine_precisions");
6702 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
6704 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6705 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
6706 unsigned int nbbs = loop->num_nodes;
6708 for (unsigned int i = 0; i < nbbs; i++)
6710 basic_block bb = bbs[i];
6711 for (auto gsi = gsi_start_phis (bb);
6712 !gsi_end_p (gsi); gsi_next (&gsi))
6714 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
6715 if (stmt_info)
6716 vect_determine_mask_precision (vinfo, stmt_info);
6718 for (auto si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
6719 if (!is_gimple_debug (gsi_stmt (si)))
6720 vect_determine_mask_precision
6721 (vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
6723 for (unsigned int i = 0; i < nbbs; i++)
6725 basic_block bb = bbs[nbbs - i - 1];
6726 for (gimple_stmt_iterator si = gsi_last_bb (bb);
6727 !gsi_end_p (si); gsi_prev (&si))
6728 if (!is_gimple_debug (gsi_stmt (si)))
6729 vect_determine_stmt_precisions
6730 (vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
6731 for (auto gsi = gsi_start_phis (bb);
6732 !gsi_end_p (gsi); gsi_next (&gsi))
6734 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
6735 if (stmt_info)
6736 vect_determine_stmt_precisions (vinfo, stmt_info);
6740 else
6742 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
6743 for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
6745 basic_block bb = bb_vinfo->bbs[i];
6746 for (auto gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
6748 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
6749 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
6750 vect_determine_mask_precision (vinfo, stmt_info);
6752 for (auto gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
6754 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
6755 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
6756 vect_determine_mask_precision (vinfo, stmt_info);
6759 for (int i = bb_vinfo->bbs.length () - 1; i != -1; --i)
6761 for (gimple_stmt_iterator gsi = gsi_last_bb (bb_vinfo->bbs[i]);
6762 !gsi_end_p (gsi); gsi_prev (&gsi))
6764 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
6765 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
6766 vect_determine_stmt_precisions (vinfo, stmt_info);
6768 for (auto gsi = gsi_start_phis (bb_vinfo->bbs[i]);
6769 !gsi_end_p (gsi); gsi_next (&gsi))
6771 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
6772 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
6773 vect_determine_stmt_precisions (vinfo, stmt_info);
6779 typedef gimple *(*vect_recog_func_ptr) (vec_info *, stmt_vec_info, tree *);
6781 struct vect_recog_func
6783 vect_recog_func_ptr fn;
6784 const char *name;
6787 /* Note that ordering matters - the first pattern matching on a stmt is
6788 taken, which means the more complex one usually needs to precede the
6789 less complex ones (widen_sum only after dot_prod or sad for example). */
6790 static vect_recog_func vect_vect_recog_func_ptrs[] = {
6791 { vect_recog_bitfield_ref_pattern, "bitfield_ref" },
6792 { vect_recog_bit_insert_pattern, "bit_insert" },
6793 { vect_recog_abd_pattern, "abd" },
6794 { vect_recog_over_widening_pattern, "over_widening" },
6795 /* Must come after over_widening, which narrows the shift as much as
6796 possible beforehand. */
6797 { vect_recog_average_pattern, "average" },
6798 { vect_recog_cond_expr_convert_pattern, "cond_expr_convert" },
6799 { vect_recog_mulhs_pattern, "mult_high" },
6800 { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
6801 { vect_recog_widen_mult_pattern, "widen_mult" },
6802 { vect_recog_dot_prod_pattern, "dot_prod" },
6803 { vect_recog_sad_pattern, "sad" },
6804 { vect_recog_widen_sum_pattern, "widen_sum" },
6805 { vect_recog_pow_pattern, "pow" },
6806 { vect_recog_popcount_clz_ctz_ffs_pattern, "popcount_clz_ctz_ffs" },
6807 { vect_recog_ctz_ffs_pattern, "ctz_ffs" },
6808 { vect_recog_widen_shift_pattern, "widen_shift" },
6809 { vect_recog_rotate_pattern, "rotate" },
6810 { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
6811 { vect_recog_divmod_pattern, "divmod" },
6812 { vect_recog_mult_pattern, "mult" },
6813 { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
6814 { vect_recog_bool_pattern, "bool" },
6815 /* This must come before mask conversion, and includes the parts
6816 of mask conversion that are needed for gather and scatter
6817 internal functions. */
6818 { vect_recog_gather_scatter_pattern, "gather_scatter" },
6819 { vect_recog_mask_conversion_pattern, "mask_conversion" },
6820 { vect_recog_widen_plus_pattern, "widen_plus" },
6821 { vect_recog_widen_minus_pattern, "widen_minus" },
6822 { vect_recog_widen_abd_pattern, "widen_abd" },
6823 /* These must come after the double widening ones. */
6826 const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);
6828 /* Mark statements that are involved in a pattern. */
6830 void
6831 vect_mark_pattern_stmts (vec_info *vinfo,
6832 stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
6833 tree pattern_vectype)
6835 stmt_vec_info orig_stmt_info_saved = orig_stmt_info;
6836 gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
6838 gimple *orig_pattern_stmt = NULL;
6839 if (is_pattern_stmt_p (orig_stmt_info))
6841 /* We're replacing a statement in an existing pattern definition
6842 sequence. */
6843 orig_pattern_stmt = orig_stmt_info->stmt;
6844 if (dump_enabled_p ())
6845 dump_printf_loc (MSG_NOTE, vect_location,
6846 "replacing earlier pattern %G", orig_pattern_stmt);
6848 /* To keep the book-keeping simple, just swap the lhs of the
6849 old and new statements, so that the old one has a valid but
6850 unused lhs. */
6851 tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
6852 gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
6853 gimple_set_lhs (pattern_stmt, old_lhs);
6855 if (dump_enabled_p ())
6856 dump_printf_loc (MSG_NOTE, vect_location, "with %G", pattern_stmt);
6858 /* Switch to the statement that ORIG replaces. */
6859 orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);
6861 /* We shouldn't be replacing the main pattern statement. */
6862 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
6863 != orig_pattern_stmt);
6866 if (def_seq)
6867 for (gimple_stmt_iterator si = gsi_start (def_seq);
6868 !gsi_end_p (si); gsi_next (&si))
6870 if (dump_enabled_p ())
6871 dump_printf_loc (MSG_NOTE, vect_location,
6872 "extra pattern stmt: %G", gsi_stmt (si));
6873 stmt_vec_info pattern_stmt_info
6874 = vect_init_pattern_stmt (vinfo, gsi_stmt (si),
6875 orig_stmt_info, pattern_vectype);
6876 /* Stmts in the def sequence are not vectorizable cycle or
6877 induction defs; instead they should all be vect_internal_def
6878 feeding the main pattern stmt which retains this def type. */
6879 STMT_VINFO_DEF_TYPE (pattern_stmt_info) = vect_internal_def;
6882 if (orig_pattern_stmt)
6884 vect_init_pattern_stmt (vinfo, pattern_stmt,
6885 orig_stmt_info, pattern_vectype);
6887 /* Insert all the new pattern statements before the original one. */
6888 gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
6889 gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
6890 orig_def_seq);
6891 gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
6892 gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);
6894 /* Remove the pattern statement that this new pattern replaces. */
6895 gsi_remove (&gsi, false);
6897 else
6898 vect_set_pattern_stmt (vinfo,
6899 pattern_stmt, orig_stmt_info, pattern_vectype);
6901 /* Transfer reduction path info to the pattern. */
6902 if (STMT_VINFO_REDUC_IDX (orig_stmt_info_saved) != -1)
6904 gimple_match_op op;
6905 if (!gimple_extract_op (orig_stmt_info_saved->stmt, &op))
6906 gcc_unreachable ();
6907 tree lookfor = op.ops[STMT_VINFO_REDUC_IDX (orig_stmt_info)];
6908 /* Search the pattern def sequence and the main pattern stmt. Note
6909 we may have inserted all into a containing pattern def sequence
6910 so the following is a bit awkward. */
6911 gimple_stmt_iterator si;
6912 gimple *s;
6913 if (def_seq)
6915 si = gsi_start (def_seq);
6916 s = gsi_stmt (si);
6917 gsi_next (&si);
6919 else
6921 si = gsi_none ();
6922 s = pattern_stmt;
6926 bool found = false;
6927 if (gimple_extract_op (s, &op))
6928 for (unsigned i = 0; i < op.num_ops; ++i)
6929 if (op.ops[i] == lookfor)
6931 STMT_VINFO_REDUC_IDX (vinfo->lookup_stmt (s)) = i;
6932 lookfor = gimple_get_lhs (s);
6933 found = true;
6934 break;
6936 if (s == pattern_stmt)
6938 if (!found && dump_enabled_p ())
6939 dump_printf_loc (MSG_NOTE, vect_location,
6940 "failed to update reduction index.\n");
6941 break;
6943 if (gsi_end_p (si))
6944 s = pattern_stmt;
6945 else
6947 s = gsi_stmt (si);
6948 if (s == pattern_stmt)
6949 /* Found the end inside a bigger pattern def seq. */
6950 si = gsi_none ();
6951 else
6952 gsi_next (&si);
6954 } while (1);
6958 /* Function vect_pattern_recog_1
6960 Input:
6961 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
6962 computation pattern.
6963 STMT_INFO: A stmt from which the pattern search should start.
6965 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
6966 a sequence of statements that has the same functionality and can be
6967 used to replace STMT_INFO. It returns the last statement in the sequence
6968 and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
6969 PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
6970 statement, having first checked that the target supports the new operation
6971 in that type.
6973 This function also does some bookkeeping, as explained in the documentation
6974 for vect_pattern_recog. */
static void
vect_pattern_recog_1 (vec_info *vinfo,
		      vect_recog_func *recog_func, stmt_vec_info stmt_info)
{
  gimple *pattern_stmt;
  loop_vec_info loop_vinfo;
  tree pattern_vectype;

  /* If this statement has already been replaced with pattern statements,
     leave the original statement alone, since the first match wins.
     Instead try to match against the definition statements that feed
     the main pattern statement.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
	   !gsi_end_p (gsi); gsi_next (&gsi))
	vect_pattern_recog_1 (vinfo, recog_func,
			      vinfo->lookup_stmt (gsi_stmt (gsi)));
      return;
    }

  gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
  pattern_stmt = recog_func->fn (vinfo, stmt_info, &pattern_vectype);
  if (!pattern_stmt)
    {
      /* Clear any half-formed pattern definition sequence.  */
      STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
      return;
    }

  loop_vinfo = dyn_cast <loop_vec_info> (vinfo);

  /* Found a vectorizable pattern.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "%s pattern recognized: %G",
		     recog_func->name, pattern_stmt);

  /* Mark the stmts that are involved in the pattern.  */
  vect_mark_pattern_stmts (vinfo, stmt_info, pattern_stmt, pattern_vectype);

  /* Patterns cannot be vectorized using SLP, because they change the order of
     computation.  */
  if (loop_vinfo)
    {
      unsigned ix, ix2;
      stmt_vec_info *elem_ptr;
      VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
			     elem_ptr, *elem_ptr == stmt_info);
    }
}
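
/* For reference, a minimal sketch of the recognizer contract that
   recog_func->fn is expected to follow.  The function name, NEW_TYPE and
   the operands below are hypothetical; the helpers named
   (get_vectype_for_scalar_type, vect_recog_temp_ssa_var,
   append_pattern_def_seq) are the ones the real recognizers in this file
   use:

     static gimple *
     vect_recog_example_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
				 tree *type_out)
     {
       // Give up (return NULL) unless stmt_vinfo->stmt matches the idiom
       // and the target supports the replacement operation.
       tree vectype = get_vectype_for_scalar_type (vinfo, new_type);
       if (!vectype)
	 return NULL;

       // Earlier statements go on the pattern def sequence ...
       tree tmp = vect_recog_temp_ssa_var (new_type, NULL);
       append_pattern_def_seq (vinfo, stmt_vinfo,
			       gimple_build_assign (tmp, NOP_EXPR, oprnd0));

       // ... the last statement is returned and *TYPE_OUT is set.
       *type_out = vectype;
       return gimple_build_assign (vect_recog_temp_ssa_var (new_type, NULL),
				   PLUS_EXPR, tmp, oprnd1);
     }  */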
/* Function vect_pattern_recog

   Input:
   VINFO - the vec_info describing the loop or basic-block region in which
        we want to look for computation idioms.

   Output - for each computation idiom that is detected we create a new stmt
        that provides the same functionality and that can be vectorized.  We
        also record some information in the stmt_vec_info of the relevant
        stmts, as explained below:

   At the entry to this function we have the following stmts, with the
   following initial value in the STMT_VINFO fields:

         stmt                      in_pattern_p  related_stmt    vec_stmt
         S1: a_i = ....                 -               -               -
         S2: a_2 = ..use(a_i)..         -               -               -
         S3: a_1 = ..use(a_2)..         -               -               -
         S4: a_0 = ..use(a_1)..         -               -               -
         S5: ... = ..use(a_0)..         -               -               -

   Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
   represented by a single stmt.  We then:
   - create a new stmt S6 equivalent to the pattern (the stmt is not
     inserted into the code)
   - fill in the STMT_VINFO fields as follows:

                                  in_pattern_p  related_stmt    vec_stmt
         S1: a_i = ....                 -               -               -
         S2: a_2 = ..use(a_i)..         -               -               -
         S3: a_1 = ..use(a_2)..         -               -               -
         S4: a_0 = ..use(a_1)..         true            S6              -
         '---> S6: a_new = ....         -               S4              -
         S5: ... = ..use(a_0)..         -               -               -

   (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
   to each other through the RELATED_STMT field).

   S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
   of S4 because it will replace all its uses.  Stmts {S1,S2,S3} will
   remain irrelevant unless used by stmts other than S4.

   If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
   (because they are marked as irrelevant).  It will vectorize S6, and record
   a pointer to the new vector stmt VS6 from S6 (as usual).
   S4 will be skipped, and S5 will be vectorized as usual:

                                  in_pattern_p  related_stmt    vec_stmt
         S1: a_i = ....                 -               -               -
         S2: a_2 = ..use(a_i)..         -               -               -
         S3: a_1 = ..use(a_2)..         -               -               -
       > VS6: va_new = ....             -               -               -
         S4: a_0 = ..use(a_1)..         true            S6              VS6
         '---> S6: a_new = ....         -               S4              VS6
       > VS5: ... = ..vuse(va_new)..    -               -               -
         S5: ... = ..use(a_0)..         -               -               -

   DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
   elsewhere), and we'll end up with:

        VS6: va_new = ....
        VS5: ... = ..vuse(va_new)..

   In case of more than one pattern statement, e.g., widen-mult with
   intermediate type:

     S1  a_t = ;
     S2  a_T = (TYPE) a_t;
           '--> S3: a_it = (interm_type) a_t;
     S4  prod_T = a_T * CONST;
           '--> S5: prod_T' = a_it w* CONST;

   there may be other users of a_T outside the pattern.  In that case S2 will
   be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
   and vectorized.  The vector stmt VS2 will be recorded in S2, and VS3 will
   be recorded in S3.  */
void
vect_pattern_recog (vec_info *vinfo)
{
  class loop *loop;
  basic_block *bbs;
  unsigned int nbbs;
  gimple_stmt_iterator si;
  unsigned int i, j;

  vect_determine_precisions (vinfo);

  DUMP_VECT_SCOPE ("vect_pattern_recog");

  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      bbs = LOOP_VINFO_BBS (loop_vinfo);
      nbbs = loop->num_nodes;

      /* Scan through the loop stmts, applying the pattern recognition
	 functions starting at each stmt visited:  */
      for (i = 0; i < nbbs; i++)
	{
	  basic_block bb = bbs[i];
	  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	    {
	      if (is_gimple_debug (gsi_stmt (si)))
		continue;
	      stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
	      /* Scan over all generic vect_recog_xxx_pattern functions.  */
	      for (j = 0; j < NUM_PATTERNS; j++)
		vect_pattern_recog_1 (vinfo, &vect_vect_recog_func_ptrs[j],
				      stmt_info);
	    }
	}
    }
  else
    {
      bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
      for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
	for (gimple_stmt_iterator gsi = gsi_start_bb (bb_vinfo->bbs[i]);
	     !gsi_end_p (gsi); gsi_next (&gsi))
	  {
	    stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (gsi_stmt (gsi));
	    if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info))
	      continue;

	    /* Scan over all generic vect_recog_xxx_pattern functions.  */
	    for (j = 0; j < NUM_PATTERNS; j++)
	      vect_pattern_recog_1 (vinfo,
				    &vect_vect_recog_func_ptrs[j], stmt_info);
	  }
    }

  /* After this no more add_stmt calls are allowed.  */
  vinfo->stmt_vec_info_ro = true;
}
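
/* Consumer-side note, a sketch only (SI is a hypothetical stmt_vec_info):
   once this pass has run, code that wants the statement to analyze and
   vectorize follows the RELATED_STMT link of a replaced statement,

     if (STMT_VINFO_IN_PATTERN_P (si))
       si = STMT_VINFO_RELATED_STMT (si);

   which is the lookup described by the S1..S6 example above (compare
   vect_stmt_to_vectorize in tree-vectorizer.h).  */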
/* Build a GIMPLE_ASSIGN or GIMPLE_CALL with the tree_code or internal_fn
   contained in CH, respectively.  */

gimple *
vect_gimple_build (tree lhs, code_helper ch, tree op0, tree op1)
{
  gcc_assert (op0 != NULL_TREE);
  if (ch.is_tree_code ())
    return gimple_build_assign (lhs, (tree_code) ch, op0, op1);

  gcc_assert (ch.is_internal_fn ());
  gimple *stmt = gimple_build_call_internal (as_internal_fn ((combined_fn) ch),
					     op1 == NULL_TREE ? 1 : 2,
					     op0, op1);
  gimple_call_set_lhs (stmt, lhs);
  return stmt;
}
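
/* Usage sketch (LHS, A and B are hypothetical operands): with a tree code
   the helper builds a plain assignment,

     vect_gimple_build (lhs, PLUS_EXPR, a, b);           // lhs = a + b

   while with an internal function wrapped in the code_helper it builds an
   internal call, passing NULL_TREE as OP1 for unary operations,

     vect_gimple_build (lhs, IFN_SQRT, a, NULL_TREE);    // lhs = .SQRT (a)
 */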