gcc/tree-vect-patterns.cc
1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2023 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "gimple-iterator.h"
29 #include "gimple-fold.h"
30 #include "ssa.h"
31 #include "expmed.h"
32 #include "optabs-tree.h"
33 #include "insn-config.h"
34 #include "recog.h" /* FIXME: for insn_data */
35 #include "fold-const.h"
36 #include "stor-layout.h"
37 #include "tree-eh.h"
38 #include "gimplify.h"
39 #include "gimple-iterator.h"
40 #include "gimple-fold.h"
41 #include "gimplify-me.h"
42 #include "cfgloop.h"
43 #include "tree-vectorizer.h"
44 #include "dumpfile.h"
45 #include "builtins.h"
46 #include "internal-fn.h"
47 #include "case-cfn-macros.h"
48 #include "fold-const-call.h"
49 #include "attribs.h"
50 #include "cgraph.h"
51 #include "omp-simd-clone.h"
52 #include "predict.h"
53 #include "tree-vector-builder.h"
54 #include "vec-perm-indices.h"
55 #include "gimple-range.h"
58 /* TODO: Note the vectorizer still builds COND_EXPRs with GENERIC compares
59 in the first operand. Disentangling this is future work; the
60 IL is properly transferred to VEC_COND_EXPRs with separate compares. */
63 /* Return true if we have a useful VR_RANGE range for VAR, storing it
64 in *MIN_VALUE and *MAX_VALUE if so. Note the range in the dump files. */
66 bool
67 vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
69 value_range vr;
70 tree vr_min, vr_max;
71 get_range_query (cfun)->range_of_expr (vr, var);
72 if (vr.undefined_p ())
73 vr.set_varying (TREE_TYPE (var));
74 value_range_kind vr_type = get_legacy_range (vr, vr_min, vr_max);
75 *min_value = wi::to_wide (vr_min);
76 *max_value = wi::to_wide (vr_max);
77 wide_int nonzero = get_nonzero_bits (var);
78 signop sgn = TYPE_SIGN (TREE_TYPE (var));
79 if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
80 nonzero, sgn) == VR_RANGE)
82 if (dump_enabled_p ())
84 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
85 dump_printf (MSG_NOTE, " has range [");
86 dump_hex (MSG_NOTE, *min_value);
87 dump_printf (MSG_NOTE, ", ");
88 dump_hex (MSG_NOTE, *max_value);
89 dump_printf (MSG_NOTE, "]\n");
91 return true;
93 else
95 if (dump_enabled_p ())
97 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
98 dump_printf (MSG_NOTE, " has no range info\n");
100 return false;
104 /* Report that we've found an instance of pattern PATTERN in
105 statement STMT. */
107 static void
108 vect_pattern_detected (const char *name, gimple *stmt)
110 if (dump_enabled_p ())
111 dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt);
114 /* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
115 return the pattern statement's stmt_vec_info. Set its vector type to
116 VECTYPE if it doesn't have one already. */
118 static stmt_vec_info
119 vect_init_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
120 stmt_vec_info orig_stmt_info, tree vectype)
122 stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
123 if (pattern_stmt_info == NULL)
124 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
125 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));
127 pattern_stmt_info->pattern_stmt_p = true;
128 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
129 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
130 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
131 STMT_VINFO_TYPE (pattern_stmt_info) = STMT_VINFO_TYPE (orig_stmt_info);
132 if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
134 gcc_assert (!vectype
135 || (VECTOR_BOOLEAN_TYPE_P (vectype)
136 == vect_use_mask_type_p (orig_stmt_info)));
137 STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
138 pattern_stmt_info->mask_precision = orig_stmt_info->mask_precision;
140 return pattern_stmt_info;
143 /* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
144 Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
145 have one already. */
147 static void
148 vect_set_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
149 stmt_vec_info orig_stmt_info, tree vectype)
151 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
152 STMT_VINFO_RELATED_STMT (orig_stmt_info)
153 = vect_init_pattern_stmt (vinfo, pattern_stmt, orig_stmt_info, vectype);
156 /* Add NEW_STMT to STMT_INFO's pattern definition statements. If VECTYPE
157 is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
158 be different from the vector type of the final pattern statement.
159 If VECTYPE is a mask type, SCALAR_TYPE_FOR_MASK is the scalar type
160 from which it was derived. */
162 static inline void
163 append_pattern_def_seq (vec_info *vinfo,
164 stmt_vec_info stmt_info, gimple *new_stmt,
165 tree vectype = NULL_TREE,
166 tree scalar_type_for_mask = NULL_TREE)
168 gcc_assert (!scalar_type_for_mask
169 == (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype)));
170 if (vectype)
172 stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
173 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
174 if (scalar_type_for_mask)
175 new_stmt_info->mask_precision
176 = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (scalar_type_for_mask));
178 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
179 new_stmt);
182 /* The caller wants to perform new operations on vect_external variable
183 VAR, so that the result of the operations would also be vect_external.
184 Return the edge on which the operations can be performed, if one exists.
185 Return null if the operations should instead be treated as part of
186 the pattern that needs them. */
188 static edge
189 vect_get_external_def_edge (vec_info *vinfo, tree var)
191 edge e = NULL;
192 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
194 e = loop_preheader_edge (loop_vinfo->loop);
195 if (!SSA_NAME_IS_DEFAULT_DEF (var))
197 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
198 if (bb == NULL
199 || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
200 e = NULL;
203 return e;
206 /* Return true if the target supports a vector version of CODE,
207 where CODE is known to map to a direct optab with the given SUBTYPE.
208 ITYPE specifies the type of (some of) the scalar inputs and OTYPE
209 specifies the type of the scalar result.
211 If CODE allows the inputs and outputs to have different type
212 (such as for WIDEN_SUM_EXPR), it is the input mode rather
213 than the output mode that determines the appropriate target pattern.
214 Operand 0 of the target pattern then specifies the mode that the output
215 must have.
217 When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
218 Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
219 is nonnull. */
221 static bool
222 vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code,
223 tree itype, tree *vecotype_out,
224 tree *vecitype_out = NULL,
225 enum optab_subtype subtype = optab_default)
227 tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
228 if (!vecitype)
229 return false;
231 tree vecotype = get_vectype_for_scalar_type (vinfo, otype);
232 if (!vecotype)
233 return false;
235 optab optab = optab_for_tree_code (code, vecitype, subtype);
236 if (!optab)
237 return false;
239 insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
240 if (icode == CODE_FOR_nothing
241 || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
242 return false;
244 *vecotype_out = vecotype;
245 if (vecitype_out)
246 *vecitype_out = vecitype;
247 return true;
250 /* Round bit precision PRECISION up to a full element. */
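/* For example (illustrative values only), a precision of 17 rounds up to 32,
   while a precision of 3 rounds up to BITS_PER_UNIT.  */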
252 static unsigned int
253 vect_element_precision (unsigned int precision)
255 precision = 1 << ceil_log2 (precision);
256 return MAX (precision, BITS_PER_UNIT);
259 /* If OP is defined by a statement that's being considered for vectorization,
260 return information about that statement, otherwise return NULL. */
262 static stmt_vec_info
263 vect_get_internal_def (vec_info *vinfo, tree op)
265 stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
266 if (def_stmt_info
267 && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
268 return def_stmt_info;
269 return NULL;
272 /* Check whether NAME, an ssa-name used in STMT_VINFO,
273 is a result of a type promotion, such that:
274 DEF_STMT: NAME = NOP (name0)
275 If CHECK_SIGN is TRUE, check that either both types are signed or both are
276 unsigned. */
278 static bool
279 type_conversion_p (vec_info *vinfo, tree name, bool check_sign,
280 tree *orig_type, gimple **def_stmt, bool *promotion)
282 tree type = TREE_TYPE (name);
283 tree oprnd0;
284 enum vect_def_type dt;
286 stmt_vec_info def_stmt_info;
287 if (!vect_is_simple_use (name, vinfo, &dt, &def_stmt_info, def_stmt))
288 return false;
290 if (dt != vect_internal_def
291 && dt != vect_external_def && dt != vect_constant_def)
292 return false;
294 if (!*def_stmt)
295 return false;
297 if (!is_gimple_assign (*def_stmt))
298 return false;
300 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
301 return false;
303 oprnd0 = gimple_assign_rhs1 (*def_stmt);
305 *orig_type = TREE_TYPE (oprnd0);
306 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
307 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
308 return false;
310 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
311 *promotion = true;
312 else
313 *promotion = false;
315 if (!vect_is_simple_use (oprnd0, vinfo, &dt))
316 return false;
318 return true;
321 /* Holds information about an input operand after some sign changes
322 and type promotions have been peeled away. */
323 class vect_unpromoted_value {
324 public:
325 vect_unpromoted_value ();
327 void set_op (tree, vect_def_type, stmt_vec_info = NULL);
329 /* The value obtained after peeling away zero or more casts. */
330 tree op;
332 /* The type of OP. */
333 tree type;
335 /* The definition type of OP. */
336 vect_def_type dt;
338 /* If OP is the result of peeling at least one cast, and if the cast
339 of OP itself is a vectorizable statement, CASTER identifies that
340 statement, otherwise it is null. */
341 stmt_vec_info caster;
344 inline vect_unpromoted_value::vect_unpromoted_value ()
345 : op (NULL_TREE),
346 type (NULL_TREE),
347 dt (vect_uninitialized_def),
348 caster (NULL)
352 /* Set the operand to OP_IN, its definition type to DT_IN, and the
353 statement that casts it to CASTER_IN. */
355 inline void
356 vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
357 stmt_vec_info caster_in)
359 op = op_in;
360 type = TREE_TYPE (op);
361 dt = dt_in;
362 caster = caster_in;
365 /* If OP is a vectorizable SSA name, strip a sequence of integer conversions
366 to reach some vectorizable inner operand OP', continuing as long as it
367 is possible to convert OP' back to OP using a possible sign change
368 followed by a possible promotion P. Return this OP', or null if OP is
369 not a vectorizable SSA name. If there is a promotion P, describe its
370 input in UNPROM, otherwise describe OP' in UNPROM. If SINGLE_USE_P
371 is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
372 have more than one user.
374 A successful return means that it is possible to go from OP' to OP
375 via UNPROM. The cast from OP' to UNPROM is at most a sign change,
376 whereas the cast from UNPROM to OP might be a promotion, a sign
377 change, or a nop.
379 E.g. say we have:
381 signed short *ptr = ...;
382 signed short C = *ptr;
383 unsigned short B = (unsigned short) C; // sign change
384 signed int A = (signed int) B; // unsigned promotion
385 ...possible other uses of A...
386 unsigned int OP = (unsigned int) A; // sign change
388 In this case it's possible to go directly from C to OP using:
390 OP = (unsigned int) (unsigned short) C;
391 +------------+ +--------------+
392 promotion sign change
394 so OP' would be C. The input to the promotion is B, so UNPROM
395 would describe B. */
397 static tree
398 vect_look_through_possible_promotion (vec_info *vinfo, tree op,
399 vect_unpromoted_value *unprom,
400 bool *single_use_p = NULL)
402 tree op_type = TREE_TYPE (op);
403 if (!INTEGRAL_TYPE_P (op_type))
404 return NULL_TREE;
406 tree res = NULL_TREE;
407 unsigned int orig_precision = TYPE_PRECISION (op_type);
408 unsigned int min_precision = orig_precision;
409 stmt_vec_info caster = NULL;
410 while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
412 /* See whether OP is simple enough to vectorize. */
413 stmt_vec_info def_stmt_info;
414 gimple *def_stmt;
415 vect_def_type dt;
416 if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
417 break;
419 /* If OP is the input of a demotion, skip over it to see whether
420 OP is itself the result of a promotion. If so, the combined
421 effect of the promotion and the demotion might fit the required
422 pattern, otherwise neither operation fits.
424 This copes with cases such as the result of an arithmetic
425 operation being truncated before being stored, and where that
426 arithmetic operation has been recognized as an over-widened one. */
427 if (TYPE_PRECISION (op_type) <= min_precision)
429 /* Use OP as the UNPROM described above if we haven't yet
430 found a promotion, or if using the new input preserves the
431 sign of the previous promotion. */
432 if (!res
433 || TYPE_PRECISION (unprom->type) == orig_precision
434 || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type))
436 unprom->set_op (op, dt, caster);
437 min_precision = TYPE_PRECISION (op_type);
439 /* Stop if we've already seen a promotion and if this
440 conversion does more than change the sign. */
441 else if (TYPE_PRECISION (op_type)
442 != TYPE_PRECISION (unprom->type))
443 break;
445 /* The sequence now extends to OP. */
446 res = op;
449 /* See whether OP is defined by a cast. Record it as CASTER if
450 the cast is potentially vectorizable. */
451 if (!def_stmt)
452 break;
453 caster = def_stmt_info;
455 /* Ignore pattern statements, since we don't link uses for them. */
456 if (caster
457 && single_use_p
458 && !STMT_VINFO_RELATED_STMT (caster)
459 && !has_single_use (res))
460 *single_use_p = false;
462 gassign *assign = dyn_cast <gassign *> (def_stmt);
463 if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
464 break;
466 /* Continue with the input to the cast. */
467 op = gimple_assign_rhs1 (def_stmt);
468 op_type = TREE_TYPE (op);
470 return res;
473 /* OP is an integer operand to an operation that returns TYPE, and we
474 want to treat the operation as a widening one. So far we can treat
475 it as widening from *COMMON_TYPE.
477 Return true if OP is suitable for such a widening operation,
478 either widening from *COMMON_TYPE or from some supertype of it.
479 Update *COMMON_TYPE to the supertype in the latter case.
481 SHIFT_P is true if OP is a shift amount. */
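/* For instance (an illustrative sketch of the logic below), with TYPE being a
   32-bit int and OP a constant shift amount of 12, the shift is accepted
   (12 <= 32 / 2) and contributes a minimum precision of 12, which is then
   rounded up to a 16-bit *COMMON_TYPE if the current one is narrower.  */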
483 static bool
484 vect_joust_widened_integer (tree type, bool shift_p, tree op,
485 tree *common_type)
487 /* Calculate the minimum precision required by OP, without changing
488 the sign of either operand. */
489 unsigned int precision;
490 if (shift_p)
492 if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
493 return false;
494 precision = TREE_INT_CST_LOW (op);
496 else
498 precision = wi::min_precision (wi::to_widest (op),
499 TYPE_SIGN (*common_type));
500 if (precision * 2 > TYPE_PRECISION (type))
501 return false;
504 /* If OP requires a wider type, switch to that type. The checks
505 above ensure that this is still narrower than the result. */
506 precision = vect_element_precision (precision);
507 if (TYPE_PRECISION (*common_type) < precision)
508 *common_type = build_nonstandard_integer_type
509 (precision, TYPE_UNSIGNED (*common_type));
510 return true;
513 /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
514 is narrower than type, storing the supertype in *COMMON_TYPE if so. */
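/* For example (an illustrative walk through the checks below), if
   *COMMON_TYPE is unsigned char and NEW_TYPE is signed char, neither type
   can hold all values of the other, so the supertype must be a 16-bit
   signed type; that in turn is only accepted if TYPE is at least 32 bits
   wide.  */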
516 static bool
517 vect_joust_widened_type (tree type, tree new_type, tree *common_type)
519 if (types_compatible_p (*common_type, new_type))
520 return true;
522 /* See if *COMMON_TYPE can hold all values of NEW_TYPE. */
523 if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
524 && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
525 return true;
527 /* See if NEW_TYPE can hold all values of *COMMON_TYPE. */
528 if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
529 && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
531 *common_type = new_type;
532 return true;
535 /* We have mismatched signs, with the signed type being
536 no wider than the unsigned type. In this case we need
537 a wider signed type. */
538 unsigned int precision = MAX (TYPE_PRECISION (*common_type),
539 TYPE_PRECISION (new_type));
540 precision *= 2;
542 if (precision * 2 > TYPE_PRECISION (type))
543 return false;
545 *common_type = build_nonstandard_integer_type (precision, false);
546 return true;
549 /* Check whether STMT_INFO can be viewed as a tree of integer operations
550 in which each node either performs CODE or WIDENED_CODE, and where
551 each leaf operand is narrower than the result of STMT_INFO. MAX_NOPS
552 specifies the maximum number of leaf operands. SHIFT_P says whether
553 CODE and WIDENED_CODE are some sort of shift.
555 If STMT_INFO is such a tree, return the number of leaf operands
556 and describe them in UNPROM[0] onwards. Also set *COMMON_TYPE
557 to a type that (a) is narrower than the result of STMT_INFO and
558 (b) can hold all leaf operand values.
560 If SUBTYPE is nonnull, allow the operands to differ
561 in sign but not in precision. SUBTYPE is updated to reflect
562 this.
564 Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
565 exists. */
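/* As an illustrative example (not from the original sources): with CODE equal
   to PLUS_EXPR and MAX_NOPS equal to 4, a statement computing

     ((int) a + (int) b) + ((int) c + (int) d)

   over signed char inputs is such a tree; it has four leaf operands
   a, b, c, d and *COMMON_TYPE can be signed char.  */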
567 static unsigned int
568 vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code,
569 code_helper widened_code, bool shift_p,
570 unsigned int max_nops,
571 vect_unpromoted_value *unprom, tree *common_type,
572 enum optab_subtype *subtype = NULL)
574 /* Check for an integer operation with the right code. */
575 gimple* stmt = stmt_info->stmt;
576 if (!(is_gimple_assign (stmt) || is_gimple_call (stmt)))
577 return 0;
579 code_helper rhs_code;
580 if (is_gimple_assign (stmt))
581 rhs_code = gimple_assign_rhs_code (stmt);
582 else if (is_gimple_call (stmt))
583 rhs_code = gimple_call_combined_fn (stmt);
584 else
585 return 0;
587 if (rhs_code != code
588 && rhs_code != widened_code)
589 return 0;
591 tree lhs = gimple_get_lhs (stmt);
592 tree type = TREE_TYPE (lhs);
593 if (!INTEGRAL_TYPE_P (type))
594 return 0;
596 /* Assume that both operands will be leaf operands. */
597 max_nops -= 2;
599 /* Check the operands. */
600 unsigned int next_op = 0;
601 for (unsigned int i = 0; i < 2; ++i)
603 vect_unpromoted_value *this_unprom = &unprom[next_op];
604 unsigned int nops = 1;
605 tree op = gimple_arg (stmt, i);
606 if (i == 1 && TREE_CODE (op) == INTEGER_CST)
608 /* We already have a common type from earlier operands.
609 Update it to account for OP. */
610 this_unprom->set_op (op, vect_constant_def);
611 if (!vect_joust_widened_integer (type, shift_p, op, common_type))
612 return 0;
614 else
616 /* Only allow shifts by constants. */
617 if (shift_p && i == 1)
618 return 0;
620 if (rhs_code != code)
622 /* If rhs_code is widened_code, don't look through further
623 possible promotions, there is a promotion already embedded
624 in the WIDEN_*_EXPR. */
625 if (TREE_CODE (op) != SSA_NAME
626 || !INTEGRAL_TYPE_P (TREE_TYPE (op)))
627 return 0;
629 stmt_vec_info def_stmt_info;
630 gimple *def_stmt;
631 vect_def_type dt;
632 if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info,
633 &def_stmt))
634 return 0;
635 this_unprom->set_op (op, dt, NULL);
637 else if (!vect_look_through_possible_promotion (vinfo, op,
638 this_unprom))
639 return 0;
641 if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
643 /* The operand isn't widened. If STMT_INFO has the code
644 for an unwidened operation, recursively check whether
645 this operand is a node of the tree. */
646 if (rhs_code != code
647 || max_nops == 0
648 || this_unprom->dt != vect_internal_def)
649 return 0;
651 /* Give back the leaf slot allocated above now that we're
652 not treating this as a leaf operand. */
653 max_nops += 1;
655 /* Recursively process the definition of the operand. */
656 stmt_vec_info def_stmt_info
657 = vinfo->lookup_def (this_unprom->op);
658 nops = vect_widened_op_tree (vinfo, def_stmt_info, code,
659 widened_code, shift_p, max_nops,
660 this_unprom, common_type,
661 subtype);
662 if (nops == 0)
663 return 0;
665 max_nops -= nops;
667 else
669 /* Make sure that the operand is narrower than the result. */
670 if (TYPE_PRECISION (this_unprom->type) * 2
671 > TYPE_PRECISION (type))
672 return 0;
674 /* Update COMMON_TYPE for the new operand. */
675 if (i == 0)
676 *common_type = this_unprom->type;
677 else if (!vect_joust_widened_type (type, this_unprom->type,
678 common_type))
680 if (subtype)
682 /* See if we can sign extend the smaller type. */
683 if (TYPE_PRECISION (this_unprom->type)
684 > TYPE_PRECISION (*common_type))
685 *common_type = this_unprom->type;
686 *subtype = optab_vector_mixed_sign;
688 else
689 return 0;
693 next_op += nops;
695 return next_op;
698 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
699 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
701 static tree
702 vect_recog_temp_ssa_var (tree type, gimple *stmt = NULL)
704 return make_temp_ssa_name (type, stmt, "patt");
707 /* STMT2_INFO describes a type conversion that could be split into STMT1
708 followed by a version of STMT2_INFO that takes NEW_RHS as its first
709 input. Try to do this using pattern statements, returning true on
710 success. */
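/* For example (an illustrative sketch), a conversion

     int_x = (int) char_y;

   can be split into  short_tmp = (short) char_y  (STMT1) followed by the
   adjusted  int_x = (int) short_tmp,  which is how vect_convert_input taps
   into a mid-way precision below.  */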
712 static bool
713 vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
714 gimple *stmt1, tree vectype)
716 if (is_pattern_stmt_p (stmt2_info))
718 /* STMT2_INFO is part of a pattern. Get the statement to which
719 the pattern is attached. */
720 stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
721 vect_init_pattern_stmt (vinfo, stmt1, orig_stmt2_info, vectype);
723 if (dump_enabled_p ())
724 dump_printf_loc (MSG_NOTE, vect_location,
725 "Splitting pattern statement: %G", stmt2_info->stmt);
727 /* Since STMT2_INFO is a pattern statement, we can change it
728 in-situ without worrying about changing the code for the
729 containing block. */
730 gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);
732 if (dump_enabled_p ())
734 dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
735 dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
736 stmt2_info->stmt);
739 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
740 if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
741 /* STMT2_INFO is the actual pattern statement. Add STMT1
742 to the end of the definition sequence. */
743 gimple_seq_add_stmt_without_update (def_seq, stmt1);
744 else
746 /* STMT2_INFO belongs to the definition sequence. Insert STMT1
747 before it. */
748 gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
749 gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
751 return true;
753 else
755 /* STMT2_INFO doesn't yet have a pattern. Try to create a
756 two-statement pattern now. */
757 gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
758 tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
759 tree lhs_vectype = get_vectype_for_scalar_type (vinfo, lhs_type);
760 if (!lhs_vectype)
761 return false;
763 if (dump_enabled_p ())
764 dump_printf_loc (MSG_NOTE, vect_location,
765 "Splitting statement: %G", stmt2_info->stmt);
767 /* Add STMT1 as a singleton pattern definition sequence. */
768 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
769 vect_init_pattern_stmt (vinfo, stmt1, stmt2_info, vectype);
770 gimple_seq_add_stmt_without_update (def_seq, stmt1);
772 /* Build the second of the two pattern statements. */
773 tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
774 gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
775 vect_set_pattern_stmt (vinfo, new_stmt2, stmt2_info, lhs_vectype);
777 if (dump_enabled_p ())
779 dump_printf_loc (MSG_NOTE, vect_location,
780 "into pattern statements: %G", stmt1);
781 dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
782 (gimple *) new_stmt2);
785 return true;
789 /* Look for the following pattern
790 X = x[i]
791 Y = y[i]
792 DIFF = X - Y
793 DAD = ABS_EXPR<DIFF>
795 ABS_STMT should point to a statement of code ABS_EXPR or ABSU_EXPR.
796 HALF_TYPE and UNPROM will be set should the statement be found to
797 be a widened operation.
798 DIFF_STMT will be set to the MINUS_EXPR
799 statement that precedes the ABS_STMT unless vect_widened_op_tree
800 succeeds. */
802 static bool
803 vect_recog_absolute_difference (vec_info *vinfo, gassign *abs_stmt,
804 tree *half_type,
805 vect_unpromoted_value unprom[2],
806 gassign **diff_stmt)
808 if (!abs_stmt)
809 return false;
811 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
812 inside the loop (in case we are analyzing an outer-loop). */
813 enum tree_code code = gimple_assign_rhs_code (abs_stmt);
814 if (code != ABS_EXPR && code != ABSU_EXPR)
815 return false;
817 tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
818 if (!abs_oprnd)
819 return false;
820 tree abs_type = TREE_TYPE (abs_oprnd);
821 if (!ANY_INTEGRAL_TYPE_P (abs_type)
822 || TYPE_OVERFLOW_WRAPS (abs_type)
823 || TYPE_UNSIGNED (abs_type))
824 return false;
826 /* Peel off conversions from the ABS input. This can involve sign
827 changes (e.g. from an unsigned subtraction to a signed ABS input)
828 or signed promotion, but it can't include unsigned promotion.
829 (Note that ABS of an unsigned promotion should have been folded
830 away before now anyway.) */
831 vect_unpromoted_value unprom_diff;
832 abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
833 &unprom_diff);
834 if (!abs_oprnd)
835 return false;
836 if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
837 && TYPE_UNSIGNED (unprom_diff.type))
838 return false;
840 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
841 stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
842 if (!diff_stmt_vinfo)
843 return false;
845 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
846 inside the loop (in case we are analyzing an outer-loop). */
847 if (vect_widened_op_tree (vinfo, diff_stmt_vinfo,
848 MINUS_EXPR, IFN_VEC_WIDEN_MINUS,
849 false, 2, unprom, half_type))
850 return true;
852 /* Failed to find a widen operation so we check for a regular MINUS_EXPR. */
853 gassign *diff = dyn_cast <gassign *> (STMT_VINFO_STMT (diff_stmt_vinfo));
854 if (diff_stmt && diff
855 && gimple_assign_rhs_code (diff) == MINUS_EXPR
856 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (abs_oprnd)))
858 *diff_stmt = diff;
859 *half_type = NULL_TREE;
860 return true;
863 return false;
866 /* Convert UNPROM to TYPE and return the result, adding new statements
867 to STMT_INFO's pattern definition statements if no better way is
868 available. VECTYPE is the vector form of TYPE.
870 If SUBTYPE then convert the type based on the subtype. */
872 static tree
873 vect_convert_input (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
874 vect_unpromoted_value *unprom, tree vectype,
875 enum optab_subtype subtype = optab_default)
877 /* Update the type if the signs differ. */
878 if (subtype == optab_vector_mixed_sign)
880 gcc_assert (!TYPE_UNSIGNED (type));
881 if (TYPE_UNSIGNED (TREE_TYPE (unprom->op)))
883 type = unsigned_type_for (type);
884 vectype = unsigned_type_for (vectype);
888 /* Check for a no-op conversion. */
889 if (types_compatible_p (type, TREE_TYPE (unprom->op)))
890 return unprom->op;
892 /* Allow the caller to create constant vect_unpromoted_values. */
893 if (TREE_CODE (unprom->op) == INTEGER_CST)
894 return wide_int_to_tree (type, wi::to_widest (unprom->op));
896 tree input = unprom->op;
897 if (unprom->caster)
899 tree lhs = gimple_get_lhs (unprom->caster->stmt);
900 tree lhs_type = TREE_TYPE (lhs);
902 /* If the result of the existing cast is the right width, use it
903 instead of the source of the cast. */
904 if (TYPE_PRECISION (lhs_type) == TYPE_PRECISION (type))
905 input = lhs;
906 /* If the precision we want is between the source and result
907 precisions of the existing cast, try splitting the cast into
908 two and tapping into a mid-way point. */
909 else if (TYPE_PRECISION (lhs_type) > TYPE_PRECISION (type)
910 && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type))
912 /* In order to preserve the semantics of the original cast,
913 give the mid-way point the same signedness as the input value.
915 It would be possible to use a signed type here instead if
916 TYPE is signed and UNPROM->TYPE is unsigned, but that would
917 make the sign of the midtype sensitive to the order in
918 which we process the statements, since the signedness of
919 TYPE is the signedness required by just one of possibly
920 many users. Also, unsigned promotions are usually as cheap
921 as or cheaper than signed ones, so it's better to keep an
922 unsigned promotion. */
923 tree midtype = build_nonstandard_integer_type
924 (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type));
925 tree vec_midtype = get_vectype_for_scalar_type (vinfo, midtype);
926 if (vec_midtype)
928 input = vect_recog_temp_ssa_var (midtype, NULL);
929 gassign *new_stmt = gimple_build_assign (input, NOP_EXPR,
930 unprom->op);
931 if (!vect_split_statement (vinfo, unprom->caster, input, new_stmt,
932 vec_midtype))
933 append_pattern_def_seq (vinfo, stmt_info,
934 new_stmt, vec_midtype);
938 /* See if we can reuse an existing result. */
939 if (types_compatible_p (type, TREE_TYPE (input)))
940 return input;
943 /* We need a new conversion statement. */
944 tree new_op = vect_recog_temp_ssa_var (type, NULL);
945 gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, input);
947 /* If OP is an external value, see if we can insert the new statement
948 on an incoming edge. */
949 if (input == unprom->op && unprom->dt == vect_external_def)
950 if (edge e = vect_get_external_def_edge (vinfo, input))
952 basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
953 gcc_assert (!new_bb);
954 return new_op;
957 /* As a (common) last resort, add the statement to the pattern itself. */
958 append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype);
959 return new_op;
962 /* Invoke vect_convert_input for N elements of UNPROM and store the
963 result in the corresponding elements of RESULT.
965 If SUBTYPE then convert the type based on the subtype. */
967 static void
968 vect_convert_inputs (vec_info *vinfo, stmt_vec_info stmt_info, unsigned int n,
969 tree *result, tree type, vect_unpromoted_value *unprom,
970 tree vectype, enum optab_subtype subtype = optab_default)
972 for (unsigned int i = 0; i < n; ++i)
974 unsigned int j;
975 for (j = 0; j < i; ++j)
976 if (unprom[j].op == unprom[i].op)
977 break;
979 if (j < i)
980 result[i] = result[j];
981 else
982 result[i] = vect_convert_input (vinfo, stmt_info,
983 type, &unprom[i], vectype, subtype);
987 /* The caller has created a (possibly empty) sequence of pattern definition
988 statements followed by a single statement PATTERN_STMT. Cast the result
989 of this final statement to TYPE. If a new statement is needed, add
990 PATTERN_STMT to the end of STMT_INFO's pattern definition statements
991 and return the new statement, otherwise return PATTERN_STMT as-is.
992 VECITYPE is the vector form of PATTERN_STMT's result type. */
994 static gimple *
995 vect_convert_output (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
996 gimple *pattern_stmt, tree vecitype)
998 tree lhs = gimple_get_lhs (pattern_stmt);
999 if (!types_compatible_p (type, TREE_TYPE (lhs)))
1001 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vecitype);
1002 tree cast_var = vect_recog_temp_ssa_var (type, NULL);
1003 pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
1005 return pattern_stmt;
1008 /* Return true if STMT_VINFO describes a reduction for which reassociation
1009 is allowed. If STMT_INFO is part of a group, assume that it's part of
1010 a reduction chain and optimistically assume that all statements
1011 except the last allow reassociation.
1012 Also require it to have code CODE and to be a reduction
1013 in the outermost loop. When returning true, store the operands in
1014 *OP0_OUT and *OP1_OUT. */
1016 static bool
1017 vect_reassociating_reduction_p (vec_info *vinfo,
1018 stmt_vec_info stmt_info, tree_code code,
1019 tree *op0_out, tree *op1_out)
1021 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
1022 if (!loop_info)
1023 return false;
1025 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
1026 if (!assign || gimple_assign_rhs_code (assign) != code)
1027 return false;
1029 /* We don't allow changing the order of the computation in the inner-loop
1030 when doing outer-loop vectorization. */
1031 class loop *loop = LOOP_VINFO_LOOP (loop_info);
1032 if (loop && nested_in_vect_loop_p (loop, stmt_info))
1033 return false;
1035 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
1037 if (needs_fold_left_reduction_p (TREE_TYPE (gimple_assign_lhs (assign)),
1038 code))
1039 return false;
1041 else if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) == NULL)
1042 return false;
1044 *op0_out = gimple_assign_rhs1 (assign);
1045 *op1_out = gimple_assign_rhs2 (assign);
1046 if (commutative_tree_code (code) && STMT_VINFO_REDUC_IDX (stmt_info) == 0)
1047 std::swap (*op0_out, *op1_out);
1048 return true;
1051 /* match.pd function to match
1052 (cond (cmp@3 a b) (convert@1 c) (convert@2 d))
1053 with conditions:
1054 1) @1, @2, c, d, a, b all have integral type.
1055 2) Both @1 and @2 are single-use.
1056 3) a and c have the same precision.
1057 4) c and @1 have different precision.
1058 5) c and d have the same type, or they may differ in sign when the
1059 conversion is a truncation.
1061 Record a, c, d and @3. */
1063 extern bool gimple_cond_expr_convert_p (tree, tree*, tree (*)(tree));
1065 /* Function vect_recog_cond_expr_convert
1067 Try to find the following pattern:
1069 TYPE_AB A,B;
1070 TYPE_CD C,D;
1071 TYPE_E E;
1072 TYPE_E op_true = (TYPE_E) A;
1073 TYPE_E op_false = (TYPE_E) B;
1075 E = C cmp D ? op_true : op_false;
1077 where
1078 TYPE_PRECISION (TYPE_E) != TYPE_PRECISION (TYPE_CD);
1079 TYPE_PRECISION (TYPE_AB) == TYPE_PRECISION (TYPE_CD);
1080 single_use of op_true and op_false.
1081 TYPE_AB could differ in sign when (TYPE_E) A is a truncation.
1083 Input:
1085 * STMT_VINFO: The stmt from which the pattern search begins.
1086 here it starts with E = C cmp D ? op_true : op_false;
1088 Output:
1090 TYPE1 E' = C cmp D ? A : B;
1091 TYPE3 E = (TYPE3) E';
1093 There may be an extra nop_convert for A or B to handle different signedness.
1095 * TYPE_OUT: The vector type of the output of this pattern.
1097 * Return value: A new stmt that will be used to replace the sequence of
1098 stmts that constitute the pattern. In this case it will be:
1099 E = (TYPE3)E';
1100 E' = C cmp D ? A : B; is recorded in pattern definition statements; */
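/* Illustrative scalar source for this pattern (a sketch, with assumed types):

     int c, d, a, b;
     long e = c < d ? (long) a : (long) b;

   Here the COND_EXPR can be done on the 32-bit values and only the selected
   result widened, i.e. e = (long) (c < d ? a : b).  */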
1102 static gimple *
1103 vect_recog_cond_expr_convert_pattern (vec_info *vinfo,
1104 stmt_vec_info stmt_vinfo, tree *type_out)
1106 gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
1107 tree lhs, match[4], temp, type, new_lhs, op2;
1108 gimple *cond_stmt;
1109 gimple *pattern_stmt;
1111 if (!last_stmt)
1112 return NULL;
1114 lhs = gimple_assign_lhs (last_stmt);
1116 /* Find E = C cmp D ? (TYPE3) A : (TYPE3) B;
1117 TYPE_PRECISION (A) == TYPE_PRECISION (C). */
1118 if (!gimple_cond_expr_convert_p (lhs, &match[0], NULL))
1119 return NULL;
1121 vect_pattern_detected ("vect_recog_cond_expr_convert_pattern", last_stmt);
1123 op2 = match[2];
1124 type = TREE_TYPE (match[1]);
1125 if (TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (match[2])))
1127 op2 = vect_recog_temp_ssa_var (type, NULL);
1128 gimple* nop_stmt = gimple_build_assign (op2, NOP_EXPR, match[2]);
1129 append_pattern_def_seq (vinfo, stmt_vinfo, nop_stmt,
1130 get_vectype_for_scalar_type (vinfo, type));
1133 temp = vect_recog_temp_ssa_var (type, NULL);
1134 cond_stmt = gimple_build_assign (temp, build3 (COND_EXPR, type, match[3],
1135 match[1], op2));
1136 append_pattern_def_seq (vinfo, stmt_vinfo, cond_stmt,
1137 get_vectype_for_scalar_type (vinfo, type));
1138 new_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
1139 pattern_stmt = gimple_build_assign (new_lhs, NOP_EXPR, temp);
1140 *type_out = STMT_VINFO_VECTYPE (stmt_vinfo);
1142 if (dump_enabled_p ())
1143 dump_printf_loc (MSG_NOTE, vect_location,
1144 "created pattern stmt: %G", pattern_stmt);
1145 return pattern_stmt;
1148 /* Function vect_recog_dot_prod_pattern
1150 Try to find the following pattern:
1152 type1a x_t;
1153 type1b y_t;
1154 TYPE1 prod;
1155 TYPE2 sum = init;
1156 loop:
1157 sum_0 = phi <init, sum_1>
1158 S1 x_t = ...
1159 S2 y_t = ...
1160 S3 x_T = (TYPE1) x_t;
1161 S4 y_T = (TYPE1) y_t;
1162 S5 prod = x_T * y_T;
1163 [S6 prod = (TYPE2) prod; #optional]
1164 S7 sum_1 = prod + sum_0;
1166 where 'TYPE1' is exactly double the size of types 'type1a' and 'type1b'.
1167 The sign of 'TYPE1' must match the sign of either 'type1a' or 'type1b',
1168 but 'type1a' and 'type1b' themselves may differ in sign.
1170 Input:
1172 * STMT_VINFO: The stmt from which the pattern search begins. In the
1173 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
1174 will be detected.
1176 Output:
1178 * TYPE_OUT: The type of the output of this pattern.
1180 * Return value: A new stmt that will be used to replace the sequence of
1181 stmts that constitute the pattern. In this case it will be:
1182 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
1184 Note: The dot-prod idiom is a widening reduction pattern that is
1185 vectorized without preserving all the intermediate results. It
1186 produces only N/2 (widened) results (by summing up pairs of
1187 intermediate results) rather than all N results. Therefore, we
1188 cannot allow this pattern when we want to get all the results and in
1189 the correct order (as is the case when this computation is in an
1190 inner-loop nested in an outer-loop that is being vectorized). */
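/* Illustrative scalar source for the dot-product idiom (a sketch, assuming
   8-bit inputs and a 32-bit accumulator):

     signed char x[N], y[N];
     int sum = 0;
     for (int i = 0; i < N; i++)
       sum += (int) x[i] * (int) y[i];

   The multiply-and-accumulate becomes DOT_PROD_EXPR <x[i], y[i], sum_0>,
   which only preserves the final reduction value, not the individual
   products.  */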
1192 static gimple *
1193 vect_recog_dot_prod_pattern (vec_info *vinfo,
1194 stmt_vec_info stmt_vinfo, tree *type_out)
1196 tree oprnd0, oprnd1;
1197 gimple *last_stmt = stmt_vinfo->stmt;
1198 tree type, half_type;
1199 gimple *pattern_stmt;
1200 tree var;
1202 /* Look for the following pattern
1203 DX = (TYPE1) X;
1204 DY = (TYPE1) Y;
1205 DPROD = DX * DY;
1206 DDPROD = (TYPE2) DPROD;
1207 sum_1 = DDPROD + sum_0;
1208 In which
1209 - DX is double the size of X
1210 - DY is double the size of Y
1211 - DX, DY, DPROD all have the same type but the sign
1212 between X, Y and DPROD can differ.
1213 - sum is the same size as DPROD or bigger
1214 - sum has been recognized as a reduction variable.
1216 This is equivalent to:
1217 DPROD = X w* Y; #widen mult
1218 sum_1 = DPROD w+ sum_0; #widen summation
1220 DPROD = X w* Y; #widen mult
1221 sum_1 = DPROD + sum_0; #summation */
1224 /* Starting from LAST_STMT, follow the defs of its uses in search
1225 of the above pattern. */
1227 if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1228 &oprnd0, &oprnd1))
1229 return NULL;
1231 type = TREE_TYPE (gimple_get_lhs (last_stmt));
1233 vect_unpromoted_value unprom_mult;
1234 oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);
1236 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1237 we know that oprnd1 is the reduction variable (defined by a loop-header
1238 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1239 Left to check that oprnd0 is defined by a (widen_)mult_expr. */
1240 if (!oprnd0)
1241 return NULL;
1243 stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
1244 if (!mult_vinfo)
1245 return NULL;
1247 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
1248 inside the loop (in case we are analyzing an outer-loop). */
1249 vect_unpromoted_value unprom0[2];
1250 enum optab_subtype subtype = optab_vector;
1251 if (!vect_widened_op_tree (vinfo, mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
1252 false, 2, unprom0, &half_type, &subtype))
1253 return NULL;
1255 /* If there are two widening operations, make sure they agree on the sign
1256 of the extension. The result of an optab_vector_mixed_sign operation
1257 is signed; otherwise, the result has the same sign as the operands. */
1258 if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
1259 && (subtype == optab_vector_mixed_sign
1260 ? TYPE_UNSIGNED (unprom_mult.type)
1261 : TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type)))
1262 return NULL;
1264 vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);
1266 /* If the inputs have mixed signs, canonicalize on using the signed
1267 input type for analysis. This also helps when emulating mixed-sign
1268 operations using signed operations. */
1269 if (subtype == optab_vector_mixed_sign)
1270 half_type = signed_type_for (half_type);
1272 tree half_vectype;
1273 if (!vect_supportable_direct_optab_p (vinfo, type, DOT_PROD_EXPR, half_type,
1274 type_out, &half_vectype, subtype))
1276 /* We can emulate a mixed-sign dot-product using a sequence of
1277 signed dot-products; see vect_emulate_mixed_dot_prod for details. */
1278 if (subtype != optab_vector_mixed_sign
1279 || !vect_supportable_direct_optab_p (vinfo, signed_type_for (type),
1280 DOT_PROD_EXPR, half_type,
1281 type_out, &half_vectype,
1282 optab_vector))
1283 return NULL;
1285 *type_out = signed_or_unsigned_type_for (TYPE_UNSIGNED (type),
1286 *type_out);
1289 /* Get the inputs in the appropriate types. */
1290 tree mult_oprnd[2];
1291 vect_convert_inputs (vinfo, stmt_vinfo, 2, mult_oprnd, half_type,
1292 unprom0, half_vectype, subtype);
1294 var = vect_recog_temp_ssa_var (type, NULL);
1295 pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
1296 mult_oprnd[0], mult_oprnd[1], oprnd1);
1298 return pattern_stmt;
1302 /* Function vect_recog_sad_pattern
1304 Try to find the following Sum of Absolute Difference (SAD) pattern:
1306 type x_t, y_t;
1307 signed TYPE1 diff, abs_diff;
1308 TYPE2 sum = init;
1309 loop:
1310 sum_0 = phi <init, sum_1>
1311 S1 x_t = ...
1312 S2 y_t = ...
1313 S3 x_T = (TYPE1) x_t;
1314 S4 y_T = (TYPE1) y_t;
1315 S5 diff = x_T - y_T;
1316 S6 abs_diff = ABS_EXPR <diff>;
1317 [S7 abs_diff = (TYPE2) abs_diff; #optional]
1318 S8 sum_1 = abs_diff + sum_0;
1320 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
1321 same size as 'TYPE1' or bigger. This is a special case of a reduction
1322 computation.
1324 Input:
1326 * STMT_VINFO: The stmt from which the pattern search begins. In the
1327 example, when this function is called with S8, the pattern
1328 {S3,S4,S5,S6,S7,S8} will be detected.
1330 Output:
1332 * TYPE_OUT: The type of the output of this pattern.
1334 * Return value: A new stmt that will be used to replace the sequence of
1335 stmts that constitute the pattern. In this case it will be:
1336 SAD_EXPR <x_t, y_t, sum_0> */
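/* Illustrative scalar source for the SAD idiom (a sketch, assuming unsigned
   char inputs and an unsigned int accumulator):

     unsigned char x[N], y[N];
     unsigned int sum = 0;
     for (int i = 0; i < N; i++)
       sum += abs ((int) x[i] - (int) y[i]);

   The subtract/abs/accumulate sequence becomes SAD_EXPR <x[i], y[i], sum_0>.  */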
1339 static gimple *
1340 vect_recog_sad_pattern (vec_info *vinfo,
1341 stmt_vec_info stmt_vinfo, tree *type_out)
1343 gimple *last_stmt = stmt_vinfo->stmt;
1344 tree half_type;
1346 /* Look for the following pattern
1347 DX = (TYPE1) X;
1348 DY = (TYPE1) Y;
1349 DDIFF = DX - DY;
1350 DAD = ABS_EXPR <DDIFF>;
1351 DDPROD = (TYPE2) DPROD;
1352 sum_1 = DAD + sum_0;
1353 In which
1354 - DX is at least double the size of X
1355 - DY is at least double the size of Y
1356 - DX, DY, DDIFF, DAD all have the same type
1357 - sum is the same size as DAD or bigger
1358 - sum has been recognized as a reduction variable.
1360 This is equivalent to:
1361 DDIFF = X w- Y; #widen sub
1362 DAD = ABS_EXPR <DDIFF>;
1363 sum_1 = DAD w+ sum_0; #widen summation
1365 DDIFF = X w- Y; #widen sub
1366 DAD = ABS_EXPR <DDIFF>;
1367 sum_1 = DAD + sum_0; #summation */
1370 /* Starting from LAST_STMT, follow the defs of its uses in search
1371 of the above pattern. */
1373 tree plus_oprnd0, plus_oprnd1;
1374 if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1375 &plus_oprnd0, &plus_oprnd1))
1376 return NULL;
1378 tree sum_type = TREE_TYPE (gimple_get_lhs (last_stmt));
1380 /* Any non-truncating sequence of conversions is OK here, since
1381 with a successful match, the result of the ABS(U) is known to fit
1382 within the nonnegative range of the result type. (It cannot be the
1383 negative of the minimum signed value due to the range of the widening
1384 MINUS_EXPR.) */
1385 vect_unpromoted_value unprom_abs;
1386 plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
1387 &unprom_abs);
1389 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1390 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
1391 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
1392 Then check that plus_oprnd0 is defined by an abs_expr. */
1394 if (!plus_oprnd0)
1395 return NULL;
1397 stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
1398 if (!abs_stmt_vinfo)
1399 return NULL;
1401 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
1402 inside the loop (in case we are analyzing an outer-loop). */
1403 gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
1404 vect_unpromoted_value unprom[2];
1406 if (!abs_stmt)
1408 gcall *abd_stmt = dyn_cast <gcall *> (abs_stmt_vinfo->stmt);
1409 if (!abd_stmt
1410 || !gimple_call_internal_p (abd_stmt)
1411 || gimple_call_num_args (abd_stmt) != 2)
1412 return NULL;
1414 tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
1415 tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
1417 if (gimple_call_internal_fn (abd_stmt) == IFN_ABD)
1419 if (!vect_look_through_possible_promotion (vinfo, abd_oprnd0,
1420 &unprom[0])
1421 || !vect_look_through_possible_promotion (vinfo, abd_oprnd1,
1422 &unprom[1]))
1423 return NULL;
1425 else if (gimple_call_internal_fn (abd_stmt) == IFN_VEC_WIDEN_ABD)
1427 unprom[0].op = abd_oprnd0;
1428 unprom[0].type = TREE_TYPE (abd_oprnd0);
1429 unprom[1].op = abd_oprnd1;
1430 unprom[1].type = TREE_TYPE (abd_oprnd1);
1432 else
1433 return NULL;
1435 half_type = unprom[0].type;
1437 else if (!vect_recog_absolute_difference (vinfo, abs_stmt, &half_type,
1438 unprom, NULL))
1439 return NULL;
1441 vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
1443 tree half_vectype;
1444 if (!vect_supportable_direct_optab_p (vinfo, sum_type, SAD_EXPR, half_type,
1445 type_out, &half_vectype))
1446 return NULL;
1448 /* Get the inputs to the SAD_EXPR in the appropriate types. */
1449 tree sad_oprnd[2];
1450 vect_convert_inputs (vinfo, stmt_vinfo, 2, sad_oprnd, half_type,
1451 unprom, half_vectype);
1453 tree var = vect_recog_temp_ssa_var (sum_type, NULL);
1454 gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
1455 sad_oprnd[1], plus_oprnd1);
1457 return pattern_stmt;
1460 /* Function vect_recog_abd_pattern
1462 Try to find the following ABsolute Difference (ABD) or
1463 widening ABD (WIDEN_ABD) pattern:
1465 TYPE1 x;
1466 TYPE2 y;
1467 TYPE3 x_cast = (TYPE3) x; // widening or no-op
1468 TYPE3 y_cast = (TYPE3) y; // widening or no-op
1469 TYPE3 diff = x_cast - y_cast;
1470 TYPE4 diff_cast = (TYPE4) diff; // widening or no-op
1471 TYPE5 abs = ABS(U)_EXPR <diff_cast>;
1473 WIDEN_ABD exists to optimize the case where TYPE4 is at least
1474 twice as wide as TYPE3.
1476 Input:
1478 * STMT_VINFO: The stmt from which the pattern search begins
1480 Output:
1482 * TYPE_OUT: The type of the output of this pattern
1484 * Return value: A new stmt that will be used to replace the sequence of
1485 stmts that constitute the pattern, principally:
1486 out = IFN_ABD (x, y)
1487 out = IFN_WIDEN_ABD (x, y) */
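/* Illustrative scalar source (a sketch, assuming unsigned char operands and
   an int difference type):

     out[i] = abs ((int) x[i] - (int) y[i]);

   becomes  out = IFN_ABD (x, y);  if the absolute difference is subsequently
   widened to a type at least twice as wide, IFN_VEC_WIDEN_ABD is used
   instead.  */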
1490 static gimple *
1491 vect_recog_abd_pattern (vec_info *vinfo,
1492 stmt_vec_info stmt_vinfo, tree *type_out)
1494 gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
1495 if (!last_stmt)
1496 return NULL;
1498 tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
1500 vect_unpromoted_value unprom[2];
1501 gassign *diff_stmt;
1502 tree half_type;
1503 if (!vect_recog_absolute_difference (vinfo, last_stmt, &half_type,
1504 unprom, &diff_stmt))
1505 return NULL;
1507 tree abd_in_type, abd_out_type;
1509 if (half_type)
1511 abd_in_type = half_type;
1512 abd_out_type = abd_in_type;
1514 else
1516 unprom[0].op = gimple_assign_rhs1 (diff_stmt);
1517 unprom[1].op = gimple_assign_rhs2 (diff_stmt);
1518 abd_in_type = signed_type_for (out_type);
1519 abd_out_type = abd_in_type;
1522 tree vectype_in = get_vectype_for_scalar_type (vinfo, abd_in_type);
1523 if (!vectype_in)
1524 return NULL;
1526 internal_fn ifn = IFN_ABD;
1527 tree vectype_out = vectype_in;
1529 if (TYPE_PRECISION (out_type) >= TYPE_PRECISION (abd_in_type) * 2
1530 && stmt_vinfo->min_output_precision >= TYPE_PRECISION (abd_in_type) * 2)
1532 tree mid_type
1533 = build_nonstandard_integer_type (TYPE_PRECISION (abd_in_type) * 2,
1534 TYPE_UNSIGNED (abd_in_type));
1535 tree mid_vectype = get_vectype_for_scalar_type (vinfo, mid_type);
1537 code_helper dummy_code;
1538 int dummy_int;
1539 auto_vec<tree> dummy_vec;
1540 if (mid_vectype
1541 && supportable_widening_operation (vinfo, IFN_VEC_WIDEN_ABD,
1542 stmt_vinfo, mid_vectype,
1543 vectype_in,
1544 &dummy_code, &dummy_code,
1545 &dummy_int, &dummy_vec))
1547 ifn = IFN_VEC_WIDEN_ABD;
1548 abd_out_type = mid_type;
1549 vectype_out = mid_vectype;
1553 if (ifn == IFN_ABD
1554 && !direct_internal_fn_supported_p (ifn, vectype_in,
1555 OPTIMIZE_FOR_SPEED))
1556 return NULL;
1558 vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
1560 tree abd_oprnds[2];
1561 vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
1562 abd_in_type, unprom, vectype_in);
1564 *type_out = get_vectype_for_scalar_type (vinfo, out_type);
1566 tree abd_result = vect_recog_temp_ssa_var (abd_out_type, NULL);
1567 gcall *abd_stmt = gimple_build_call_internal (ifn, 2,
1568 abd_oprnds[0], abd_oprnds[1]);
1569 gimple_call_set_lhs (abd_stmt, abd_result);
1570 gimple_set_location (abd_stmt, gimple_location (last_stmt));
1572 gimple *stmt = abd_stmt;
1573 if (TYPE_PRECISION (abd_in_type) == TYPE_PRECISION (abd_out_type)
1574 && TYPE_PRECISION (abd_out_type) < TYPE_PRECISION (out_type)
1575 && !TYPE_UNSIGNED (abd_out_type))
1577 tree unsign = unsigned_type_for (abd_out_type);
1578 tree unsign_vectype = get_vectype_for_scalar_type (vinfo, unsign);
1579 stmt = vect_convert_output (vinfo, stmt_vinfo, unsign, stmt,
1580 unsign_vectype);
1583 return vect_convert_output (vinfo, stmt_vinfo, out_type, stmt, vectype_out);
1586 /* Recognize an operation that performs ORIG_CODE on widened inputs,
1587 so that it can be treated as though it had the form:
1589 A_TYPE a;
1590 B_TYPE b;
1591 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1592 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1593 | RES_TYPE a_extend = (RES_TYPE) a_cast; // promotion from HALF_TYPE
1594 | RES_TYPE b_extend = (RES_TYPE) b_cast; // promotion from HALF_TYPE
1595 | RES_TYPE res = a_extend ORIG_CODE b_extend;
1597 Try to replace the pattern with:
1599 A_TYPE a;
1600 B_TYPE b;
1601 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1602 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1603 | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
1604 | RES_TYPE res = (RES_TYPE) ext; // possible no-op
1606 where EXT_TYPE is wider than HALF_TYPE but has the same signedness.
1608 SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts. NAME is the
1609 name of the pattern being matched, for dump purposes. */
1611 static gimple *
1612 vect_recog_widen_op_pattern (vec_info *vinfo,
1613 stmt_vec_info last_stmt_info, tree *type_out,
1614 tree_code orig_code, code_helper wide_code,
1615 bool shift_p, const char *name)
1617 gimple *last_stmt = last_stmt_info->stmt;
1619 vect_unpromoted_value unprom[2];
1620 tree half_type;
1621 if (!vect_widened_op_tree (vinfo, last_stmt_info, orig_code, orig_code,
1622 shift_p, 2, unprom, &half_type))
1624 return NULL;
1626 /* Pattern detected. */
1627 vect_pattern_detected (name, last_stmt);
1629 tree type = TREE_TYPE (gimple_get_lhs (last_stmt));
1630 tree itype = type;
1631 if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
1632 || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
1633 itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
1634 TYPE_UNSIGNED (half_type));
1636 /* Check target support */
1637 tree vectype = get_vectype_for_scalar_type (vinfo, half_type);
1638 tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
1639 tree ctype = itype;
1640 tree vecctype = vecitype;
1641 if (orig_code == MINUS_EXPR
1642 && TYPE_UNSIGNED (itype)
1643 && TYPE_PRECISION (type) > TYPE_PRECISION (itype))
1645 /* Subtraction is special, even if half_type is unsigned and no matter
1646 whether type is signed or unsigned, if type is wider than itype,
1647 we need to sign-extend from the widening operation result to the
1648 result type.
1649 Consider half_type unsigned char, operand 1 0xfe, operand 2 0xff,
1650 itype unsigned short and type either int or unsigned int.
1651 Widened (unsigned short) 0xfe - (unsigned short) 0xff is
1652 (unsigned short) 0xffff, but for type int we want the result -1
1653 and for type unsigned int 0xffffffff rather than 0xffff. */
1654 ctype = build_nonstandard_integer_type (TYPE_PRECISION (itype), 0);
1655 vecctype = get_vectype_for_scalar_type (vinfo, ctype);
1658 code_helper dummy_code;
1659 int dummy_int;
1660 auto_vec<tree> dummy_vec;
1661 if (!vectype
1662 || !vecitype
1663 || !vecctype
1664 || !supportable_widening_operation (vinfo, wide_code, last_stmt_info,
1665 vecitype, vectype,
1666 &dummy_code, &dummy_code,
1667 &dummy_int, &dummy_vec))
1668 return NULL;
1670 *type_out = get_vectype_for_scalar_type (vinfo, type);
1671 if (!*type_out)
1672 return NULL;
1674 tree oprnd[2];
1675 vect_convert_inputs (vinfo, last_stmt_info,
1676 2, oprnd, half_type, unprom, vectype);
1678 tree var = vect_recog_temp_ssa_var (itype, NULL);
1679 gimple *pattern_stmt = vect_gimple_build (var, wide_code, oprnd[0], oprnd[1]);
1681 if (vecctype != vecitype)
1682 pattern_stmt = vect_convert_output (vinfo, last_stmt_info, ctype,
1683 pattern_stmt, vecitype);
1685 return vect_convert_output (vinfo, last_stmt_info,
1686 type, pattern_stmt, vecctype);
1689 /* Try to detect multiplication on widened inputs, converting MULT_EXPR
1690 to WIDEN_MULT_EXPR. See vect_recog_widen_op_pattern for details. */
1692 static gimple *
1693 vect_recog_widen_mult_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1694 tree *type_out)
1696 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1697 MULT_EXPR, WIDEN_MULT_EXPR, false,
1698 "vect_recog_widen_mult_pattern");
1701 /* Try to detect addition on widened inputs, converting PLUS_EXPR
1702 to IFN_VEC_WIDEN_PLUS. See vect_recog_widen_op_pattern for details. */
1704 static gimple *
1705 vect_recog_widen_plus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1706 tree *type_out)
1708 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1709 PLUS_EXPR, IFN_VEC_WIDEN_PLUS,
1710 false, "vect_recog_widen_plus_pattern");
1713 /* Try to detect subtraction on widened inputs, converting MINUS_EXPR
1714 to IFN_VEC_WIDEN_MINUS. See vect_recog_widen_op_pattern for details. */
1715 static gimple *
1716 vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1717 tree *type_out)
1719 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1720 MINUS_EXPR, IFN_VEC_WIDEN_MINUS,
1721 false, "vect_recog_widen_minus_pattern");
1724 /* Try to detect abd on widened inputs, converting IFN_ABD
1725 to IFN_VEC_WIDEN_ABD. */
1726 static gimple *
1727 vect_recog_widen_abd_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1728 tree *type_out)
1730 gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
1731 if (!last_stmt || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (last_stmt)))
1732 return NULL;
1734 tree last_rhs = gimple_assign_rhs1 (last_stmt);
1736 tree in_type = TREE_TYPE (last_rhs);
1737 tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
1738 if (!INTEGRAL_TYPE_P (in_type)
1739 || !INTEGRAL_TYPE_P (out_type)
1740 || TYPE_PRECISION (in_type) * 2 != TYPE_PRECISION (out_type)
1741 || !TYPE_UNSIGNED (in_type))
1742 return NULL;
1744 vect_unpromoted_value unprom;
1745 tree op = vect_look_through_possible_promotion (vinfo, last_rhs, &unprom);
1746 if (!op || TYPE_PRECISION (TREE_TYPE (op)) != TYPE_PRECISION (in_type))
1747 return NULL;
1749 stmt_vec_info abd_pattern_vinfo = vect_get_internal_def (vinfo, op);
1750 if (!abd_pattern_vinfo)
1751 return NULL;
1753 abd_pattern_vinfo = vect_stmt_to_vectorize (abd_pattern_vinfo);
1754 gcall *abd_stmt = dyn_cast <gcall *> (STMT_VINFO_STMT (abd_pattern_vinfo));
1755 if (!abd_stmt
1756 || !gimple_call_internal_p (abd_stmt)
1757 || gimple_call_internal_fn (abd_stmt) != IFN_ABD)
1758 return NULL;
1760 tree vectype_in = get_vectype_for_scalar_type (vinfo, in_type);
1761 tree vectype_out = get_vectype_for_scalar_type (vinfo, out_type);
1763 code_helper dummy_code;
1764 int dummy_int;
1765 auto_vec<tree> dummy_vec;
1766 if (!supportable_widening_operation (vinfo, IFN_VEC_WIDEN_ABD, stmt_vinfo,
1767 vectype_out, vectype_in,
1768 &dummy_code, &dummy_code,
1769 &dummy_int, &dummy_vec))
1770 return NULL;
1772 vect_pattern_detected ("vect_recog_widen_abd_pattern", last_stmt);
1774 *type_out = vectype_out;
1776 tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
1777 tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
1778 tree widen_abd_result = vect_recog_temp_ssa_var (out_type, NULL);
1779 gcall *widen_abd_stmt = gimple_build_call_internal (IFN_VEC_WIDEN_ABD, 2,
1780 abd_oprnd0, abd_oprnd1);
1781 gimple_call_set_lhs (widen_abd_stmt, widen_abd_result);
1782 gimple_set_location (widen_abd_stmt, gimple_location (last_stmt));
1783 return widen_abd_stmt;
1786 /* Function vect_recog_ctz_ffs_pattern
1788 Try to find the following pattern:
1790 TYPE1 A;
1791 TYPE1 B;
1793 B = __builtin_ctz{,l,ll} (A);
1795 or
1797 B = __builtin_ffs{,l,ll} (A);
1799 Input:
1801 * STMT_VINFO: The stmt from which the pattern search begins.
1802 here it starts with B = __builtin_* (A);
1804 Output:
1806 * TYPE_OUT: The vector type of the output of this pattern.
1808 * Return value: A new stmt that will be used to replace the sequence of
1809 stmts that constitute the pattern, using clz or popcount builtins. */
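/* Illustrative example (not part of the original sources), with made-up
   names:

     void f (unsigned int *a, int *b, int n)
     {
       for (int i = 0; i < n; i++)
         b[i] = __builtin_ctz (a[i]);
     }

   On targets that have vector clz or popcount but no vector ctz, the
   rewrites below still allow this loop to be vectorized.  */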
1811 static gimple *
1812 vect_recog_ctz_ffs_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1813 tree *type_out)
1815 gimple *call_stmt = stmt_vinfo->stmt;
1816 gimple *pattern_stmt;
1817 tree rhs_oprnd, rhs_type, lhs_oprnd, lhs_type, vec_type, vec_rhs_type;
1818 tree new_var;
1819 internal_fn ifn = IFN_LAST, ifnnew = IFN_LAST;
1820 bool defined_at_zero = true, defined_at_zero_new = false;
1821 int val = 0, val_new = 0, val_cmp = 0;
1822 int prec;
1823 int sub = 0, add = 0;
1824 location_t loc;
1826 if (!is_gimple_call (call_stmt))
1827 return NULL;
1829 if (gimple_call_num_args (call_stmt) != 1
1830 && gimple_call_num_args (call_stmt) != 2)
1831 return NULL;
1833 rhs_oprnd = gimple_call_arg (call_stmt, 0);
1834 rhs_type = TREE_TYPE (rhs_oprnd);
1835 lhs_oprnd = gimple_call_lhs (call_stmt);
1836 if (!lhs_oprnd)
1837 return NULL;
1838 lhs_type = TREE_TYPE (lhs_oprnd);
1839 if (!INTEGRAL_TYPE_P (lhs_type)
1840 || !INTEGRAL_TYPE_P (rhs_type)
1841 || !type_has_mode_precision_p (rhs_type)
1842 || TREE_CODE (rhs_oprnd) != SSA_NAME)
1843 return NULL;
1845 switch (gimple_call_combined_fn (call_stmt))
1847 CASE_CFN_CTZ:
1848 ifn = IFN_CTZ;
1849 if (!gimple_call_internal_p (call_stmt)
1850 || gimple_call_num_args (call_stmt) != 2)
1851 defined_at_zero = false;
1852 else
1853 val = tree_to_shwi (gimple_call_arg (call_stmt, 1));
1854 break;
1855 CASE_CFN_FFS:
1856 ifn = IFN_FFS;
1857 break;
1858 default:
1859 return NULL;
1862 prec = TYPE_PRECISION (rhs_type);
1863 loc = gimple_location (call_stmt);
1865 vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);
1866 if (!vec_type)
1867 return NULL;
1869 vec_rhs_type = get_vectype_for_scalar_type (vinfo, rhs_type);
1870 if (!vec_rhs_type)
1871 return NULL;
1873 /* Do it only if the backend doesn't have ctz<vector_mode>2 or
1874 ffs<vector_mode>2 pattern but does have clz<vector_mode>2 or
1875 popcount<vector_mode>2. */
1876 if (!vec_type
1877 || direct_internal_fn_supported_p (ifn, vec_rhs_type,
1878 OPTIMIZE_FOR_SPEED))
1879 return NULL;
1881 if (ifn == IFN_FFS
1882 && direct_internal_fn_supported_p (IFN_CTZ, vec_rhs_type,
1883 OPTIMIZE_FOR_SPEED))
1885 ifnnew = IFN_CTZ;
1886 defined_at_zero_new
1887 = CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (rhs_type),
1888 val_new) == 2;
1890 else if (direct_internal_fn_supported_p (IFN_CLZ, vec_rhs_type,
1891 OPTIMIZE_FOR_SPEED))
1893 ifnnew = IFN_CLZ;
1894 defined_at_zero_new
1895 = CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (rhs_type),
1896 val_new) == 2;
1898 if ((ifnnew == IFN_LAST
1899 || (defined_at_zero && !defined_at_zero_new))
1900 && direct_internal_fn_supported_p (IFN_POPCOUNT, vec_rhs_type,
1901 OPTIMIZE_FOR_SPEED))
1903 ifnnew = IFN_POPCOUNT;
1904 defined_at_zero_new = true;
1905 val_new = prec;
1907 if (ifnnew == IFN_LAST)
1908 return NULL;
1910 vect_pattern_detected ("vec_recog_ctz_ffs_pattern", call_stmt);
1912 val_cmp = val_new;
1913 if ((ifnnew == IFN_CLZ
1914 && defined_at_zero
1915 && defined_at_zero_new
1916 && val == prec
1917 && val_new == prec)
1918 || (ifnnew == IFN_POPCOUNT && ifn == IFN_CTZ))
1920 /* .CTZ (X) = PREC - .CLZ ((X - 1) & ~X)
1921 .CTZ (X) = .POPCOUNT ((X - 1) & ~X). */
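/* Worked check (illustrative, not from the original sources): for X = 8
   and PREC = 32, (X - 1) & ~X = 7, so .POPCOUNT gives 3 and
   PREC - .CLZ gives 32 - 29 = 3, both equal to .CTZ (8).  */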
1922 if (ifnnew == IFN_CLZ)
1923 sub = prec;
1924 val_cmp = prec;
1926 if (!TYPE_UNSIGNED (rhs_type))
1928 rhs_type = unsigned_type_for (rhs_type);
1929 vec_rhs_type = get_vectype_for_scalar_type (vinfo, rhs_type);
1930 new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
1931 pattern_stmt = gimple_build_assign (new_var, NOP_EXPR, rhs_oprnd);
1932 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt,
1933 vec_rhs_type);
1934 rhs_oprnd = new_var;
1937 tree m1 = vect_recog_temp_ssa_var (rhs_type, NULL);
1938 pattern_stmt = gimple_build_assign (m1, PLUS_EXPR, rhs_oprnd,
1939 build_int_cst (rhs_type, -1));
1940 gimple_set_location (pattern_stmt, loc);
1941 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1943 new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
1944 pattern_stmt = gimple_build_assign (new_var, BIT_NOT_EXPR, rhs_oprnd);
1945 gimple_set_location (pattern_stmt, loc);
1946 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1947 rhs_oprnd = new_var;
1949 new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
1950 pattern_stmt = gimple_build_assign (new_var, BIT_AND_EXPR,
1951 m1, rhs_oprnd);
1952 gimple_set_location (pattern_stmt, loc);
1953 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1954 rhs_oprnd = new_var;
1956 else if (ifnnew == IFN_CLZ)
1958 /* .CTZ (X) = (PREC - 1) - .CLZ (X & -X)
1959 .FFS (X) = PREC - .CLZ (X & -X). */
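/* Worked check (illustrative, not from the original sources): for X = 12
   and PREC = 32, X & -X = 4 and .CLZ (4) = 29, so
   (PREC - 1) - 29 = 2 = .CTZ (12) and PREC - 29 = 3 = .FFS (12).  */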
1960 sub = prec - (ifn == IFN_CTZ);
1961 val_cmp = sub - val_new;
1963 tree neg = vect_recog_temp_ssa_var (rhs_type, NULL);
1964 pattern_stmt = gimple_build_assign (neg, NEGATE_EXPR, rhs_oprnd);
1965 gimple_set_location (pattern_stmt, loc);
1966 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1968 new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
1969 pattern_stmt = gimple_build_assign (new_var, BIT_AND_EXPR,
1970 rhs_oprnd, neg);
1971 gimple_set_location (pattern_stmt, loc);
1972 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1973 rhs_oprnd = new_var;
1975 else if (ifnnew == IFN_POPCOUNT)
1977 /* .CTZ (X) = PREC - .POPCOUNT (X | -X)
1978 .FFS (X) = (PREC + 1) - .POPCOUNT (X | -X). */
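/* Worked check (illustrative, not from the original sources): for X = 12
   and PREC = 32, X | -X has 30 bits set, so PREC - 30 = 2 = .CTZ (12)
   and (PREC + 1) - 30 = 3 = .FFS (12).  */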
1979 sub = prec + (ifn == IFN_FFS);
1980 val_cmp = sub;
1982 tree neg = vect_recog_temp_ssa_var (rhs_type, NULL);
1983 pattern_stmt = gimple_build_assign (neg, NEGATE_EXPR, rhs_oprnd);
1984 gimple_set_location (pattern_stmt, loc);
1985 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1987 new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
1988 pattern_stmt = gimple_build_assign (new_var, BIT_IOR_EXPR,
1989 rhs_oprnd, neg);
1990 gimple_set_location (pattern_stmt, loc);
1991 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1992 rhs_oprnd = new_var;
1994 else if (ifnnew == IFN_CTZ)
1996 /* .FFS (X) = .CTZ (X) + 1. */
1997 add = 1;
1998 val_cmp++;
2001 /* Create B = .IFNNEW (A). */
2002 new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2003 if ((ifnnew == IFN_CLZ || ifnnew == IFN_CTZ) && defined_at_zero_new)
2004 pattern_stmt
2005 = gimple_build_call_internal (ifnnew, 2, rhs_oprnd,
2006 build_int_cst (integer_type_node,
2007 val_new));
2008 else
2009 pattern_stmt = gimple_build_call_internal (ifnnew, 1, rhs_oprnd);
2010 gimple_call_set_lhs (pattern_stmt, new_var);
2011 gimple_set_location (pattern_stmt, loc);
2012 *type_out = vec_type;
2014 if (sub)
2016 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
2017 tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2018 pattern_stmt = gimple_build_assign (ret_var, MINUS_EXPR,
2019 build_int_cst (lhs_type, sub),
2020 new_var);
2021 gimple_set_location (pattern_stmt, loc);
2022 new_var = ret_var;
2024 else if (add)
2026 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
2027 tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2028 pattern_stmt = gimple_build_assign (ret_var, PLUS_EXPR, new_var,
2029 build_int_cst (lhs_type, add));
2030 gimple_set_location (pattern_stmt, loc);
2031 new_var = ret_var;
2034 if (defined_at_zero
2035 && (!defined_at_zero_new || val != val_cmp))
2037 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
2038 tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2039 rhs_oprnd = gimple_call_arg (call_stmt, 0);
2040 rhs_type = TREE_TYPE (rhs_oprnd);
2041 tree cmp = build2_loc (loc, NE_EXPR, boolean_type_node,
2042 rhs_oprnd, build_zero_cst (rhs_type));
2043 pattern_stmt = gimple_build_assign (ret_var, COND_EXPR, cmp,
2044 new_var,
2045 build_int_cst (lhs_type, val));
2048 if (dump_enabled_p ())
2049 dump_printf_loc (MSG_NOTE, vect_location,
2050 "created pattern stmt: %G", pattern_stmt);
2052 return pattern_stmt;
2055 /* Function vect_recog_popcount_clz_ctz_ffs_pattern
2057 Try to find the following pattern:
2059 UTYPE1 A;
2060 TYPE1 B;
2061 UTYPE2 temp_in;
2062 TYPE3 temp_out;
2063 temp_in = (UTYPE2)A;
2065 temp_out = __builtin_popcount{,l,ll} (temp_in);
2066 B = (TYPE1) temp_out;
2068 UTYPE2 may or may not be equal to TYPE3,
2069 e.g. UTYPE2 is equal to TYPE3 for __builtin_popcount
2070 but is not equal to TYPE3 for __builtin_popcountll.
2072 Input:
2074 * STMT_VINFO: The stmt from which the pattern search begins.
2075 here it starts with B = (TYPE1) temp_out;
2077 Output:
2079 * TYPE_OUT: The vector type of the output of this pattern.
2081 * Return value: A new stmt that will be used to replace the sequence of
2082 stmts that constitute the pattern. In this case it will be:
2083 B = .POPCOUNT (A);
2085 Similarly for clz, ctz and ffs.
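   Illustrative example (not part of the original sources), with made-up
   names:

     void f (unsigned int *a, unsigned int *b, int n)
     {
       for (int i = 0; i < n; i++)
         b[i] = __builtin_popcountll (a[i]);
     }

   Here temp_in is the unsigned long long promotion of a[i], temp_out the
   int result of the builtin, and the pattern produces B = .POPCOUNT (A)
   on vectors of unsigned int.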
2088 static gimple *
2089 vect_recog_popcount_clz_ctz_ffs_pattern (vec_info *vinfo,
2090 stmt_vec_info stmt_vinfo,
2091 tree *type_out)
2093 gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
2094 gimple *call_stmt, *pattern_stmt;
2095 tree rhs_oprnd, rhs_origin, lhs_oprnd, lhs_type, vec_type, new_var;
2096 internal_fn ifn = IFN_LAST;
2097 int addend = 0;
2099 /* Find B = (TYPE1) temp_out. */
2100 if (!last_stmt)
2101 return NULL;
2102 tree_code code = gimple_assign_rhs_code (last_stmt);
2103 if (!CONVERT_EXPR_CODE_P (code))
2104 return NULL;
2106 lhs_oprnd = gimple_assign_lhs (last_stmt);
2107 lhs_type = TREE_TYPE (lhs_oprnd);
2108 if (!INTEGRAL_TYPE_P (lhs_type))
2109 return NULL;
2111 rhs_oprnd = gimple_assign_rhs1 (last_stmt);
2112 if (TREE_CODE (rhs_oprnd) != SSA_NAME
2113 || !has_single_use (rhs_oprnd))
2114 return NULL;
2115 call_stmt = SSA_NAME_DEF_STMT (rhs_oprnd);
2117 /* Find temp_out = __builtin_popcount{,l,ll} (temp_in); */
2118 if (!is_gimple_call (call_stmt))
2119 return NULL;
2120 switch (gimple_call_combined_fn (call_stmt))
2122 int val;
2123 CASE_CFN_POPCOUNT:
2124 ifn = IFN_POPCOUNT;
2125 break;
2126 CASE_CFN_CLZ:
2127 ifn = IFN_CLZ;
2128 /* Punt if call result is unsigned and defined value at zero
2129 is negative, as the negative value doesn't extend correctly. */
2130 if (TYPE_UNSIGNED (TREE_TYPE (rhs_oprnd))
2131 && gimple_call_internal_p (call_stmt)
2132 && CLZ_DEFINED_VALUE_AT_ZERO
2133 (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd)), val) == 2
2134 && val < 0)
2135 return NULL;
2136 break;
2137 CASE_CFN_CTZ:
2138 ifn = IFN_CTZ;
2139 /* Punt if call result is unsigned and defined value at zero
2140 is negative, as the negative value doesn't extend correctly. */
2141 if (TYPE_UNSIGNED (TREE_TYPE (rhs_oprnd))
2142 && gimple_call_internal_p (call_stmt)
2143 && CTZ_DEFINED_VALUE_AT_ZERO
2144 (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd)), val) == 2
2145 && val < 0)
2146 return NULL;
2147 break;
2148 CASE_CFN_FFS:
2149 ifn = IFN_FFS;
2150 break;
2151 default:
2152 return NULL;
2155 if (gimple_call_num_args (call_stmt) != 1
2156 && gimple_call_num_args (call_stmt) != 2)
2157 return NULL;
2159 rhs_oprnd = gimple_call_arg (call_stmt, 0);
2160 vect_unpromoted_value unprom_diff;
2161 rhs_origin
2162 = vect_look_through_possible_promotion (vinfo, rhs_oprnd, &unprom_diff);
2164 if (!rhs_origin)
2165 return NULL;
2167 /* Input and output of .POPCOUNT should be same-precision integer. */
2168 if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (lhs_type))
2169 return NULL;
2171 /* Also A should be unsigned or same precision as temp_in, otherwise
2172 different builtins/internal functions have different behaviors. */
2173 if (TYPE_PRECISION (unprom_diff.type)
2174 != TYPE_PRECISION (TREE_TYPE (rhs_oprnd)))
2175 switch (ifn)
2177 case IFN_POPCOUNT:
2178 /* For popcount require zero extension, which doesn't add any
2179 further bits to the count. */
2180 if (!TYPE_UNSIGNED (unprom_diff.type))
2181 return NULL;
2182 break;
2183 case IFN_CLZ:
2184 /* clzll (x) == clz (x) + 32 for unsigned x != 0, so ok
2185 if it is undefined at zero or if it matches also for the
2186 defined value there. */
2187 if (!TYPE_UNSIGNED (unprom_diff.type))
2188 return NULL;
2189 if (!type_has_mode_precision_p (lhs_type)
2190 || !type_has_mode_precision_p (TREE_TYPE (rhs_oprnd)))
2191 return NULL;
2192 addend = (TYPE_PRECISION (TREE_TYPE (rhs_oprnd))
2193 - TYPE_PRECISION (lhs_type));
2194 if (gimple_call_internal_p (call_stmt)
2195 && gimple_call_num_args (call_stmt) == 2)
2197 int val1, val2;
2198 val1 = tree_to_shwi (gimple_call_arg (call_stmt, 1));
2199 int d2
2200 = CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
2201 val2);
2202 if (d2 != 2 || val1 != val2 + addend)
2203 return NULL;
2205 break;
2206 case IFN_CTZ:
2207 /* ctzll (x) == ctz (x) for unsigned or signed x != 0, so ok
2208 if it is undefined at zero or if it matches also for the
2209 defined value there. */
2210 if (gimple_call_internal_p (call_stmt)
2211 && gimple_call_num_args (call_stmt) == 2)
2213 int val1, val2;
2214 val1 = tree_to_shwi (gimple_call_arg (call_stmt, 1));
2215 int d2
2216 = CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
2217 val2);
2218 if (d2 != 2 || val1 != val2)
2219 return NULL;
2221 break;
2222 case IFN_FFS:
2223 /* ffsll (x) == ffs (x) for unsigned or signed x. */
2224 break;
2225 default:
2226 gcc_unreachable ();
2229 vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);
2230 /* Do it only if the backend has popcount<vector_mode>2 etc. pattern. */
2231 if (!vec_type)
2232 return NULL;
2234 bool supported
2235 = direct_internal_fn_supported_p (ifn, vec_type, OPTIMIZE_FOR_SPEED);
2236 if (!supported)
2237 switch (ifn)
2239 case IFN_POPCOUNT:
2240 case IFN_CLZ:
2241 return NULL;
2242 case IFN_FFS:
2243 /* vect_recog_ctz_ffs_pattern can implement ffs using ctz. */
2244 if (direct_internal_fn_supported_p (IFN_CTZ, vec_type,
2245 OPTIMIZE_FOR_SPEED))
2246 break;
2247 /* FALLTHRU */
2248 case IFN_CTZ:
2249 /* vect_recog_ctz_ffs_pattern can implement ffs or ctz using
2250 clz or popcount. */
2251 if (direct_internal_fn_supported_p (IFN_CLZ, vec_type,
2252 OPTIMIZE_FOR_SPEED))
2253 break;
2254 if (direct_internal_fn_supported_p (IFN_POPCOUNT, vec_type,
2255 OPTIMIZE_FOR_SPEED))
2256 break;
2257 return NULL;
2258 default:
2259 gcc_unreachable ();
2262 vect_pattern_detected ("vec_recog_popcount_clz_ctz_ffs_pattern",
2263 call_stmt);
2265 /* Create B = .POPCOUNT (A). */
2266 new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2267 tree arg2 = NULL_TREE;
2268 int val;
2269 if (ifn == IFN_CLZ
2270 && CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
2271 val) == 2)
2272 arg2 = build_int_cst (integer_type_node, val);
2273 else if (ifn == IFN_CTZ
2274 && CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
2275 val) == 2)
2276 arg2 = build_int_cst (integer_type_node, val);
2277 if (arg2)
2278 pattern_stmt = gimple_build_call_internal (ifn, 2, unprom_diff.op, arg2);
2279 else
2280 pattern_stmt = gimple_build_call_internal (ifn, 1, unprom_diff.op);
2281 gimple_call_set_lhs (pattern_stmt, new_var);
2282 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
2283 *type_out = vec_type;
2285 if (dump_enabled_p ())
2286 dump_printf_loc (MSG_NOTE, vect_location,
2287 "created pattern stmt: %G", pattern_stmt);
2289 if (addend)
2291 gcc_assert (supported);
2292 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
2293 tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2294 pattern_stmt = gimple_build_assign (ret_var, PLUS_EXPR, new_var,
2295 build_int_cst (lhs_type, addend));
2297 else if (!supported)
2299 stmt_vec_info new_stmt_info = vinfo->add_stmt (pattern_stmt);
2300 STMT_VINFO_VECTYPE (new_stmt_info) = vec_type;
2301 pattern_stmt
2302 = vect_recog_ctz_ffs_pattern (vinfo, new_stmt_info, type_out);
2303 if (pattern_stmt == NULL)
2304 return NULL;
2305 if (gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (new_stmt_info))
2307 gimple_seq *pseq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
2308 gimple_seq_add_seq_without_update (pseq, seq);
2311 return pattern_stmt;
2314 /* Function vect_recog_pow_pattern
2316 Try to find the following pattern:
2318 x = POW (y, N);
2320 with POW being one of pow, powf, powi, powif and N being
2321 either 2 or 0.5.
2323 Input:
2325 * STMT_VINFO: The stmt from which the pattern search begins.
2327 Output:
2329 * TYPE_OUT: The type of the output of this pattern.
2331 * Return value: A new stmt that will be used to replace the sequence of
2332 stmts that constitute the pattern. In this case it will be:
2333 x = x * x
2334 or
2335 x = sqrt (x)
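   Illustrative example (not part of the original sources): a call such as
   y[i] = pow (x[i], 2.0) is replaced by y[i] = x[i] * x[i], and an
   exponent of 0.5 is replaced by a call to .SQRT where the target
   supports it.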
2338 static gimple *
2339 vect_recog_pow_pattern (vec_info *vinfo,
2340 stmt_vec_info stmt_vinfo, tree *type_out)
2342 gimple *last_stmt = stmt_vinfo->stmt;
2343 tree base, exp;
2344 gimple *stmt;
2345 tree var;
2347 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
2348 return NULL;
2350 switch (gimple_call_combined_fn (last_stmt))
2352 CASE_CFN_POW:
2353 CASE_CFN_POWI:
2354 break;
2356 default:
2357 return NULL;
2360 base = gimple_call_arg (last_stmt, 0);
2361 exp = gimple_call_arg (last_stmt, 1);
2362 if (TREE_CODE (exp) != REAL_CST
2363 && TREE_CODE (exp) != INTEGER_CST)
2365 if (flag_unsafe_math_optimizations
2366 && TREE_CODE (base) == REAL_CST
2367 && gimple_call_builtin_p (last_stmt, BUILT_IN_NORMAL))
2369 combined_fn log_cfn;
2370 built_in_function exp_bfn;
2371 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
2373 case BUILT_IN_POW:
2374 log_cfn = CFN_BUILT_IN_LOG;
2375 exp_bfn = BUILT_IN_EXP;
2376 break;
2377 case BUILT_IN_POWF:
2378 log_cfn = CFN_BUILT_IN_LOGF;
2379 exp_bfn = BUILT_IN_EXPF;
2380 break;
2381 case BUILT_IN_POWL:
2382 log_cfn = CFN_BUILT_IN_LOGL;
2383 exp_bfn = BUILT_IN_EXPL;
2384 break;
2385 default:
2386 return NULL;
2388 tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
2389 tree exp_decl = builtin_decl_implicit (exp_bfn);
2390 /* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd
2391 does that, but if C is a power of 2, we want to use
2392 exp2 (log2 (C) * x) in the non-vectorized version, but for
2393 vectorization we don't have vectorized exp2. */
2394 if (logc
2395 && TREE_CODE (logc) == REAL_CST
2396 && exp_decl
2397 && lookup_attribute ("omp declare simd",
2398 DECL_ATTRIBUTES (exp_decl)))
2400 cgraph_node *node = cgraph_node::get_create (exp_decl);
2401 if (node->simd_clones == NULL)
2403 if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
2404 || node->definition)
2405 return NULL;
2406 expand_simd_clones (node);
2407 if (node->simd_clones == NULL)
2408 return NULL;
2410 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
2411 if (!*type_out)
2412 return NULL;
2413 tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
2414 gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
2415 append_pattern_def_seq (vinfo, stmt_vinfo, g);
2416 tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
2417 g = gimple_build_call (exp_decl, 1, def);
2418 gimple_call_set_lhs (g, res);
2419 return g;
2423 return NULL;
2426 /* We now have a pow or powi builtin function call with a constant
2427 exponent. */
2429 /* Catch squaring. */
2430 if ((tree_fits_shwi_p (exp)
2431 && tree_to_shwi (exp) == 2)
2432 || (TREE_CODE (exp) == REAL_CST
2433 && real_equal (&TREE_REAL_CST (exp), &dconst2)))
2435 if (!vect_supportable_direct_optab_p (vinfo, TREE_TYPE (base), MULT_EXPR,
2436 TREE_TYPE (base), type_out))
2437 return NULL;
2439 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
2440 stmt = gimple_build_assign (var, MULT_EXPR, base, base);
2441 return stmt;
2444 /* Catch square root. */
2445 if (TREE_CODE (exp) == REAL_CST
2446 && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
2448 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
2449 if (*type_out
2450 && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
2451 OPTIMIZE_FOR_SPEED))
2453 gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
2454 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
2455 gimple_call_set_lhs (stmt, var);
2456 gimple_call_set_nothrow (stmt, true);
2457 return stmt;
2461 return NULL;
2465 /* Function vect_recog_widen_sum_pattern
2467 Try to find the following pattern:
2469 type x_t;
2470 TYPE x_T, sum = init;
2471 loop:
2472 sum_0 = phi <init, sum_1>
2473 S1 x_t = *p;
2474 S2 x_T = (TYPE) x_t;
2475 S3 sum_1 = x_T + sum_0;
2477 where type 'TYPE' is at least double the size of type 'type', i.e. we're
2478 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
2479 a special case of a reduction computation.
2481 Input:
2483 * STMT_VINFO: The stmt from which the pattern search begins. In the example,
2484 when this function is called with S3, the pattern {S2,S3} will be detected.
2486 Output:
2488 * TYPE_OUT: The type of the output of this pattern.
2490 * Return value: A new stmt that will be used to replace the sequence of
2491 stmts that constitute the pattern. In this case it will be:
2492 WIDEN_SUM <x_t, sum_0>
2494 Note: The widening-sum idiom is a widening reduction pattern that is
2495 vectorized without preserving all the intermediate results. It
2496 produces only N/2 (widened) results (by summing up pairs of
2497 intermediate results) rather than all N results. Therefore, we
2498 cannot allow this pattern when we want to get all the results and in
2499 the correct order (as is the case when this computation is in an
2500 inner-loop nested in an outer-loop that is being vectorized). */
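/* Illustrative example (not part of the original sources), with made-up
   names:

     short *x; int sum = 0;
     for (int i = 0; i < n; i++)
       sum += x[i];

   Here x[i] is widened to int before the addition (S2/S3 above), and the
   reduction can use WIDEN_SUM_EXPR where the target supports it.  */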
2502 static gimple *
2503 vect_recog_widen_sum_pattern (vec_info *vinfo,
2504 stmt_vec_info stmt_vinfo, tree *type_out)
2506 gimple *last_stmt = stmt_vinfo->stmt;
2507 tree oprnd0, oprnd1;
2508 tree type;
2509 gimple *pattern_stmt;
2510 tree var;
2512 /* Look for the following pattern
2513 DX = (TYPE) X;
2514 sum_1 = DX + sum_0;
2515 In which DX is at least double the size of X, and sum_1 has been
2516 recognized as a reduction variable.
2519 /* Starting from LAST_STMT, follow the defs of its uses in search
2520 of the above pattern. */
2522 if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
2523 &oprnd0, &oprnd1)
2524 || TREE_CODE (oprnd0) != SSA_NAME
2525 || !vinfo->lookup_def (oprnd0))
2526 return NULL;
2528 type = TREE_TYPE (gimple_get_lhs (last_stmt));
2530 /* So far so good. Since last_stmt was detected as a (summation) reduction,
2531 we know that oprnd1 is the reduction variable (defined by a loop-header
2532 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
2533 Left to check that oprnd0 is defined by a cast from type 'type' to type
2534 'TYPE'. */
2536 vect_unpromoted_value unprom0;
2537 if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
2538 || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
2539 return NULL;
2541 vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
2543 if (!vect_supportable_direct_optab_p (vinfo, type, WIDEN_SUM_EXPR,
2544 unprom0.type, type_out))
2545 return NULL;
2547 var = vect_recog_temp_ssa_var (type, NULL);
2548 pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);
2550 return pattern_stmt;
2553 /* Function vect_recog_bitfield_ref_pattern
2555 Try to find the following pattern:
2557 bf_value = BIT_FIELD_REF (container, bitsize, bitpos);
2558 result = (type_out) bf_value;
2560 or
2562 if (BIT_FIELD_REF (container, bitsize, bitpos) `cmp` <constant>)
2564 where type_out is a non-bitfield type, that is to say, its precision matches
2565 2^(TYPE_SIZE(type_out) - (TYPE_UNSIGNED (type_out) ? 1 : 2)).
2567 Input:
2569 * STMT_VINFO: The stmt from which the pattern search begins.
2570 here it starts with:
2571 result = (type_out) bf_value;
2573 or
2575 if (BIT_FIELD_REF (container, bitsize, bitpos) `cmp` <constant>)
2577 Output:
2579 * TYPE_OUT: The vector type of the output of this pattern.
2581 * Return value: A new stmt that will be used to replace the sequence of
2582 stmts that constitute the pattern. If the precision of type_out is bigger
2583 than the precision type of _1 we perform the widening before the shifting,
2584 since the new precision will be large enough to shift the value and moving
2585 widening operations up the statement chain enables the generation of
2586 widening loads. If we are widening and the operation after the pattern is
2587 an addition then we mask first and shift later, to enable the generation of
2588 shifting adds. In the case of narrowing we will always mask first, shift
2589 last and then perform a narrowing operation. This will enable the
2590 generation of narrowing shifts.
2592 Widening with mask first, shift later:
2593 container = (type_out) container;
2594 masked = container & (((1 << bitsize) - 1) << bitpos);
2595 result = masked >> bitpos;
2597 Widening with shift first, mask last:
2598 container = (type_out) container;
2599 shifted = container >> bitpos;
2600 result = shifted & ((1 << bitsize) - 1);
2602 Narrowing:
2603 masked = container & (((1 << bitsize) - 1) << bitpos);
2604 result = masked >> bitpos;
2605 result = (type_out) result;
2607 If the bitfield is signed and it's wider than type_out, we need to
2608 keep the result sign-extended:
2609 container = (type) container;
2610 masked = container << (prec - bitsize - bitpos);
2611 result = (type_out) (masked >> (prec - bitsize));
2613 Here type is the signed variant of the wider of type_out and the type
2614 of container.
2616 The shift is needed only when bitpos != 0.
2618 When the original bitfield was inside a gcond then a new gcond is also
2619 generated, with the new `result` as the operand to the comparison.
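   Illustrative example (not part of the original sources), little-endian,
   with made-up SSA names: for an unsigned 8-bit field at bit position 16
   of a 32-bit container,

     _2 = BIT_FIELD_REF <_1, 8, 16>;
     _3 = (unsigned int) _2;

   becomes, in effect,

     _4 = _1 >> 16;
     _3 = _4 & 0xff;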
2623 static gimple *
2624 vect_recog_bitfield_ref_pattern (vec_info *vinfo, stmt_vec_info stmt_info,
2625 tree *type_out)
2627 gimple *bf_stmt = NULL;
2628 tree lhs = NULL_TREE;
2629 tree ret_type = NULL_TREE;
2630 gimple *stmt = STMT_VINFO_STMT (stmt_info);
2631 if (gcond *cond_stmt = dyn_cast <gcond *> (stmt))
2633 tree op = gimple_cond_lhs (cond_stmt);
2634 if (TREE_CODE (op) != SSA_NAME)
2635 return NULL;
2636 bf_stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (op));
2637 if (TREE_CODE (gimple_cond_rhs (cond_stmt)) != INTEGER_CST)
2638 return NULL;
2640 else if (is_gimple_assign (stmt)
2641 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt))
2642 && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
2644 gimple *second_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
2645 bf_stmt = dyn_cast <gassign *> (second_stmt);
2646 lhs = gimple_assign_lhs (stmt);
2647 ret_type = TREE_TYPE (lhs);
2650 if (!bf_stmt
2651 || gimple_assign_rhs_code (bf_stmt) != BIT_FIELD_REF)
2652 return NULL;
2654 tree bf_ref = gimple_assign_rhs1 (bf_stmt);
2655 tree container = TREE_OPERAND (bf_ref, 0);
2656 ret_type = ret_type ? ret_type : TREE_TYPE (container);
2658 if (!bit_field_offset (bf_ref).is_constant ()
2659 || !bit_field_size (bf_ref).is_constant ()
2660 || !tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (container))))
2661 return NULL;
2663 if (!INTEGRAL_TYPE_P (TREE_TYPE (bf_ref))
2664 || !INTEGRAL_TYPE_P (TREE_TYPE (container))
2665 || TYPE_MODE (TREE_TYPE (container)) == E_BLKmode)
2666 return NULL;
2668 gimple *use_stmt, *pattern_stmt;
2669 use_operand_p use_p;
2670 bool shift_first = true;
2671 tree container_type = TREE_TYPE (container);
2672 tree vectype = get_vectype_for_scalar_type (vinfo, container_type);
2674 /* Calculate shift_n before the adjustments for widening loads, otherwise
2675 the container may change and we have to consider offset change for
2676 widening loads on big endianness. The shift_n calculated here can be
2677 independent of widening. */
2678 unsigned HOST_WIDE_INT shift_n = bit_field_offset (bf_ref).to_constant ();
2679 unsigned HOST_WIDE_INT mask_width = bit_field_size (bf_ref).to_constant ();
2680 unsigned HOST_WIDE_INT prec = tree_to_uhwi (TYPE_SIZE (container_type));
2681 if (BYTES_BIG_ENDIAN)
2682 shift_n = prec - shift_n - mask_width;
2684 bool ref_sext = (!TYPE_UNSIGNED (TREE_TYPE (bf_ref)) &&
2685 TYPE_PRECISION (ret_type) > mask_width);
2686 bool load_widen = (TYPE_PRECISION (TREE_TYPE (container)) <
2687 TYPE_PRECISION (ret_type));
2689 /* We move the conversion earlier if the loaded type is smaller than the
2690 return type to enable the use of widening loads. And if we need a
2691 sign extension, we need to convert the loaded value early to a signed
2692 type as well. */
2693 if (ref_sext || load_widen)
2695 tree type = load_widen ? ret_type : container_type;
2696 if (ref_sext)
2697 type = gimple_signed_type (type);
2698 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type),
2699 NOP_EXPR, container);
2700 container = gimple_get_lhs (pattern_stmt);
2701 container_type = TREE_TYPE (container);
2702 prec = tree_to_uhwi (TYPE_SIZE (container_type));
2703 vectype = get_vectype_for_scalar_type (vinfo, container_type);
2704 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
2706 else if (!useless_type_conversion_p (TREE_TYPE (container), ret_type))
2707 /* If we are doing the conversion last then also delay the shift as we may
2708 be able to combine the shift and conversion in certain cases. */
2709 shift_first = false;
2711 /* If the only use of the result of this BIT_FIELD_REF + CONVERT is a
2712 PLUS_EXPR then do the shift last as some targets can combine the shift and
2713 add into a single instruction. */
2714 if (lhs && single_imm_use (lhs, &use_p, &use_stmt))
2716 if (gimple_code (use_stmt) == GIMPLE_ASSIGN
2717 && gimple_assign_rhs_code (use_stmt) == PLUS_EXPR)
2718 shift_first = false;
2721 /* If we don't have to shift we only generate the mask, so just fix the
2722 code-path to shift_first. */
2723 if (shift_n == 0)
2724 shift_first = true;
2726 tree result;
2727 if (shift_first && !ref_sext)
2729 tree shifted = container;
2730 if (shift_n)
2732 pattern_stmt
2733 = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2734 RSHIFT_EXPR, container,
2735 build_int_cst (sizetype, shift_n));
2736 shifted = gimple_assign_lhs (pattern_stmt);
2737 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
2740 tree mask = wide_int_to_tree (container_type,
2741 wi::mask (mask_width, false, prec));
2743 pattern_stmt
2744 = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2745 BIT_AND_EXPR, shifted, mask);
2746 result = gimple_assign_lhs (pattern_stmt);
2748 else
2750 tree temp = vect_recog_temp_ssa_var (container_type);
2751 if (!ref_sext)
2753 tree mask = wide_int_to_tree (container_type,
2754 wi::shifted_mask (shift_n,
2755 mask_width,
2756 false, prec));
2757 pattern_stmt = gimple_build_assign (temp, BIT_AND_EXPR,
2758 container, mask);
2760 else
2762 HOST_WIDE_INT shl = prec - shift_n - mask_width;
2763 shift_n += shl;
2764 pattern_stmt = gimple_build_assign (temp, LSHIFT_EXPR,
2765 container,
2766 build_int_cst (sizetype,
2767 shl));
2770 tree masked = gimple_assign_lhs (pattern_stmt);
2771 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
2772 pattern_stmt
2773 = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2774 RSHIFT_EXPR, masked,
2775 build_int_cst (sizetype, shift_n));
2776 result = gimple_assign_lhs (pattern_stmt);
2779 if (!useless_type_conversion_p (TREE_TYPE (result), ret_type))
2781 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
2782 pattern_stmt
2783 = gimple_build_assign (vect_recog_temp_ssa_var (ret_type),
2784 NOP_EXPR, result);
2787 if (!lhs)
2789 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
2790 gcond *cond_stmt = dyn_cast <gcond *> (stmt_info->stmt);
2791 tree cond_cst = gimple_cond_rhs (cond_stmt);
2792 pattern_stmt
2793 = gimple_build_cond (gimple_cond_code (cond_stmt),
2794 gimple_get_lhs (pattern_stmt),
2795 fold_convert (ret_type, cond_cst),
2796 gimple_cond_true_label (cond_stmt),
2797 gimple_cond_false_label (cond_stmt));
2800 *type_out = STMT_VINFO_VECTYPE (stmt_info);
2801 vect_pattern_detected ("bitfield_ref pattern", stmt_info->stmt);
2803 return pattern_stmt;
2806 /* Function vect_recog_bit_insert_pattern
2808 Try to find the following pattern:
2810 written = BIT_INSERT_EXPR (container, value, bitpos);
2812 Input:
2814 * STMT_VINFO: The stmt we want to replace.
2816 Output:
2818 * TYPE_OUT: The vector type of the output of this pattern.
2820 * Return value: A new stmt that will be used to replace the sequence of
2821 stmts that constitute the pattern. In this case it will be:
2822 value = (container_type) value; // Make sure
2823 shifted = value << bitpos; // Shift value into place
2824 masked = shifted & (mask << bitpos); // Mask off the non-relevant bits in
2825 // the 'to-write value'.
2826 cleared = container & ~(mask << bitpos); // Clearing the bits we want to
2827 // write to from the value we want
2828 // to write to.
2829 written = cleared | masked; // Write bits.
2832 where mask = ((1 << TYPE_PRECISION (value)) - 1), i.e. a mask that keeps only
2833 the bits corresponding to the real size of the bitfield value we are writing.
2834 The shift is needed only when bitpos != 0.
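   Illustrative example (not part of the original sources), little-endian,
   with made-up SSA names: writing an 8-bit value _2 at bit position 16 of
   a 32-bit container _1,

     _3 = BIT_INSERT_EXPR <_1, _2, 16>;

   becomes, in effect,

     shifted = ((unsigned int) _2) << 16;
     masked  = shifted & 0x00ff0000;
     cleared = _1 & 0xff00ffff;
     written = cleared | masked;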
2838 static gimple *
2839 vect_recog_bit_insert_pattern (vec_info *vinfo, stmt_vec_info stmt_info,
2840 tree *type_out)
2842 gassign *bf_stmt = dyn_cast <gassign *> (stmt_info->stmt);
2843 if (!bf_stmt || gimple_assign_rhs_code (bf_stmt) != BIT_INSERT_EXPR)
2844 return NULL;
2846 tree container = gimple_assign_rhs1 (bf_stmt);
2847 tree value = gimple_assign_rhs2 (bf_stmt);
2848 tree shift = gimple_assign_rhs3 (bf_stmt);
2850 tree bf_type = TREE_TYPE (value);
2851 tree container_type = TREE_TYPE (container);
2853 if (!INTEGRAL_TYPE_P (container_type)
2854 || !tree_fits_uhwi_p (TYPE_SIZE (container_type)))
2855 return NULL;
2857 gimple *pattern_stmt;
2859 vect_unpromoted_value unprom;
2860 unprom.set_op (value, vect_internal_def);
2861 value = vect_convert_input (vinfo, stmt_info, container_type, &unprom,
2862 get_vectype_for_scalar_type (vinfo,
2863 container_type));
2865 unsigned HOST_WIDE_INT mask_width = TYPE_PRECISION (bf_type);
2866 unsigned HOST_WIDE_INT prec = tree_to_uhwi (TYPE_SIZE (container_type));
2867 unsigned HOST_WIDE_INT shift_n = tree_to_uhwi (shift);
2868 if (BYTES_BIG_ENDIAN)
2870 shift_n = prec - shift_n - mask_width;
2871 shift = build_int_cst (TREE_TYPE (shift), shift_n);
2874 if (!useless_type_conversion_p (TREE_TYPE (value), container_type))
2876 pattern_stmt =
2877 gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2878 NOP_EXPR, value);
2879 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
2880 value = gimple_get_lhs (pattern_stmt);
2883 /* Shift VALUE into place. */
2884 tree shifted = value;
2885 if (shift_n)
2887 gimple_seq stmts = NULL;
2888 shifted
2889 = gimple_build (&stmts, LSHIFT_EXPR, container_type, value, shift);
2890 if (!gimple_seq_empty_p (stmts))
2891 append_pattern_def_seq (vinfo, stmt_info,
2892 gimple_seq_first_stmt (stmts));
2895 tree mask_t
2896 = wide_int_to_tree (container_type,
2897 wi::shifted_mask (shift_n, mask_width, false, prec));
2899 /* Clear bits we don't want to write back from SHIFTED. */
2900 gimple_seq stmts = NULL;
2901 tree masked = gimple_build (&stmts, BIT_AND_EXPR, container_type, shifted,
2902 mask_t);
2903 if (!gimple_seq_empty_p (stmts))
2905 pattern_stmt = gimple_seq_first_stmt (stmts);
2906 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
2909 /* Mask off the bits in the container that we are to write to. */
2910 mask_t = wide_int_to_tree (container_type,
2911 wi::shifted_mask (shift_n, mask_width, true, prec));
2912 tree cleared = vect_recog_temp_ssa_var (container_type);
2913 pattern_stmt = gimple_build_assign (cleared, BIT_AND_EXPR, container, mask_t);
2914 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
2916 /* Write MASKED into CLEARED. */
2917 pattern_stmt
2918 = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2919 BIT_IOR_EXPR, cleared, masked);
2921 *type_out = STMT_VINFO_VECTYPE (stmt_info);
2922 vect_pattern_detected ("bit_insert pattern", stmt_info->stmt);
2924 return pattern_stmt;
2928 /* Recognize cases in which an operation is performed in one type WTYPE
2929 but could be done more efficiently in a narrower type NTYPE. For example,
2930 if we have:
2932 ATYPE a; // narrower than NTYPE
2933 BTYPE b; // narrower than NTYPE
2934 WTYPE aw = (WTYPE) a;
2935 WTYPE bw = (WTYPE) b;
2936 WTYPE res = aw + bw; // only uses of aw and bw
2938 then it would be more efficient to do:
2940 NTYPE an = (NTYPE) a;
2941 NTYPE bn = (NTYPE) b;
2942 NTYPE resn = an + bn;
2943 WTYPE res = (WTYPE) resn;
2945 Other situations include things like:
2947 ATYPE a; // NTYPE or narrower
2948 WTYPE aw = (WTYPE) a;
2949 WTYPE res = aw + b;
2951 when only "(NTYPE) res" is significant. In that case it's more efficient
2952 to truncate "b" and do the operation on NTYPE instead:
2954 NTYPE an = (NTYPE) a;
2955 NTYPE bn = (NTYPE) b; // truncation
2956 NTYPE resn = an + bn;
2957 WTYPE res = (WTYPE) resn;
2959 All users of "res" should then use "resn" instead, making the final
2960 statement dead (not marked as relevant). The final statement is still
2961 needed to maintain the type correctness of the IR.
2963 vect_determine_precisions has already determined the minimum
2964 precision of the operation and the minimum precision required
2965 by users of the result. */
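/* Illustrative example (not part of the original sources), with made-up
   names:

     unsigned char *a, *b, *c;
     for (int i = 0; i < n; i++)
       c[i] = a[i] + b[i];

   The C promotion rules do the addition in int, but only the low 8 bits
   of the sum are stored, so the addition can be narrowed and performed
   directly on vectors of unsigned char.  */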
2967 static gimple *
2968 vect_recog_over_widening_pattern (vec_info *vinfo,
2969 stmt_vec_info last_stmt_info, tree *type_out)
2971 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
2972 if (!last_stmt)
2973 return NULL;
2975 /* See whether we have found that this operation can be done on a
2976 narrower type without changing its semantics. */
2977 unsigned int new_precision = last_stmt_info->operation_precision;
2978 if (!new_precision)
2979 return NULL;
2981 tree lhs = gimple_assign_lhs (last_stmt);
2982 tree type = TREE_TYPE (lhs);
2983 tree_code code = gimple_assign_rhs_code (last_stmt);
2985 /* Punt for reductions where we don't handle the type conversions. */
2986 if (STMT_VINFO_DEF_TYPE (last_stmt_info) == vect_reduction_def)
2987 return NULL;
2989 /* Keep the first operand of a COND_EXPR as-is: only the other two
2990 operands are interesting. */
2991 unsigned int first_op = (code == COND_EXPR ? 2 : 1);
2993 /* Check the operands. */
2994 unsigned int nops = gimple_num_ops (last_stmt) - first_op;
2995 auto_vec <vect_unpromoted_value, 3> unprom (nops);
2996 unprom.quick_grow_cleared (nops);
2997 unsigned int min_precision = 0;
2998 bool single_use_p = false;
2999 for (unsigned int i = 0; i < nops; ++i)
3001 tree op = gimple_op (last_stmt, first_op + i);
3002 if (TREE_CODE (op) == INTEGER_CST)
3003 unprom[i].set_op (op, vect_constant_def);
3004 else if (TREE_CODE (op) == SSA_NAME)
3006 bool op_single_use_p = true;
3007 if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
3008 &op_single_use_p))
3009 return NULL;
3010 /* If:
3012 (1) N bits of the result are needed;
3013 (2) all inputs are widened from M<N bits; and
3014 (3) one operand OP is a single-use SSA name
3016 we can shift the M->N widening from OP to the output
3017 without changing the number or type of extensions involved.
3018 This then reduces the number of copies of STMT_INFO.
3020 If instead of (3) more than one operand is a single-use SSA name,
3021 shifting the extension to the output is even more of a win.
3023 If instead:
3025 (1) N bits of the result are needed;
3026 (2) one operand OP2 is widened from M2<N bits;
3027 (3) another operand OP1 is widened from M1<M2 bits; and
3028 (4) both OP1 and OP2 are single-use
3030 the choice is between:
3032 (a) truncating OP2 to M1, doing the operation on M1,
3033 and then widening the result to N
3035 (b) widening OP1 to M2, doing the operation on M2, and then
3036 widening the result to N
3038 Both shift the M2->N widening of the inputs to the output.
3039 (a) additionally shifts the M1->M2 widening to the output;
3040 it requires fewer copies of STMT_INFO but requires an extra
3041 M2->M1 truncation.
3043 Which is better will depend on the complexity and cost of
3044 STMT_INFO, which is hard to predict at this stage. However,
3045 a clear tie-breaker in favor of (b) is the fact that the
3046 truncation in (a) increases the length of the operation chain.
3048 If instead of (4) only one of OP1 or OP2 is single-use,
3049 (b) is still a win over doing the operation in N bits:
3050 it still shifts the M2->N widening on the single-use operand
3051 to the output and reduces the number of STMT_INFO copies.
3053 If neither operand is single-use then operating on fewer than
3054 N bits might lead to more extensions overall. Whether it does
3055 or not depends on global information about the vectorization
3056 region, and whether that's a good trade-off would again
3057 depend on the complexity and cost of the statements involved,
3058 as well as things like register pressure that are not normally
3059 modelled at this stage. We therefore ignore these cases
3060 and just optimize the clear single-use wins above.
3062 Thus we take the maximum precision of the unpromoted operands
3063 and record whether any operand is single-use. */
3064 if (unprom[i].dt == vect_internal_def)
3066 min_precision = MAX (min_precision,
3067 TYPE_PRECISION (unprom[i].type));
3068 single_use_p |= op_single_use_p;
3071 else
3072 return NULL;
3075 /* Although the operation could be done in operation_precision, we have
3076 to balance that against introducing extra truncations or extensions.
3077 Calculate the minimum precision that can be handled efficiently.
3079 The loop above determined that the operation could be handled
3080 efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
3081 extension from the inputs to the output without introducing more
3082 instructions, and would reduce the number of instructions required
3083 for STMT_INFO itself.
3085 vect_determine_precisions has also determined that the result only
3086 needs min_output_precision bits. Truncating by a factor of N
3087 requires a tree of N - 1 instructions, so if TYPE is N times wider
3088 than min_output_precision, doing the operation in TYPE and truncating
3089 the result requires N + (N - 1) = 2N - 1 instructions per output vector.
3090 In contrast:
3092 - truncating the input to a unary operation and doing the operation
3093 in the new type requires at most N - 1 + 1 = N instructions per
3094 output vector
3096 - doing the same for a binary operation requires at most
3097 (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
3099 Both unary and binary operations require fewer instructions than
3100 this if the operands were extended from a suitable truncated form.
3101 Thus there is usually nothing to lose by doing operations in
3102 min_output_precision bits, but there can be something to gain. */
3103 if (!single_use_p)
3104 min_precision = last_stmt_info->min_output_precision;
3105 else
3106 min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
3108 /* Apply the minimum efficient precision we just calculated. */
3109 if (new_precision < min_precision)
3110 new_precision = min_precision;
3111 new_precision = vect_element_precision (new_precision);
3112 if (new_precision >= TYPE_PRECISION (type))
3113 return NULL;
3115 vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);
3117 *type_out = get_vectype_for_scalar_type (vinfo, type);
3118 if (!*type_out)
3119 return NULL;
3121 /* We've found a viable pattern. Get the new type of the operation. */
3122 bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
3123 tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
3125 /* If we're truncating an operation, we need to make sure that we
3126 don't introduce new undefined overflow. The codes tested here are
3127 a subset of those accepted by vect_truncatable_operation_p. */
3128 tree op_type = new_type;
3129 if (TYPE_OVERFLOW_UNDEFINED (new_type)
3130 && (code == PLUS_EXPR || code == MINUS_EXPR || code == MULT_EXPR))
3131 op_type = build_nonstandard_integer_type (new_precision, true);
3133 /* We specifically don't check here whether the target supports the
3134 new operation, since it might be something that a later pattern
3135 wants to rewrite anyway. If targets have a minimum element size
3136 for some optabs, we should pattern-match smaller ops to larger ops
3137 where beneficial. */
3138 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
3139 tree op_vectype = get_vectype_for_scalar_type (vinfo, op_type);
3140 if (!new_vectype || !op_vectype)
3141 return NULL;
3143 if (dump_enabled_p ())
3144 dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
3145 type, new_type);
3147 /* Calculate the rhs operands for an operation on OP_TYPE. */
3148 tree ops[3] = {};
3149 for (unsigned int i = 1; i < first_op; ++i)
3150 ops[i - 1] = gimple_op (last_stmt, i);
3151 /* For right shifts limit the shift operand. */
3152 vect_convert_inputs (vinfo, last_stmt_info, nops, &ops[first_op - 1],
3153 op_type, &unprom[0], op_vectype);
3155 /* Limit shift operands. */
3156 if (code == RSHIFT_EXPR)
3158 wide_int min_value, max_value;
3159 if (TREE_CODE (ops[1]) == INTEGER_CST)
3160 ops[1] = wide_int_to_tree (op_type,
3161 wi::umin (wi::to_wide (ops[1]),
3162 new_precision - 1));
3163 else if (!vect_get_range_info (ops[1], &min_value, &max_value)
3164 || wi::ge_p (max_value, new_precision, TYPE_SIGN (op_type)))
3166 /* ??? Note the following is bad for SLP as that only supports
3167 same-argument widened shifts and it un-CSEs same arguments. */
3168 tree new_var = vect_recog_temp_ssa_var (op_type, NULL);
3169 gimple *pattern_stmt
3170 = gimple_build_assign (new_var, MIN_EXPR, ops[1],
3171 build_int_cst (op_type, new_precision - 1));
3172 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
3173 if (ops[1] == unprom[1].op && unprom[1].dt == vect_external_def)
3175 if (edge e = vect_get_external_def_edge (vinfo, ops[1]))
3177 basic_block new_bb
3178 = gsi_insert_on_edge_immediate (e, pattern_stmt);
3179 gcc_assert (!new_bb);
3181 else
3182 return NULL;
3184 else
3185 append_pattern_def_seq (vinfo, last_stmt_info, pattern_stmt,
3186 op_vectype);
3187 ops[1] = new_var;
3191 /* Use the operation to produce a result of type OP_TYPE. */
3192 tree new_var = vect_recog_temp_ssa_var (op_type, NULL);
3193 gimple *pattern_stmt = gimple_build_assign (new_var, code,
3194 ops[0], ops[1], ops[2]);
3195 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
3197 if (dump_enabled_p ())
3198 dump_printf_loc (MSG_NOTE, vect_location,
3199 "created pattern stmt: %G", pattern_stmt);
3201 /* Convert back to the original signedness, if OP_TYPE is different
3202 from NEW_TYPE. */
3203 if (op_type != new_type)
3204 pattern_stmt = vect_convert_output (vinfo, last_stmt_info, new_type,
3205 pattern_stmt, op_vectype);
3207 /* Promote the result to the original type. */
3208 pattern_stmt = vect_convert_output (vinfo, last_stmt_info, type,
3209 pattern_stmt, new_vectype);
3211 return pattern_stmt;
3214 /* Recognize the following patterns:
3216 ATYPE a; // narrower than TYPE
3217 BTYPE b; // narrower than TYPE
3219 1) Multiply high with scaling
3220 TYPE res = ((TYPE) a * (TYPE) b) >> c;
3221 Here, c is bitsize (TYPE) / 2 - 1.
3223 2) ... or also with rounding
3224 TYPE res = (((TYPE) a * (TYPE) b) >> d + 1) >> 1;
3225 Here, d is bitsize (TYPE) / 2 - 2.
3227 3) Normal multiply high
3228 TYPE res = ((TYPE) a * (TYPE) b) >> e;
3229 Here, e is bitsize (TYPE) / 2.
3231 where only the bottom half of res is used. */
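/* Illustrative example (not part of the original sources), with made-up
   names:

     short *a, *b, *r;
     for (int i = 0; i < n; i++)
       r[i] = ((int) a[i] * b[i]) >> 15;

   This is form 1) with c = 32 / 2 - 1 = 15, and becomes
   r[i] = .MULHS (a[i], b[i]) where the target supports a highpart
   multiply.  */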
3233 static gimple *
3234 vect_recog_mulhs_pattern (vec_info *vinfo,
3235 stmt_vec_info last_stmt_info, tree *type_out)
3237 /* Check for a right shift. */
3238 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
3239 if (!last_stmt
3240 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR)
3241 return NULL;
3243 /* Check that the shift result is wider than the users of the
3244 result need (i.e. that narrowing would be a natural choice). */
3245 tree lhs_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
3246 unsigned int target_precision
3247 = vect_element_precision (last_stmt_info->min_output_precision);
3248 if (!INTEGRAL_TYPE_P (lhs_type)
3249 || target_precision >= TYPE_PRECISION (lhs_type))
3250 return NULL;
3252 /* Look through any change in sign on the outer shift input. */
3253 vect_unpromoted_value unprom_rshift_input;
3254 tree rshift_input = vect_look_through_possible_promotion
3255 (vinfo, gimple_assign_rhs1 (last_stmt), &unprom_rshift_input);
3256 if (!rshift_input
3257 || TYPE_PRECISION (TREE_TYPE (rshift_input))
3258 != TYPE_PRECISION (lhs_type))
3259 return NULL;
3261 /* Get the definition of the shift input. */
3262 stmt_vec_info rshift_input_stmt_info
3263 = vect_get_internal_def (vinfo, rshift_input);
3264 if (!rshift_input_stmt_info)
3265 return NULL;
3266 gassign *rshift_input_stmt
3267 = dyn_cast <gassign *> (rshift_input_stmt_info->stmt);
3268 if (!rshift_input_stmt)
3269 return NULL;
3271 stmt_vec_info mulh_stmt_info;
3272 tree scale_term;
3273 bool rounding_p = false;
3275 /* Check for the presence of the rounding term. */
3276 if (gimple_assign_rhs_code (rshift_input_stmt) == PLUS_EXPR)
3278 /* Check that the outer shift was by 1. */
3279 if (!integer_onep (gimple_assign_rhs2 (last_stmt)))
3280 return NULL;
3282 /* Check that the second operand of the PLUS_EXPR is 1. */
3283 if (!integer_onep (gimple_assign_rhs2 (rshift_input_stmt)))
3284 return NULL;
3286 /* Look through any change in sign on the addition input. */
3287 vect_unpromoted_value unprom_plus_input;
3288 tree plus_input = vect_look_through_possible_promotion
3289 (vinfo, gimple_assign_rhs1 (rshift_input_stmt), &unprom_plus_input);
3290 if (!plus_input
3291 || TYPE_PRECISION (TREE_TYPE (plus_input))
3292 != TYPE_PRECISION (TREE_TYPE (rshift_input)))
3293 return NULL;
3295 /* Get the definition of the multiply-high-scale part. */
3296 stmt_vec_info plus_input_stmt_info
3297 = vect_get_internal_def (vinfo, plus_input);
3298 if (!plus_input_stmt_info)
3299 return NULL;
3300 gassign *plus_input_stmt
3301 = dyn_cast <gassign *> (plus_input_stmt_info->stmt);
3302 if (!plus_input_stmt
3303 || gimple_assign_rhs_code (plus_input_stmt) != RSHIFT_EXPR)
3304 return NULL;
3306 /* Look through any change in sign on the scaling input. */
3307 vect_unpromoted_value unprom_scale_input;
3308 tree scale_input = vect_look_through_possible_promotion
3309 (vinfo, gimple_assign_rhs1 (plus_input_stmt), &unprom_scale_input);
3310 if (!scale_input
3311 || TYPE_PRECISION (TREE_TYPE (scale_input))
3312 != TYPE_PRECISION (TREE_TYPE (plus_input)))
3313 return NULL;
3315 /* Get the definition of the multiply-high part. */
3316 mulh_stmt_info = vect_get_internal_def (vinfo, scale_input);
3317 if (!mulh_stmt_info)
3318 return NULL;
3320 /* Get the scaling term. */
3321 scale_term = gimple_assign_rhs2 (plus_input_stmt);
3322 rounding_p = true;
3324 else
3326 mulh_stmt_info = rshift_input_stmt_info;
3327 scale_term = gimple_assign_rhs2 (last_stmt);
3330 /* Check that the scaling factor is constant. */
3331 if (TREE_CODE (scale_term) != INTEGER_CST)
3332 return NULL;
3334 /* Check whether the scaling input term can be seen as two widened
3335 inputs multiplied together. */
3336 vect_unpromoted_value unprom_mult[2];
3337 tree new_type;
3338 unsigned int nops
3339 = vect_widened_op_tree (vinfo, mulh_stmt_info, MULT_EXPR, WIDEN_MULT_EXPR,
3340 false, 2, unprom_mult, &new_type);
3341 if (nops != 2)
3342 return NULL;
3344 /* Adjust output precision. */
3345 if (TYPE_PRECISION (new_type) < target_precision)
3346 new_type = build_nonstandard_integer_type
3347 (target_precision, TYPE_UNSIGNED (new_type));
3349 unsigned mult_precision = TYPE_PRECISION (new_type);
3350 internal_fn ifn;
3351 /* Check that the scaling factor is expected. Instead of
3352 target_precision, we should use the one that we actually
3353 use for internal function. */
3354 if (rounding_p)
3356 /* Check pattern 2). */
3357 if (wi::to_widest (scale_term) + mult_precision + 2
3358 != TYPE_PRECISION (lhs_type))
3359 return NULL;
3361 ifn = IFN_MULHRS;
3363 else
3365 /* Check for pattern 1). */
3366 if (wi::to_widest (scale_term) + mult_precision + 1
3367 == TYPE_PRECISION (lhs_type))
3368 ifn = IFN_MULHS;
3369 /* Check for pattern 3). */
3370 else if (wi::to_widest (scale_term) + mult_precision
3371 == TYPE_PRECISION (lhs_type))
3372 ifn = IFN_MULH;
3373 else
3374 return NULL;
3377 vect_pattern_detected ("vect_recog_mulhs_pattern", last_stmt);
3379 /* Check for target support. */
3380 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
3381 if (!new_vectype
3382 || !direct_internal_fn_supported_p
3383 (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
3384 return NULL;
3386 /* The IR requires a valid vector type for the cast result, even though
3387 it's likely to be discarded. */
3388 *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
3389 if (!*type_out)
3390 return NULL;
3392 /* Generate the IFN_MULHRS call. */
3393 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
3394 tree new_ops[2];
3395 vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
3396 unprom_mult, new_vectype);
3397 gcall *mulhrs_stmt
3398 = gimple_build_call_internal (ifn, 2, new_ops[0], new_ops[1]);
3399 gimple_call_set_lhs (mulhrs_stmt, new_var);
3400 gimple_set_location (mulhrs_stmt, gimple_location (last_stmt));
3402 if (dump_enabled_p ())
3403 dump_printf_loc (MSG_NOTE, vect_location,
3404 "created pattern stmt: %G", (gimple *) mulhrs_stmt);
3406 return vect_convert_output (vinfo, last_stmt_info, lhs_type,
3407 mulhrs_stmt, new_vectype);
3410 /* Recognize the patterns:
3412 ATYPE a; // narrower than TYPE
3413 BTYPE b; // narrower than TYPE
3414 (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
3415 or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
3417 where only the bottom half of avg is used. Try to transform them into:
3419 (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
3420 or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
3422 followed by:
3424 TYPE avg = (TYPE) avg';
3426 where NTYPE is no wider than half of TYPE. Since only the bottom half
3427 of avg is used, all or part of the cast of avg' should become redundant.
3429 If there is no target support available, generate code to distribute rshift
3430 over plus and add a carry. */
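/* Illustrative example (not part of the original sources), with made-up
   names:

     unsigned char *a, *b, *c;
     for (int i = 0; i < n; i++)
       c[i] = (a[i] + b[i] + 1) >> 1;

   This is form (2) and becomes c[i] = .AVG_CEIL (a[i], b[i]) where the
   target supports a rounding average.  */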
3432 static gimple *
3433 vect_recog_average_pattern (vec_info *vinfo,
3434 stmt_vec_info last_stmt_info, tree *type_out)
3436 /* Check for a shift right by one bit. */
3437 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
3438 if (!last_stmt
3439 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
3440 || !integer_onep (gimple_assign_rhs2 (last_stmt)))
3441 return NULL;
3443 /* Check that the shift result is wider than the users of the
3444 result need (i.e. that narrowing would be a natural choice). */
3445 tree lhs = gimple_assign_lhs (last_stmt);
3446 tree type = TREE_TYPE (lhs);
3447 unsigned int target_precision
3448 = vect_element_precision (last_stmt_info->min_output_precision);
3449 if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
3450 return NULL;
3452 /* Look through any change in sign on the shift input. */
3453 tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
3454 vect_unpromoted_value unprom_plus;
3455 rshift_rhs = vect_look_through_possible_promotion (vinfo, rshift_rhs,
3456 &unprom_plus);
3457 if (!rshift_rhs
3458 || TYPE_PRECISION (TREE_TYPE (rshift_rhs)) != TYPE_PRECISION (type))
3459 return NULL;
3461 /* Get the definition of the shift input. */
3462 stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
3463 if (!plus_stmt_info)
3464 return NULL;
3466 /* Check whether the shift input can be seen as a tree of additions on
3467 2 or 3 widened inputs.
3469 Note that the pattern should be a win even if the result of one or
3470 more additions is reused elsewhere: if the pattern matches, we'd be
3471 replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s. */
3472 internal_fn ifn = IFN_AVG_FLOOR;
3473 vect_unpromoted_value unprom[3];
3474 tree new_type;
3475 unsigned int nops = vect_widened_op_tree (vinfo, plus_stmt_info, PLUS_EXPR,
3476 IFN_VEC_WIDEN_PLUS, false, 3,
3477 unprom, &new_type);
3478 if (nops == 0)
3479 return NULL;
3480 if (nops == 3)
3482 /* Check that one operand is 1. */
3483 unsigned int i;
3484 for (i = 0; i < 3; ++i)
3485 if (integer_onep (unprom[i].op))
3486 break;
3487 if (i == 3)
3488 return NULL;
3489 /* Throw away the 1 operand and keep the other two. */
3490 if (i < 2)
3491 unprom[i] = unprom[2];
3492 ifn = IFN_AVG_CEIL;
3495 vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
3497 /* We know that:
3499 (a) the operation can be viewed as:
3501 TYPE widened0 = (TYPE) UNPROM[0];
3502 TYPE widened1 = (TYPE) UNPROM[1];
3503 TYPE tmp1 = widened0 + widened1 {+ 1};
3504 TYPE tmp2 = tmp1 >> 1; // LAST_STMT_INFO
3506 (b) the first two statements are equivalent to:
3508 TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
3509 TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
3511 (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
3512 where sensible;
3514 (d) all the operations can be performed correctly at twice the width of
3515 NEW_TYPE, due to the nature of the average operation; and
3517 (e) users of the result of the right shift need only TARGET_PRECISION
3518 bits, where TARGET_PRECISION is no more than half of TYPE's
3519 precision.
3521 Under these circumstances, the only situation in which NEW_TYPE
3522 could be narrower than TARGET_PRECISION is if widened0, widened1
3523 and an addition result are all used more than once. Thus we can
3524 treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
3525 as "free", whereas widening the result of the average instruction
3526 from NEW_TYPE to TARGET_PRECISION would be a new operation. It's
3527 therefore better not to go narrower than TARGET_PRECISION. */
3528 if (TYPE_PRECISION (new_type) < target_precision)
3529 new_type = build_nonstandard_integer_type (target_precision,
3530 TYPE_UNSIGNED (new_type));
3532 /* Check for target support. */
3533 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
3534 if (!new_vectype)
3535 return NULL;
3537 bool fallback_p = false;
3539 if (direct_internal_fn_supported_p (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
3541 else if (TYPE_UNSIGNED (new_type)
3542 && optab_for_tree_code (RSHIFT_EXPR, new_vectype, optab_scalar)
3543 && optab_for_tree_code (PLUS_EXPR, new_vectype, optab_default)
3544 && optab_for_tree_code (BIT_IOR_EXPR, new_vectype, optab_default)
3545 && optab_for_tree_code (BIT_AND_EXPR, new_vectype, optab_default))
3546 fallback_p = true;
3547 else
3548 return NULL;
3550 /* The IR requires a valid vector type for the cast result, even though
3551 it's likely to be discarded. */
3552 *type_out = get_vectype_for_scalar_type (vinfo, type);
3553 if (!*type_out)
3554 return NULL;
3556 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
3557 tree new_ops[2];
3558 vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
3559 unprom, new_vectype);
3561 if (fallback_p)
3563 /* As a fallback, generate code for the following sequence:
3565 shifted_op0 = new_ops[0] >> 1;
3566 shifted_op1 = new_ops[1] >> 1;
3567 sum_of_shifted = shifted_op0 + shifted_op1;
3568 unmasked_carry = new_ops[0] and/or new_ops[1];
3569 carry = unmasked_carry & 1;
3570 new_var = sum_of_shifted + carry;
3573 tree one_cst = build_one_cst (new_type);
3574 gassign *g;
3576 tree shifted_op0 = vect_recog_temp_ssa_var (new_type, NULL);
3577 g = gimple_build_assign (shifted_op0, RSHIFT_EXPR, new_ops[0], one_cst);
3578 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
3580 tree shifted_op1 = vect_recog_temp_ssa_var (new_type, NULL);
3581 g = gimple_build_assign (shifted_op1, RSHIFT_EXPR, new_ops[1], one_cst);
3582 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
3584 tree sum_of_shifted = vect_recog_temp_ssa_var (new_type, NULL);
3585 g = gimple_build_assign (sum_of_shifted, PLUS_EXPR,
3586 shifted_op0, shifted_op1);
3587 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
3589 tree unmasked_carry = vect_recog_temp_ssa_var (new_type, NULL);
3590 tree_code c = (ifn == IFN_AVG_CEIL) ? BIT_IOR_EXPR : BIT_AND_EXPR;
3591 g = gimple_build_assign (unmasked_carry, c, new_ops[0], new_ops[1]);
3592 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
3594 tree carry = vect_recog_temp_ssa_var (new_type, NULL);
3595 g = gimple_build_assign (carry, BIT_AND_EXPR, unmasked_carry, one_cst);
3596 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
3598 g = gimple_build_assign (new_var, PLUS_EXPR, sum_of_shifted, carry);
3599 return vect_convert_output (vinfo, last_stmt_info, type, g, new_vectype);
3602 /* Generate the IFN_AVG* call. */
3603 gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
3604 new_ops[1]);
3605 gimple_call_set_lhs (average_stmt, new_var);
3606 gimple_set_location (average_stmt, gimple_location (last_stmt));
3608 if (dump_enabled_p ())
3609 dump_printf_loc (MSG_NOTE, vect_location,
3610 "created pattern stmt: %G", (gimple *) average_stmt);
3612 return vect_convert_output (vinfo, last_stmt_info,
3613 type, average_stmt, new_vectype);
3616 /* Recognize cases in which the input to a cast is wider than its
3617 output, and the input is fed by a widening operation. Fold this
3618 by removing the unnecessary intermediate widening. E.g.:
3620 unsigned char a;
3621 unsigned int b = (unsigned int) a;
3622 unsigned short c = (unsigned short) b;
     =>
3626 unsigned short c = (unsigned short) a;
3628 Although this is rare in input IR, it is an expected side-effect
3629 of the over-widening pattern above.
3631 This is beneficial also for integer-to-float conversions, if the
3632 widened integer has more bits than the float, and if the unwidened
3633 input doesn't. */
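/* A sketch (assumed example) of the integer-to-float case mentioned above:

     unsigned char a;
     unsigned long long b = a;
     float f = (float) b;
       =>
     float f = (float) a;

   Here the 64-bit intermediate has more bits than the float mode while the
   unsigned char input does not, so the intermediate widening is dropped.  */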
3635 static gimple *
3636 vect_recog_cast_forwprop_pattern (vec_info *vinfo,
3637 stmt_vec_info last_stmt_info, tree *type_out)
3639 /* Check for a cast, including an integer-to-float conversion. */
3640 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
3641 if (!last_stmt)
3642 return NULL;
3643 tree_code code = gimple_assign_rhs_code (last_stmt);
3644 if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
3645 return NULL;
3647 /* Make sure that the rhs is a scalar with a natural bitsize. */
3648 tree lhs = gimple_assign_lhs (last_stmt);
3649 if (!lhs)
3650 return NULL;
3651 tree lhs_type = TREE_TYPE (lhs);
3652 scalar_mode lhs_mode;
3653 if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
3654 || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
3655 return NULL;
3657 /* Check for a narrowing operation (from a vector point of view). */
3658 tree rhs = gimple_assign_rhs1 (last_stmt);
3659 tree rhs_type = TREE_TYPE (rhs);
3660 if (!INTEGRAL_TYPE_P (rhs_type)
3661 || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
3662 || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
3663 return NULL;
3665 /* Try to find an unpromoted input. */
3666 vect_unpromoted_value unprom;
3667 if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
3668 || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
3669 return NULL;
3671 /* If the bits above RHS_TYPE matter, make sure that they're the
3672 same when extending from UNPROM as they are when extending from RHS. */
3673 if (!INTEGRAL_TYPE_P (lhs_type)
3674 && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
3675 return NULL;
3677 /* We can get the same result by casting UNPROM directly, to avoid
3678 the unnecessary widening and narrowing. */
3679 vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
3681 *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
3682 if (!*type_out)
3683 return NULL;
3685 tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
3686 gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
3687 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
3689 return pattern_stmt;
3692 /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
3693 to WIDEN_LSHIFT_EXPR. See vect_recog_widen_op_pattern for details. */
3695 static gimple *
3696 vect_recog_widen_shift_pattern (vec_info *vinfo,
3697 stmt_vec_info last_stmt_info, tree *type_out)
3699 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
3700 LSHIFT_EXPR, WIDEN_LSHIFT_EXPR, true,
3701 "vect_recog_widen_shift_pattern");
3704 /* Detect a rotate pattern that wouldn't be otherwise vectorized:
3706 type a_t, b_t, c_t;
3708 S0 a_t = b_t r<< c_t;
3710 Input/Output:
3712 * STMT_VINFO: The stmt from which the pattern search begins,
3713 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
3714 with a sequence:
3716 S1 d_t = -c_t;
3717 S2 e_t = d_t & (B - 1);
3718 S3 f_t = b_t << c_t;
3719 S4 g_t = b_t >> e_t;
3720 S0 a_t = f_t | g_t;
3722 where B is element bitsize of type.
3724 Output:
3726 * TYPE_OUT: The type of the output of this pattern.
3728 * Return value: A new stmt that will be used to replace the rotate
3729 S0 stmt. */
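/* For illustration (assumed example), a scalar source form that typically
   reaches the vectorizer as such a rotate after earlier folding:

     unsigned int
     rot (unsigned int b, unsigned int c)
     {
       return (b << c) | (b >> ((-c) & 31));
     }

   If the target lacks a vector rotate but has vector shifts, the pattern
   re-expands it into the S1-S0 sequence above, masking the negated count
   with B - 1 so that the right-shift amount stays in range when c is 0.  */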
3731 static gimple *
3732 vect_recog_rotate_pattern (vec_info *vinfo,
3733 stmt_vec_info stmt_vinfo, tree *type_out)
3735 gimple *last_stmt = stmt_vinfo->stmt;
3736 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
3737 gimple *pattern_stmt, *def_stmt;
3738 enum tree_code rhs_code;
3739 enum vect_def_type dt;
3740 optab optab1, optab2;
3741 edge ext_def = NULL;
3742 bool bswap16_p = false;
3744 if (is_gimple_assign (last_stmt))
3746 rhs_code = gimple_assign_rhs_code (last_stmt);
3747 switch (rhs_code)
3749 case LROTATE_EXPR:
3750 case RROTATE_EXPR:
3751 break;
3752 default:
3753 return NULL;
3756 lhs = gimple_assign_lhs (last_stmt);
3757 oprnd0 = gimple_assign_rhs1 (last_stmt);
3758 type = TREE_TYPE (oprnd0);
3759 oprnd1 = gimple_assign_rhs2 (last_stmt);
3761 else if (gimple_call_builtin_p (last_stmt, BUILT_IN_BSWAP16))
3763 /* __builtin_bswap16 (x) is another form of x r>> 8.
3764 The vectorizer has bswap support, but only if the argument isn't
3765 promoted. */
3766 lhs = gimple_call_lhs (last_stmt);
3767 oprnd0 = gimple_call_arg (last_stmt, 0);
3768 type = TREE_TYPE (oprnd0);
3769 if (!lhs
3770 || TYPE_PRECISION (TREE_TYPE (lhs)) != 16
3771 || TYPE_PRECISION (type) <= 16
3772 || TREE_CODE (oprnd0) != SSA_NAME
3773 || BITS_PER_UNIT != 8)
3774 return NULL;
3776 stmt_vec_info def_stmt_info;
3777 if (!vect_is_simple_use (oprnd0, vinfo, &dt, &def_stmt_info, &def_stmt))
3778 return NULL;
3780 if (dt != vect_internal_def)
3781 return NULL;
3783 if (gimple_assign_cast_p (def_stmt))
3785 def = gimple_assign_rhs1 (def_stmt);
3786 if (INTEGRAL_TYPE_P (TREE_TYPE (def))
3787 && TYPE_PRECISION (TREE_TYPE (def)) == 16)
3788 oprnd0 = def;
3791 type = TREE_TYPE (lhs);
3792 vectype = get_vectype_for_scalar_type (vinfo, type);
3793 if (vectype == NULL_TREE)
3794 return NULL;
3796 if (tree char_vectype = get_same_sized_vectype (char_type_node, vectype))
3798 /* The encoding uses one stepped pattern for each byte in the
3799 16-bit word. */
3800 vec_perm_builder elts (TYPE_VECTOR_SUBPARTS (char_vectype), 2, 3);
3801 for (unsigned i = 0; i < 3; ++i)
3802 for (unsigned j = 0; j < 2; ++j)
3803 elts.quick_push ((i + 1) * 2 - j - 1);
3805 vec_perm_indices indices (elts, 1,
3806 TYPE_VECTOR_SUBPARTS (char_vectype));
3807 machine_mode vmode = TYPE_MODE (char_vectype);
3808 if (can_vec_perm_const_p (vmode, vmode, indices))
3810 /* vectorizable_bswap can handle the __builtin_bswap16 if we
3811 undo the argument promotion. */
3812 if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
3814 def = vect_recog_temp_ssa_var (type, NULL);
3815 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
3816 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3817 oprnd0 = def;
3820 /* Pattern detected. */
3821 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
3823 *type_out = vectype;
3825 /* Pattern supported. Create a stmt to be used to replace the
3826 pattern, with the unpromoted argument. */
3827 var = vect_recog_temp_ssa_var (type, NULL);
3828 pattern_stmt = gimple_build_call (gimple_call_fndecl (last_stmt),
3829 1, oprnd0);
3830 gimple_call_set_lhs (pattern_stmt, var);
3831 gimple_call_set_fntype (as_a <gcall *> (pattern_stmt),
3832 gimple_call_fntype (last_stmt));
3833 return pattern_stmt;
3837 oprnd1 = build_int_cst (integer_type_node, 8);
3838 rhs_code = LROTATE_EXPR;
3839 bswap16_p = true;
3841 else
3842 return NULL;
3844 if (TREE_CODE (oprnd0) != SSA_NAME
3845 || !INTEGRAL_TYPE_P (type)
3846 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type))
3847 return NULL;
3849 stmt_vec_info def_stmt_info;
3850 if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt))
3851 return NULL;
3853 if (dt != vect_internal_def
3854 && dt != vect_constant_def
3855 && dt != vect_external_def)
3856 return NULL;
3858 vectype = get_vectype_for_scalar_type (vinfo, type);
3859 if (vectype == NULL_TREE)
3860 return NULL;
3862 /* If vector/vector or vector/scalar rotate is supported by the target,
3863 don't do anything here. */
3864 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
3865 if (optab1
3866 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
3868 use_rotate:
3869 if (bswap16_p)
3871 if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
3873 def = vect_recog_temp_ssa_var (type, NULL);
3874 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
3875 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3876 oprnd0 = def;
3879 /* Pattern detected. */
3880 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
3882 *type_out = vectype;
3884 /* Pattern supported. Create a stmt to be used to replace the
3885 pattern. */
3886 var = vect_recog_temp_ssa_var (type, NULL);
3887 pattern_stmt = gimple_build_assign (var, LROTATE_EXPR, oprnd0,
3888 oprnd1);
3889 return pattern_stmt;
3891 return NULL;
3894 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
3896 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
3897 if (optab2
3898 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
3899 goto use_rotate;
3902 tree utype = unsigned_type_for (type);
3903 tree uvectype = get_vectype_for_scalar_type (vinfo, utype);
3904 if (!uvectype)
3905 return NULL;
3907 /* If vector/vector or vector/scalar shifts aren't supported by the target,
3908 don't do anything here either. */
3909 optab1 = optab_for_tree_code (LSHIFT_EXPR, uvectype, optab_vector);
3910 optab2 = optab_for_tree_code (RSHIFT_EXPR, uvectype, optab_vector);
3911 if (!optab1
3912 || optab_handler (optab1, TYPE_MODE (uvectype)) == CODE_FOR_nothing
3913 || !optab2
3914 || optab_handler (optab2, TYPE_MODE (uvectype)) == CODE_FOR_nothing)
3916 if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
3917 return NULL;
3918 optab1 = optab_for_tree_code (LSHIFT_EXPR, uvectype, optab_scalar);
3919 optab2 = optab_for_tree_code (RSHIFT_EXPR, uvectype, optab_scalar);
3920 if (!optab1
3921 || optab_handler (optab1, TYPE_MODE (uvectype)) == CODE_FOR_nothing
3922 || !optab2
3923 || optab_handler (optab2, TYPE_MODE (uvectype)) == CODE_FOR_nothing)
3924 return NULL;
3927 *type_out = vectype;
3929 if (!useless_type_conversion_p (utype, TREE_TYPE (oprnd0)))
3931 def = vect_recog_temp_ssa_var (utype, NULL);
3932 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
3933 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
3934 oprnd0 = def;
3937 if (dt == vect_external_def && TREE_CODE (oprnd1) == SSA_NAME)
3938 ext_def = vect_get_external_def_edge (vinfo, oprnd1);
3940 def = NULL_TREE;
3941 scalar_int_mode mode = SCALAR_INT_TYPE_MODE (utype);
3942 if (dt != vect_internal_def || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
3943 def = oprnd1;
3944 else if (def_stmt && gimple_assign_cast_p (def_stmt))
3946 tree rhs1 = gimple_assign_rhs1 (def_stmt);
3947 if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
3948 && TYPE_PRECISION (TREE_TYPE (rhs1))
3949 == TYPE_PRECISION (type))
3950 def = rhs1;
3953 if (def == NULL_TREE)
3955 def = vect_recog_temp_ssa_var (utype, NULL);
3956 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
3957 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
3959 stype = TREE_TYPE (def);
3961 if (TREE_CODE (def) == INTEGER_CST)
3963 if (!tree_fits_uhwi_p (def)
3964 || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
3965 || integer_zerop (def))
3966 return NULL;
3967 def2 = build_int_cst (stype,
3968 GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
3970 else
3972 tree vecstype = get_vectype_for_scalar_type (vinfo, stype);
3974 if (vecstype == NULL_TREE)
3975 return NULL;
3976 def2 = vect_recog_temp_ssa_var (stype, NULL);
3977 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
3978 if (ext_def)
3980 basic_block new_bb
3981 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
3982 gcc_assert (!new_bb);
3984 else
3985 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
3987 def2 = vect_recog_temp_ssa_var (stype, NULL);
3988 tree mask = build_int_cst (stype, GET_MODE_PRECISION (mode) - 1);
3989 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
3990 gimple_assign_lhs (def_stmt), mask);
3991 if (ext_def)
3993 basic_block new_bb
3994 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
3995 gcc_assert (!new_bb);
3997 else
3998 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
4001 var1 = vect_recog_temp_ssa_var (utype, NULL);
4002 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
4003 ? LSHIFT_EXPR : RSHIFT_EXPR,
4004 oprnd0, def);
4005 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
4007 var2 = vect_recog_temp_ssa_var (utype, NULL);
4008 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
4009 ? RSHIFT_EXPR : LSHIFT_EXPR,
4010 oprnd0, def2);
4011 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
4013 /* Pattern detected. */
4014 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
4016 /* Pattern supported. Create a stmt to be used to replace the pattern. */
4017 var = vect_recog_temp_ssa_var (utype, NULL);
4018 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
4020 if (!useless_type_conversion_p (type, utype))
4022 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, uvectype);
4023 tree result = vect_recog_temp_ssa_var (type, NULL);
4024 pattern_stmt = gimple_build_assign (result, NOP_EXPR, var);
4026 return pattern_stmt;
4029 /* Detect a vector by vector shift pattern that wouldn't be otherwise
4030 vectorized:
4032 type a_t;
4033 TYPE b_T, res_T;
4035 S1 a_t = ;
4036 S2 b_T = ;
4037 S3 res_T = b_T op a_t;
4039 where type 'TYPE' is a type with a different size from 'type',
4040 and op is <<, >> or rotate.
4042 Also detect cases:
4044 type a_t;
4045 TYPE b_T, c_T, res_T;
4047 S0 c_T = ;
4048 S1 a_t = (type) c_T;
4049 S2 b_T = ;
4050 S3 res_T = b_T op a_t;
4052 Input/Output:
4054 * STMT_VINFO: The stmt from which the pattern search begins,
4055 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
4056 with a shift/rotate which has the same type on both operands; in the
4057 second case just b_T op c_T, in the first case with an added cast
4058 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
4060 Output:
4062 * TYPE_OUT: The type of the output of this pattern.
4064 * Return value: A new stmt that will be used to replace the shift/rotate
4065 S3 stmt. */
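/* For the first case above, the replacement emitted is roughly (assumed
   sketch; patt_a is an illustrative name, with TYPE == int, type == char):

     patt_a = (TYPE) a_t;       <- added to STMT_VINFO_PATTERN_DEF_SEQ
     res_T  = b_T op patt_a;    <- the returned pattern stmt

   so that both shift operands end up with the same vector element type.  */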
4067 static gimple *
4068 vect_recog_vector_vector_shift_pattern (vec_info *vinfo,
4069 stmt_vec_info stmt_vinfo,
4070 tree *type_out)
4072 gimple *last_stmt = stmt_vinfo->stmt;
4073 tree oprnd0, oprnd1, lhs, var;
4074 gimple *pattern_stmt;
4075 enum tree_code rhs_code;
4077 if (!is_gimple_assign (last_stmt))
4078 return NULL;
4080 rhs_code = gimple_assign_rhs_code (last_stmt);
4081 switch (rhs_code)
4083 case LSHIFT_EXPR:
4084 case RSHIFT_EXPR:
4085 case LROTATE_EXPR:
4086 case RROTATE_EXPR:
4087 break;
4088 default:
4089 return NULL;
4092 lhs = gimple_assign_lhs (last_stmt);
4093 oprnd0 = gimple_assign_rhs1 (last_stmt);
4094 oprnd1 = gimple_assign_rhs2 (last_stmt);
4095 if (TREE_CODE (oprnd0) != SSA_NAME
4096 || TREE_CODE (oprnd1) != SSA_NAME
4097 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
4098 || !INTEGRAL_TYPE_P (TREE_TYPE (oprnd0))
4099 || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
4100 || TYPE_PRECISION (TREE_TYPE (lhs))
4101 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
4102 return NULL;
4104 stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
4105 if (!def_vinfo)
4106 return NULL;
4108 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (oprnd0));
4109 if (*type_out == NULL_TREE)
4110 return NULL;
4112 tree def = NULL_TREE;
4113 gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
4114 if (def_stmt && gimple_assign_cast_p (def_stmt))
4116 tree rhs1 = gimple_assign_rhs1 (def_stmt);
4117 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
4118 && TYPE_PRECISION (TREE_TYPE (rhs1))
4119 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
4121 if (TYPE_PRECISION (TREE_TYPE (oprnd1))
4122 >= TYPE_PRECISION (TREE_TYPE (rhs1)))
4123 def = rhs1;
4124 else
4126 tree mask
4127 = build_low_bits_mask (TREE_TYPE (rhs1),
4128 TYPE_PRECISION (TREE_TYPE (oprnd1)));
4129 def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
4130 def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
4131 tree vecstype = get_vectype_for_scalar_type (vinfo,
4132 TREE_TYPE (rhs1));
4133 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
4138 if (def == NULL_TREE)
4140 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
4141 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
4142 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4145 /* Pattern detected. */
4146 vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
4148 /* Pattern supported. Create a stmt to be used to replace the pattern. */
4149 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
4150 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
4152 return pattern_stmt;
4155 /* Return true iff the target has a vector optab implementing the operation
4156 CODE on type VECTYPE. */
4158 static bool
4159 target_has_vecop_for_code (tree_code code, tree vectype)
4161 optab voptab = optab_for_tree_code (code, vectype, optab_vector);
4162 return voptab
4163 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
4166 /* Verify that the target has optabs of VECTYPE to perform all the steps
4167 needed by the multiplication-by-immediate synthesis algorithm described by
4168 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
4169 present. Return true iff the target supports all the steps. */
4171 static bool
4172 target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
4173 tree vectype, bool synth_shift_p)
4175 if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
4176 return false;
4178 bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
4179 bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
4181 if (var == negate_variant
4182 && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
4183 return false;
4185 /* If we must synthesize shifts with additions make sure that vector
4186 addition is available. */
4187 if ((var == add_variant || synth_shift_p) && !supports_vplus)
4188 return false;
4190 for (int i = 1; i < alg->ops; i++)
4192 switch (alg->op[i])
4194 case alg_shift:
4195 break;
4196 case alg_add_t_m2:
4197 case alg_add_t2_m:
4198 case alg_add_factor:
4199 if (!supports_vplus)
4200 return false;
4201 break;
4202 case alg_sub_t_m2:
4203 case alg_sub_t2_m:
4204 case alg_sub_factor:
4205 if (!supports_vminus)
4206 return false;
4207 break;
4208 case alg_unknown:
4209 case alg_m:
4210 case alg_zero:
4211 case alg_impossible:
4212 return false;
4213 default:
4214 gcc_unreachable ();
4218 return true;
4221 /* Synthesize a left shift of OP by AMNT bits using a series of additions and
4222 putting the final result in DEST. Append all statements but the last into
4223 VINFO. Return the last statement. */
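/* E.g. (assumed illustration) a left shift by 3 becomes three doublings:

     t1 = op + op;  t2 = t1 + t1;  dest = t2 + t2;  */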
4225 static gimple *
4226 synth_lshift_by_additions (vec_info *vinfo,
4227 tree dest, tree op, HOST_WIDE_INT amnt,
4228 stmt_vec_info stmt_info)
4230 HOST_WIDE_INT i;
4231 tree itype = TREE_TYPE (op);
4232 tree prev_res = op;
4233 gcc_assert (amnt >= 0);
4234 for (i = 0; i < amnt; i++)
4236 tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
4237 : dest;
4238 gimple *stmt
4239 = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
4240 prev_res = tmp_var;
4241 if (i < amnt - 1)
4242 append_pattern_def_seq (vinfo, stmt_info, stmt);
4243 else
4244 return stmt;
4246 gcc_unreachable ();
4247 return NULL;
4250 /* Helper for vect_synth_mult_by_constant. Apply a binary operation
4251 CODE to operands OP1 and OP2, creating a new temporary SSA var in
4252 the process if necessary. Append the resulting assignment statements
4253 to the sequence in STMT_VINFO. Return the SSA variable that holds the
4254 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
4255 left shifts using additions. */
4257 static tree
4258 apply_binop_and_append_stmt (vec_info *vinfo,
4259 tree_code code, tree op1, tree op2,
4260 stmt_vec_info stmt_vinfo, bool synth_shift_p)
4262 if (integer_zerop (op2)
4263 && (code == LSHIFT_EXPR
4264 || code == PLUS_EXPR))
4266 gcc_assert (TREE_CODE (op1) == SSA_NAME);
4267 return op1;
4270 gimple *stmt;
4271 tree itype = TREE_TYPE (op1);
4272 tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
4274 if (code == LSHIFT_EXPR
4275 && synth_shift_p)
4277 stmt = synth_lshift_by_additions (vinfo, tmp_var, op1,
4278 TREE_INT_CST_LOW (op2), stmt_vinfo);
4279 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
4280 return tmp_var;
4283 stmt = gimple_build_assign (tmp_var, code, op1, op2);
4284 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
4285 return tmp_var;
4288 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
4289 and simple arithmetic operations to be vectorized. Record the statements
4290 produced in STMT_VINFO and return the last statement in the sequence or
4291 NULL if it's not possible to synthesize such a multiplication.
4292 This function mirrors the behavior of expand_mult_const in expmed.cc but
4293 works on tree-ssa form. */
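/* As an assumed illustration, a multiplication by 10 might be synthesized
   (the exact choice is up to choose_mult_variant) as

     t1 = op << 2;
     t2 = t1 + op;       op * 5
     res = t2 << 1;      op * 10

   with the shifts themselves expanded into additions when the target lacks
   a vector shift (synth_shift_p).  */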
4295 static gimple *
4296 vect_synth_mult_by_constant (vec_info *vinfo, tree op, tree val,
4297 stmt_vec_info stmt_vinfo)
4299 tree itype = TREE_TYPE (op);
4300 machine_mode mode = TYPE_MODE (itype);
4301 struct algorithm alg;
4302 mult_variant variant;
4303 if (!tree_fits_shwi_p (val))
4304 return NULL;
4306 /* Multiplication synthesis by shifts, adds and subs can introduce
4307 signed overflow where the original operation didn't. Perform the
4308 operations on an unsigned type and cast back to avoid this.
4309 In the future we may want to relax this for synthesis algorithms
4310 that we can prove do not cause unexpected overflow. */
4311 bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
4313 tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
4314 tree vectype = get_vectype_for_scalar_type (vinfo, multtype);
4315 if (!vectype)
4316 return NULL;
4318 /* Targets that don't support vector shifts but support vector additions
4319 can synthesize shifts that way. */
4320 bool synth_shift_p = !vect_supportable_shift (vinfo, LSHIFT_EXPR, multtype);
4322 HOST_WIDE_INT hwval = tree_to_shwi (val);
4323 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
4324 The vectorizer's benefit analysis will decide whether it's beneficial
4325 to do this. */
4326 bool possible = choose_mult_variant (VECTOR_MODE_P (TYPE_MODE (vectype))
4327 ? TYPE_MODE (vectype) : mode,
4328 hwval, &alg, &variant, MAX_COST);
4329 if (!possible)
4330 return NULL;
4332 if (!target_supports_mult_synth_alg (&alg, variant, vectype, synth_shift_p))
4333 return NULL;
4335 tree accumulator;
4337 /* Clear out the sequence of statements so we can populate it below. */
4338 gimple *stmt = NULL;
4340 if (cast_to_unsigned_p)
4342 tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
4343 stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
4344 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
4345 op = tmp_op;
4348 if (alg.op[0] == alg_zero)
4349 accumulator = build_int_cst (multtype, 0);
4350 else
4351 accumulator = op;
4353 bool needs_fixup = (variant == negate_variant)
4354 || (variant == add_variant);
4356 for (int i = 1; i < alg.ops; i++)
4358 tree shft_log = build_int_cst (multtype, alg.log[i]);
4359 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
4360 tree tmp_var = NULL_TREE;
4362 switch (alg.op[i])
4364 case alg_shift:
4365 if (synth_shift_p)
4366 stmt
4367 = synth_lshift_by_additions (vinfo, accum_tmp, accumulator,
4368 alg.log[i], stmt_vinfo);
4369 else
4370 stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
4371 shft_log);
4372 break;
4373 case alg_add_t_m2:
4374 tmp_var
4375 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op, shft_log,
4376 stmt_vinfo, synth_shift_p);
4377 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
4378 tmp_var);
4379 break;
4380 case alg_sub_t_m2:
4381 tmp_var = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op,
4382 shft_log, stmt_vinfo,
4383 synth_shift_p);
4384 /* In some algorithms the first step involves zeroing the
4385 accumulator. If subtracting from such an accumulator
4386 just emit the negation directly. */
4387 if (integer_zerop (accumulator))
4388 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
4389 else
4390 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
4391 tmp_var);
4392 break;
4393 case alg_add_t2_m:
4394 tmp_var
4395 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
4396 shft_log, stmt_vinfo, synth_shift_p);
4397 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
4398 break;
4399 case alg_sub_t2_m:
4400 tmp_var
4401 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
4402 shft_log, stmt_vinfo, synth_shift_p);
4403 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
4404 break;
4405 case alg_add_factor:
4406 tmp_var
4407 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
4408 shft_log, stmt_vinfo, synth_shift_p);
4409 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
4410 tmp_var);
4411 break;
4412 case alg_sub_factor:
4413 tmp_var
4414 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
4415 shft_log, stmt_vinfo, synth_shift_p);
4416 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
4417 accumulator);
4418 break;
4419 default:
4420 gcc_unreachable ();
4422 /* We don't want to append the last stmt in the sequence to stmt_vinfo
4423 but rather return it directly. */
4425 if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
4426 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
4427 accumulator = accum_tmp;
4429 if (variant == negate_variant)
4431 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
4432 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
4433 accumulator = accum_tmp;
4434 if (cast_to_unsigned_p)
4435 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
4437 else if (variant == add_variant)
4439 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
4440 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
4441 accumulator = accum_tmp;
4442 if (cast_to_unsigned_p)
4443 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
4445 /* Move back to a signed type if needed. */
4446 if (cast_to_unsigned_p)
4448 tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
4449 stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
4452 return stmt;
4455 /* Detect multiplication by a constant and convert it into a sequence of
4456 shifts, additions, subtractions and negations. We reuse the
4457 choose_mult_variant algorithms from expmed.cc.
4459 Input/Output:
4461 STMT_VINFO: The stmt from which the pattern search begins,
4462 i.e. the mult stmt.
4464 Output:
4466 * TYPE_OUT: The type of the output of this pattern.
4468 * Return value: A new stmt that will be used to replace
4469 the multiplication. */
4471 static gimple *
4472 vect_recog_mult_pattern (vec_info *vinfo,
4473 stmt_vec_info stmt_vinfo, tree *type_out)
4475 gimple *last_stmt = stmt_vinfo->stmt;
4476 tree oprnd0, oprnd1, vectype, itype;
4477 gimple *pattern_stmt;
4479 if (!is_gimple_assign (last_stmt))
4480 return NULL;
4482 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
4483 return NULL;
4485 oprnd0 = gimple_assign_rhs1 (last_stmt);
4486 oprnd1 = gimple_assign_rhs2 (last_stmt);
4487 itype = TREE_TYPE (oprnd0);
4489 if (TREE_CODE (oprnd0) != SSA_NAME
4490 || TREE_CODE (oprnd1) != INTEGER_CST
4491 || !INTEGRAL_TYPE_P (itype)
4492 || !type_has_mode_precision_p (itype))
4493 return NULL;
4495 vectype = get_vectype_for_scalar_type (vinfo, itype);
4496 if (vectype == NULL_TREE)
4497 return NULL;
4499 /* If the target can handle vectorized multiplication natively,
4500 don't attempt to optimize this. */
4501 optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
4502 if (mul_optab != unknown_optab)
4504 machine_mode vec_mode = TYPE_MODE (vectype);
4505 int icode = (int) optab_handler (mul_optab, vec_mode);
4506 if (icode != CODE_FOR_nothing)
4507 return NULL;
4510 pattern_stmt = vect_synth_mult_by_constant (vinfo,
4511 oprnd0, oprnd1, stmt_vinfo);
4512 if (!pattern_stmt)
4513 return NULL;
4515 /* Pattern detected. */
4516 vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
4518 *type_out = vectype;
4520 return pattern_stmt;
4523 /* Detect a signed division by a constant that wouldn't be
4524 otherwise vectorized:
4526 type a_t, b_t;
4528 S1 a_t = b_t / N;
4530 where type 'type' is an integral type and N is a constant.
4532 Similarly handle modulo by a constant:
4534 S4 a_t = b_t % N;
4536 Input/Output:
4538 * STMT_VINFO: The stmt from which the pattern search begins,
4539 i.e. the division stmt. If N is a power of two constant and the
4540 type is signed, S1 is replaced by:
4541 S3 y_t = b_t < 0 ? N - 1 : 0;
4542 S2 x_t = b_t + y_t;
4543 S1' a_t = x_t >> log2 (N);
4545 Similarly, if N is a power of two constant and the type is signed,
4546 S4 is replaced by (where *_T temporaries have unsigned type):
4547 S9 y_T = b_t < 0 ? -1U : 0U;
4548 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
4549 S7 z_t = (type) z_T;
4550 S6 w_t = b_t + z_t;
4551 S5 x_t = w_t & (N - 1);
4552 S4' a_t = x_t - z_t;
4554 Output:
4556 * TYPE_OUT: The type of the output of this pattern.
4558 * Return value: A new stmt that will be used to replace the division
4559 S1 or modulo S4 stmt. */
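/* A worked sketch (assumed example) of the division replacement for a
   signed type and N == 16:

     a_t = b_t / 16;
       =>
     y_t = b_t < 0 ? 15 : 0;
     x_t = b_t + y_t;
     a_t = x_t >> 4;

   e.g. b_t == -1 gives x_t == 14 and a_t == 0, matching C's truncation
   toward zero, where a plain arithmetic shift of -1 would give -1.  */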
4561 static gimple *
4562 vect_recog_divmod_pattern (vec_info *vinfo,
4563 stmt_vec_info stmt_vinfo, tree *type_out)
4565 gimple *last_stmt = stmt_vinfo->stmt;
4566 tree oprnd0, oprnd1, vectype, itype, cond;
4567 gimple *pattern_stmt, *def_stmt;
4568 enum tree_code rhs_code;
4569 optab optab;
4570 tree q, cst;
4571 int dummy_int, prec;
4573 if (!is_gimple_assign (last_stmt))
4574 return NULL;
4576 rhs_code = gimple_assign_rhs_code (last_stmt);
4577 switch (rhs_code)
4579 case TRUNC_DIV_EXPR:
4580 case EXACT_DIV_EXPR:
4581 case TRUNC_MOD_EXPR:
4582 break;
4583 default:
4584 return NULL;
4587 oprnd0 = gimple_assign_rhs1 (last_stmt);
4588 oprnd1 = gimple_assign_rhs2 (last_stmt);
4589 itype = TREE_TYPE (oprnd0);
4590 if (TREE_CODE (oprnd0) != SSA_NAME
4591 || TREE_CODE (oprnd1) != INTEGER_CST
4592 || TREE_CODE (itype) != INTEGER_TYPE
4593 || !type_has_mode_precision_p (itype))
4594 return NULL;
4596 scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
4597 vectype = get_vectype_for_scalar_type (vinfo, itype);
4598 if (vectype == NULL_TREE)
4599 return NULL;
4601 if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
4603 /* If the target can handle vectorized division or modulo natively,
4604 don't attempt to optimize this, since native division is likely
4605 to give smaller code. */
4606 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
4607 if (optab != unknown_optab)
4609 machine_mode vec_mode = TYPE_MODE (vectype);
4610 int icode = (int) optab_handler (optab, vec_mode);
4611 if (icode != CODE_FOR_nothing)
4612 return NULL;
4616 prec = TYPE_PRECISION (itype);
4617 if (integer_pow2p (oprnd1))
4619 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
4620 return NULL;
4622 /* Pattern detected. */
4623 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
4625 *type_out = vectype;
4627 /* Check if the target supports this internal function. */
4628 internal_fn ifn = IFN_DIV_POW2;
4629 if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
4631 tree shift = build_int_cst (itype, tree_log2 (oprnd1));
4633 tree var_div = vect_recog_temp_ssa_var (itype, NULL);
4634 gimple *div_stmt = gimple_build_call_internal (ifn, 2, oprnd0, shift);
4635 gimple_call_set_lhs (div_stmt, var_div);
4637 if (rhs_code == TRUNC_MOD_EXPR)
4639 append_pattern_def_seq (vinfo, stmt_vinfo, div_stmt);
4640 def_stmt
4641 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4642 LSHIFT_EXPR, var_div, shift);
4643 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4644 pattern_stmt
4645 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4646 MINUS_EXPR, oprnd0,
4647 gimple_assign_lhs (def_stmt));
4649 else
4650 pattern_stmt = div_stmt;
4651 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
4653 return pattern_stmt;
4656 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
4657 build_int_cst (itype, 0));
4658 if (rhs_code == TRUNC_DIV_EXPR
4659 || rhs_code == EXACT_DIV_EXPR)
4661 tree var = vect_recog_temp_ssa_var (itype, NULL);
4662 tree shift;
4663 def_stmt
4664 = gimple_build_assign (var, COND_EXPR, cond,
4665 fold_build2 (MINUS_EXPR, itype, oprnd1,
4666 build_int_cst (itype, 1)),
4667 build_int_cst (itype, 0));
4668 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4669 var = vect_recog_temp_ssa_var (itype, NULL);
4670 def_stmt
4671 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
4672 gimple_assign_lhs (def_stmt));
4673 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4675 shift = build_int_cst (itype, tree_log2 (oprnd1));
4676 pattern_stmt
4677 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4678 RSHIFT_EXPR, var, shift);
4680 else
4682 tree signmask;
4683 if (compare_tree_int (oprnd1, 2) == 0)
4685 signmask = vect_recog_temp_ssa_var (itype, NULL);
4686 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
4687 build_int_cst (itype, 1),
4688 build_int_cst (itype, 0));
4689 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4691 else
4693 tree utype
4694 = build_nonstandard_integer_type (prec, 1);
4695 tree vecutype = get_vectype_for_scalar_type (vinfo, utype);
4696 tree shift
4697 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
4698 - tree_log2 (oprnd1));
4699 tree var = vect_recog_temp_ssa_var (utype, NULL);
4701 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
4702 build_int_cst (utype, -1),
4703 build_int_cst (utype, 0));
4704 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
4705 var = vect_recog_temp_ssa_var (utype, NULL);
4706 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
4707 gimple_assign_lhs (def_stmt),
4708 shift);
4709 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
4710 signmask = vect_recog_temp_ssa_var (itype, NULL);
4711 def_stmt
4712 = gimple_build_assign (signmask, NOP_EXPR, var);
4713 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4715 def_stmt
4716 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4717 PLUS_EXPR, oprnd0, signmask);
4718 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4719 def_stmt
4720 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4721 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
4722 fold_build2 (MINUS_EXPR, itype, oprnd1,
4723 build_int_cst (itype, 1)));
4724 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4726 pattern_stmt
4727 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4728 MINUS_EXPR, gimple_assign_lhs (def_stmt),
4729 signmask);
4732 return pattern_stmt;
4735 if ((cst = uniform_integer_cst_p (oprnd1))
4736 && TYPE_UNSIGNED (itype)
4737 && rhs_code == TRUNC_DIV_EXPR
4738 && vectype
4739 && targetm.vectorize.preferred_div_as_shifts_over_mult (vectype))
4741 /* We can use the relationship:
4743 x // N == ((x+N+2) // (N+1) + x) // (N+1) for 0 <= x < N(N+3)
4745 to optimize cases where N+1 is a power of 2, and where // (N+1)
4746 is therefore a shift right. When operating in modes that are
4747 multiples of a byte in size, there are two cases:
4749 (1) N(N+3) is not representable, in which case the question
4750 becomes whether the replacement expression overflows.
4751 It is enough to test that x+N+2 does not overflow,
4752 i.e. that x < MAX-(N+1).
4754 (2) N(N+3) is representable, in which case it is the (only)
4755 bound that we need to check.
4757 ??? For now we just handle the case where // (N+1) is a shift
4758 right by half the precision, since some architectures can
4759 optimize the associated addition and shift combinations
4760 into single instructions. */
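/* Worked illustration (assumed numbers) for 16-bit unsigned division by
   N == 255, so N + 1 == 256, pow == 8 and adder == 257:

     x == 1000:  t1 = x + 257 = 1257
                 t2 = t1 >> 8 = 4
                 t3 = t2 + x  = 1004
                 q  = t3 >> 8 = 3       == 1000 / 255

   The range check below guarantees that x + 257 cannot wrap around.  */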
4762 auto wcst = wi::to_wide (cst);
4763 int pow = wi::exact_log2 (wcst + 1);
4764 if (pow == prec / 2)
4766 gimple *stmt = SSA_NAME_DEF_STMT (oprnd0);
4768 gimple_ranger ranger;
4769 int_range_max r;
4771 /* Check that no overflow will occur. If we don't have range
4772 information we can't perform the optimization. */
4774 if (ranger.range_of_expr (r, oprnd0, stmt) && !r.undefined_p ())
4776 wide_int max = r.upper_bound ();
4777 wide_int one = wi::shwi (1, prec);
4778 wide_int adder = wi::add (one, wi::lshift (one, pow));
4779 wi::overflow_type ovf;
4780 wi::add (max, adder, UNSIGNED, &ovf);
4781 if (ovf == wi::OVF_NONE)
4783 *type_out = vectype;
4784 tree tadder = wide_int_to_tree (itype, adder);
4785 tree rshift = wide_int_to_tree (itype, pow);
4787 tree new_lhs1 = vect_recog_temp_ssa_var (itype, NULL);
4788 gassign *patt1
4789 = gimple_build_assign (new_lhs1, PLUS_EXPR, oprnd0, tadder);
4790 append_pattern_def_seq (vinfo, stmt_vinfo, patt1, vectype);
4792 tree new_lhs2 = vect_recog_temp_ssa_var (itype, NULL);
4793 patt1 = gimple_build_assign (new_lhs2, RSHIFT_EXPR, new_lhs1,
4794 rshift);
4795 append_pattern_def_seq (vinfo, stmt_vinfo, patt1, vectype);
4797 tree new_lhs3 = vect_recog_temp_ssa_var (itype, NULL);
4798 patt1 = gimple_build_assign (new_lhs3, PLUS_EXPR, new_lhs2,
4799 oprnd0);
4800 append_pattern_def_seq (vinfo, stmt_vinfo, patt1, vectype);
4802 tree new_lhs4 = vect_recog_temp_ssa_var (itype, NULL);
4803 pattern_stmt = gimple_build_assign (new_lhs4, RSHIFT_EXPR,
4804 new_lhs3, rshift);
4806 return pattern_stmt;
4812 if (prec > HOST_BITS_PER_WIDE_INT
4813 || integer_zerop (oprnd1))
4814 return NULL;
4816 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
4817 return NULL;
4819 if (TYPE_UNSIGNED (itype))
4821 unsigned HOST_WIDE_INT mh, ml;
4822 int pre_shift, post_shift;
4823 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
4824 & GET_MODE_MASK (itype_mode));
4825 tree t1, t2, t3, t4;
4827 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
4828 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
4829 return NULL;
4831 /* Find a suitable multiplier and right shift count
4832 instead of multiplying by D. */
4833 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
4835 /* If the suggested multiplier is more than SIZE bits, we can do better
4836 for even divisors, using an initial right shift. */
4837 if (mh != 0 && (d & 1) == 0)
4839 pre_shift = ctz_or_zero (d);
4840 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
4841 &ml, &post_shift, &dummy_int);
4842 gcc_assert (!mh);
4844 else
4845 pre_shift = 0;
4847 if (mh != 0)
4849 if (post_shift - 1 >= prec)
4850 return NULL;
4852 /* t1 = oprnd0 h* ml;
4853 t2 = oprnd0 - t1;
4854 t3 = t2 >> 1;
4855 t4 = t1 + t3;
4856 q = t4 >> (post_shift - 1); */
4857 t1 = vect_recog_temp_ssa_var (itype, NULL);
4858 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
4859 build_int_cst (itype, ml));
4860 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4862 t2 = vect_recog_temp_ssa_var (itype, NULL);
4863 def_stmt
4864 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
4865 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4867 t3 = vect_recog_temp_ssa_var (itype, NULL);
4868 def_stmt
4869 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
4870 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4872 t4 = vect_recog_temp_ssa_var (itype, NULL);
4873 def_stmt
4874 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
4876 if (post_shift != 1)
4878 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4880 q = vect_recog_temp_ssa_var (itype, NULL);
4881 pattern_stmt
4882 = gimple_build_assign (q, RSHIFT_EXPR, t4,
4883 build_int_cst (itype, post_shift - 1));
4885 else
4887 q = t4;
4888 pattern_stmt = def_stmt;
4891 else
4893 if (pre_shift >= prec || post_shift >= prec)
4894 return NULL;
4896 /* t1 = oprnd0 >> pre_shift;
4897 t2 = t1 h* ml;
4898 q = t2 >> post_shift; */
4899 if (pre_shift)
4901 t1 = vect_recog_temp_ssa_var (itype, NULL);
4902 def_stmt
4903 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
4904 build_int_cst (NULL, pre_shift));
4905 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4907 else
4908 t1 = oprnd0;
4910 t2 = vect_recog_temp_ssa_var (itype, NULL);
4911 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
4912 build_int_cst (itype, ml));
4914 if (post_shift)
4916 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4918 q = vect_recog_temp_ssa_var (itype, NULL);
4919 def_stmt
4920 = gimple_build_assign (q, RSHIFT_EXPR, t2,
4921 build_int_cst (itype, post_shift));
4923 else
4924 q = t2;
4926 pattern_stmt = def_stmt;
4929 else
4931 unsigned HOST_WIDE_INT ml;
4932 int post_shift;
4933 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
4934 unsigned HOST_WIDE_INT abs_d;
4935 bool add = false;
4936 tree t1, t2, t3, t4;
4938 /* Give up for -1. */
4939 if (d == -1)
4940 return NULL;
4942 /* Since d might be INT_MIN, we have to cast to
4943 unsigned HOST_WIDE_INT before negating to avoid
4944 undefined signed overflow. */
4945 abs_d = (d >= 0
4946 ? (unsigned HOST_WIDE_INT) d
4947 : - (unsigned HOST_WIDE_INT) d);
4949 /* n rem d = n rem -d */
4950 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
4952 d = abs_d;
4953 oprnd1 = build_int_cst (itype, abs_d);
4955 if (HOST_BITS_PER_WIDE_INT >= prec
4956 && abs_d == HOST_WIDE_INT_1U << (prec - 1))
4957 /* This case is not handled correctly below. */
4958 return NULL;
4960 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
4961 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
4963 add = true;
4964 ml |= HOST_WIDE_INT_M1U << (prec - 1);
4966 if (post_shift >= prec)
4967 return NULL;
4969 /* t1 = oprnd0 h* ml; */
4970 t1 = vect_recog_temp_ssa_var (itype, NULL);
4971 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
4972 build_int_cst (itype, ml));
4974 if (add)
4976 /* t2 = t1 + oprnd0; */
4977 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4978 t2 = vect_recog_temp_ssa_var (itype, NULL);
4979 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
4981 else
4982 t2 = t1;
4984 if (post_shift)
4986 /* t3 = t2 >> post_shift; */
4987 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4988 t3 = vect_recog_temp_ssa_var (itype, NULL);
4989 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
4990 build_int_cst (itype, post_shift));
4992 else
4993 t3 = t2;
4995 int msb = 1;
4996 value_range r;
4997 get_range_query (cfun)->range_of_expr (r, oprnd0);
4998 if (!r.varying_p () && !r.undefined_p ())
5000 if (!wi::neg_p (r.lower_bound (), TYPE_SIGN (itype)))
5001 msb = 0;
5002 else if (wi::neg_p (r.upper_bound (), TYPE_SIGN (itype)))
5003 msb = -1;
5006 if (msb == 0 && d >= 0)
5008 /* q = t3; */
5009 q = t3;
5010 pattern_stmt = def_stmt;
5012 else
5014 /* t4 = oprnd0 >> (prec - 1);
5015 or if we know from VRP that oprnd0 >= 0
5016 t4 = 0;
5017 or if we know from VRP that oprnd0 < 0
5018 t4 = -1; */
5019 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5020 t4 = vect_recog_temp_ssa_var (itype, NULL);
5021 if (msb != 1)
5022 def_stmt = gimple_build_assign (t4, INTEGER_CST,
5023 build_int_cst (itype, msb));
5024 else
5025 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
5026 build_int_cst (itype, prec - 1));
5027 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5029 /* q = t3 - t4; or q = t4 - t3; */
5030 q = vect_recog_temp_ssa_var (itype, NULL);
5031 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
5032 d < 0 ? t3 : t4);
5036 if (rhs_code == TRUNC_MOD_EXPR)
5038 tree r, t1;
5040 /* We divided. Now finish by:
5041 t1 = q * oprnd1;
5042 r = oprnd0 - t1; */
5043 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
5045 t1 = vect_recog_temp_ssa_var (itype, NULL);
5046 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
5047 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5049 r = vect_recog_temp_ssa_var (itype, NULL);
5050 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
5053 /* Pattern detected. */
5054 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
5056 *type_out = vectype;
5057 return pattern_stmt;
5060 /* Function vect_recog_mixed_size_cond_pattern
5062 Try to find the following pattern:
5064 type x_t, y_t;
5065 TYPE a_T, b_T, c_T;
5066 loop:
5067 S1 a_T = x_t CMP y_t ? b_T : c_T;
5069 where type 'TYPE' is an integral type which has a different size
5070 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
5071 than 'type', the constants need to fit into an integer type
5072 with the same width as 'type') or results of conversion from 'type'.
5074 Input:
5076 * STMT_VINFO: The stmt from which the pattern search begins.
5078 Output:
5080 * TYPE_OUT: The type of the output of this pattern.
5082 * Return value: A new stmt that will be used to replace the pattern.
5083 Additionally a def_stmt is added.
5085 a_it = x_t CMP y_t ? b_it : c_it;
5086 a_T = (TYPE) a_it; */
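/* A sketch (assumed example): with char operands compared but an int
   result,

     int a_T = (x_t < y_t) ? 3 : 5;

   is rewritten using a char-sized intermediate so that the VEC_COND_EXPR
   operates on equal-sized vector elements:

     a_it = (x_t < y_t) ? 3 : 5;
     a_T = (int) a_it;  */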
5088 static gimple *
5089 vect_recog_mixed_size_cond_pattern (vec_info *vinfo,
5090 stmt_vec_info stmt_vinfo, tree *type_out)
5092 gimple *last_stmt = stmt_vinfo->stmt;
5093 tree cond_expr, then_clause, else_clause;
5094 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
5095 gimple *pattern_stmt, *def_stmt;
5096 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
5097 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
5098 bool promotion;
5099 tree comp_scalar_type;
5101 if (!is_gimple_assign (last_stmt)
5102 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
5103 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
5104 return NULL;
5106 cond_expr = gimple_assign_rhs1 (last_stmt);
5107 then_clause = gimple_assign_rhs2 (last_stmt);
5108 else_clause = gimple_assign_rhs3 (last_stmt);
5110 if (!COMPARISON_CLASS_P (cond_expr))
5111 return NULL;
5113 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
5114 comp_vectype = get_vectype_for_scalar_type (vinfo, comp_scalar_type);
5115 if (comp_vectype == NULL_TREE)
5116 return NULL;
5118 type = TREE_TYPE (gimple_assign_lhs (last_stmt));
5119 if (types_compatible_p (type, comp_scalar_type)
5120 || ((TREE_CODE (then_clause) != INTEGER_CST
5121 || TREE_CODE (else_clause) != INTEGER_CST)
5122 && !INTEGRAL_TYPE_P (comp_scalar_type))
5123 || !INTEGRAL_TYPE_P (type))
5124 return NULL;
5126 if ((TREE_CODE (then_clause) != INTEGER_CST
5127 && !type_conversion_p (vinfo, then_clause, false,
5128 &orig_type0, &def_stmt0, &promotion))
5129 || (TREE_CODE (else_clause) != INTEGER_CST
5130 && !type_conversion_p (vinfo, else_clause, false,
5131 &orig_type1, &def_stmt1, &promotion)))
5132 return NULL;
5134 if (orig_type0 && orig_type1
5135 && !types_compatible_p (orig_type0, orig_type1))
5136 return NULL;
5138 if (orig_type0)
5140 if (!types_compatible_p (orig_type0, comp_scalar_type))
5141 return NULL;
5142 then_clause = gimple_assign_rhs1 (def_stmt0);
5143 itype = orig_type0;
5146 if (orig_type1)
5148 if (!types_compatible_p (orig_type1, comp_scalar_type))
5149 return NULL;
5150 else_clause = gimple_assign_rhs1 (def_stmt1);
5151 itype = orig_type1;
5155 HOST_WIDE_INT cmp_mode_size
5156 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
5158 scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
5159 if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
5160 return NULL;
5162 vectype = get_vectype_for_scalar_type (vinfo, type);
5163 if (vectype == NULL_TREE)
5164 return NULL;
5166 if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
5167 return NULL;
5169 if (itype == NULL_TREE)
5170 itype = build_nonstandard_integer_type (cmp_mode_size,
5171 TYPE_UNSIGNED (type));
5173 if (itype == NULL_TREE
5174 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
5175 return NULL;
5177 vecitype = get_vectype_for_scalar_type (vinfo, itype);
5178 if (vecitype == NULL_TREE)
5179 return NULL;
5181 if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
5182 return NULL;
5184 if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
5186 if ((TREE_CODE (then_clause) == INTEGER_CST
5187 && !int_fits_type_p (then_clause, itype))
5188 || (TREE_CODE (else_clause) == INTEGER_CST
5189 && !int_fits_type_p (else_clause, itype)))
5190 return NULL;
5193 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
5194 COND_EXPR, unshare_expr (cond_expr),
5195 fold_convert (itype, then_clause),
5196 fold_convert (itype, else_clause));
5197 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
5198 NOP_EXPR, gimple_assign_lhs (def_stmt));
5200 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecitype);
5201 *type_out = vectype;
5203 vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt);
5205 return pattern_stmt;
5209 /* Helper function of vect_recog_bool_pattern. Called recursively, return
5210 true if bool VAR can and should be optimized that way. Assume it shouldn't
5211 if it's the result of a comparison which can be directly vectorized into
5212 a vector comparison. Fills in STMTS with all stmts visited during the
5213 walk. */
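/* A sketch (assumed example) of a bool chain this helper walks:

     _Bool b1 = a < c;
     _Bool b2 = d < e;
     _Bool b  = b1 & b2;

   Starting from B, the walk visits the BIT_AND_EXPR and both comparisons,
   recording them in STMTS, and fails if any step (e.g. a comparison that
   can already be vectorized directly as a mask) should not be rewritten.  */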
5215 static bool
5216 check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
5218 tree rhs1;
5219 enum tree_code rhs_code;
5221 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
5222 if (!def_stmt_info)
5223 return false;
5225 gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
5226 if (!def_stmt)
5227 return false;
5229 if (stmts.contains (def_stmt))
5230 return true;
5232 rhs1 = gimple_assign_rhs1 (def_stmt);
5233 rhs_code = gimple_assign_rhs_code (def_stmt);
5234 switch (rhs_code)
5236 case SSA_NAME:
5237 if (! check_bool_pattern (rhs1, vinfo, stmts))
5238 return false;
5239 break;
5241 CASE_CONVERT:
5242 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
5243 return false;
5244 if (! check_bool_pattern (rhs1, vinfo, stmts))
5245 return false;
5246 break;
5248 case BIT_NOT_EXPR:
5249 if (! check_bool_pattern (rhs1, vinfo, stmts))
5250 return false;
5251 break;
5253 case BIT_AND_EXPR:
5254 case BIT_IOR_EXPR:
5255 case BIT_XOR_EXPR:
5256 if (! check_bool_pattern (rhs1, vinfo, stmts)
5257 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
5258 return false;
5259 break;
5261 default:
5262 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
5264 tree vecitype, comp_vectype;
5266 /* If the comparison can throw, then is_gimple_condexpr will be
5267 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
5268 if (stmt_could_throw_p (cfun, def_stmt))
5269 return false;
5271 comp_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1));
5272 if (comp_vectype == NULL_TREE)
5273 return false;
5275 tree mask_type = get_mask_type_for_scalar_type (vinfo,
5276 TREE_TYPE (rhs1));
5277 if (mask_type
5278 && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
5279 return false;
5281 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
5283 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
5284 tree itype
5285 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
5286 vecitype = get_vectype_for_scalar_type (vinfo, itype);
5287 if (vecitype == NULL_TREE)
5288 return false;
5290 else
5291 vecitype = comp_vectype;
5292 if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
5293 return false;
5295 else
5296 return false;
5297 break;
5300 bool res = stmts.add (def_stmt);
5301 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
5302 gcc_assert (!res);
5304 return true;
5308 /* Helper function of adjust_bool_pattern. Cast VAR (an SSA_NAME defined
5309 by a previous stmt) to TYPE, appending the cast stmt to STMT_INFO's
5310 pattern def sequence, and return the casted value. */
5312 static tree
5313 adjust_bool_pattern_cast (vec_info *vinfo,
5314 tree type, tree var, stmt_vec_info stmt_info)
5316 gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
5317 NOP_EXPR, var);
5318 append_pattern_def_seq (vinfo, stmt_info, cast_stmt,
5319 get_vectype_for_scalar_type (vinfo, type));
5320 return gimple_assign_lhs (cast_stmt);
5323 /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
5324 VAR is an SSA_NAME that should be transformed from bool to a wider integer
5325 type, OUT_TYPE is the desired final integer type of the whole pattern.
5326 STMT_INFO is the info of the pattern root, with which the new pattern
5327 stmts are associated. DEFS is a map of pattern defs. */
5329 static void
5330 adjust_bool_pattern (vec_info *vinfo, tree var, tree out_type,
5331 stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
5333 gimple *stmt = SSA_NAME_DEF_STMT (var);
5334 enum tree_code rhs_code, def_rhs_code;
5335 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
5336 location_t loc;
5337 gimple *pattern_stmt, *def_stmt;
5338 tree trueval = NULL_TREE;
5340 rhs1 = gimple_assign_rhs1 (stmt);
5341 rhs2 = gimple_assign_rhs2 (stmt);
5342 rhs_code = gimple_assign_rhs_code (stmt);
5343 loc = gimple_location (stmt);
5344 switch (rhs_code)
5346 case SSA_NAME:
5347 CASE_CONVERT:
5348 irhs1 = *defs.get (rhs1);
5349 itype = TREE_TYPE (irhs1);
5350 pattern_stmt
5351 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
5352 SSA_NAME, irhs1);
5353 break;
5355 case BIT_NOT_EXPR:
5356 irhs1 = *defs.get (rhs1);
5357 itype = TREE_TYPE (irhs1);
5358 pattern_stmt
5359 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
5360 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
5361 break;
5363 case BIT_AND_EXPR:
5364 /* Try to optimize x = y & (a < b ? 1 : 0); into
5365 x = (a < b ? y : 0);
5367 E.g. for:
5368 bool a_b, b_b, c_b;
5369 TYPE d_T;
5371 S1 a_b = x1 CMP1 y1;
5372 S2 b_b = x2 CMP2 y2;
5373 S3 c_b = a_b & b_b;
5374 S4 d_T = (TYPE) c_b;
5376 we would normally emit:
5378 S1' a_T = x1 CMP1 y1 ? 1 : 0;
5379 S2' b_T = x2 CMP2 y2 ? 1 : 0;
5380 S3' c_T = a_T & b_T;
5381 S4' d_T = c_T;
5383 but we can save one stmt by using the
5384 result of one of the COND_EXPRs in the other COND_EXPR and leave
5385 BIT_AND_EXPR stmt out:
5387 S1' a_T = x1 CMP1 y1 ? 1 : 0;
5388 S3' c_T = x2 CMP2 y2 ? a_T : 0;
5389 S4' f_T = c_T;
5391 At least when VEC_COND_EXPR is implemented using masks,
5392 cond ? 1 : 0 is as expensive as cond ? var : 0: in both cases it
5393 computes the comparison mask and ANDs it, in one case with an
5394 all-ones vector, in the other case with a vector register.
5395 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
5396 often more expensive. */
5397 def_stmt = SSA_NAME_DEF_STMT (rhs2);
5398 def_rhs_code = gimple_assign_rhs_code (def_stmt);
5399 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
5401 irhs1 = *defs.get (rhs1);
5402 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
5403 if (TYPE_PRECISION (TREE_TYPE (irhs1))
5404 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
5406 rhs_code = def_rhs_code;
5407 rhs1 = def_rhs1;
5408 rhs2 = gimple_assign_rhs2 (def_stmt);
5409 trueval = irhs1;
5410 goto do_compare;
5412 else
5413 irhs2 = *defs.get (rhs2);
5414 goto and_ior_xor;
5416 def_stmt = SSA_NAME_DEF_STMT (rhs1);
5417 def_rhs_code = gimple_assign_rhs_code (def_stmt);
5418 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
5420 irhs2 = *defs.get (rhs2);
5421 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
5422 if (TYPE_PRECISION (TREE_TYPE (irhs2))
5423 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
5425 rhs_code = def_rhs_code;
5426 rhs1 = def_rhs1;
5427 rhs2 = gimple_assign_rhs2 (def_stmt);
5428 trueval = irhs2;
5429 goto do_compare;
5431 else
5432 irhs1 = *defs.get (rhs1);
5433 goto and_ior_xor;
5435 /* FALLTHRU */
5436 case BIT_IOR_EXPR:
5437 case BIT_XOR_EXPR:
5438 irhs1 = *defs.get (rhs1);
5439 irhs2 = *defs.get (rhs2);
5440 and_ior_xor:
5441 if (TYPE_PRECISION (TREE_TYPE (irhs1))
5442 != TYPE_PRECISION (TREE_TYPE (irhs2)))
5444 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
5445 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
5446 int out_prec = TYPE_PRECISION (out_type);
5447 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
5448 irhs2 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs1), irhs2,
5449 stmt_info);
5450 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
5451 irhs1 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs2), irhs1,
5452 stmt_info);
5453 else
5455 irhs1 = adjust_bool_pattern_cast (vinfo,
5456 out_type, irhs1, stmt_info);
5457 irhs2 = adjust_bool_pattern_cast (vinfo,
5458 out_type, irhs2, stmt_info);
5461 itype = TREE_TYPE (irhs1);
5462 pattern_stmt
5463 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
5464 rhs_code, irhs1, irhs2);
5465 break;
5467 default:
5468 do_compare:
5469 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
5470 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
5471 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
5472 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
5473 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
5475 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
5476 itype
5477 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
5479 else
5480 itype = TREE_TYPE (rhs1);
5481 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
5482 if (trueval == NULL_TREE)
5483 trueval = build_int_cst (itype, 1);
5484 else
5485 gcc_checking_assert (useless_type_conversion_p (itype,
5486 TREE_TYPE (trueval)));
5487 pattern_stmt
5488 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
5489 COND_EXPR, cond_expr, trueval,
5490 build_int_cst (itype, 0));
5491 break;
5494 gimple_set_location (pattern_stmt, loc);
5495 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt,
5496 get_vectype_for_scalar_type (vinfo, itype));
5497 defs.put (var, gimple_assign_lhs (pattern_stmt));
5500 /* Comparison function to qsort a vector of gimple stmts by UID. */
5502 static int
5503 sort_after_uid (const void *p1, const void *p2)
5505 const gimple *stmt1 = *(const gimple * const *)p1;
5506 const gimple *stmt2 = *(const gimple * const *)p2;
5507 return gimple_uid (stmt1) - gimple_uid (stmt2);
5510 /* Create pattern stmts for all stmts participating in the bool pattern
5511 specified by BOOL_STMT_SET and its root STMT_INFO with the desired type
5512 OUT_TYPE. Return the def of the pattern root. */
5514 static tree
5515 adjust_bool_stmts (vec_info *vinfo, hash_set <gimple *> &bool_stmt_set,
5516 tree out_type, stmt_vec_info stmt_info)
5518 /* Gather original stmts in the bool pattern in their order of appearance
5519 in the IL. */
5520 auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
5521 for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
5522 i != bool_stmt_set.end (); ++i)
5523 bool_stmts.quick_push (*i);
5524 bool_stmts.qsort (sort_after_uid);
5526 /* Now process them in that order, producing pattern stmts. */
5527 hash_map <tree, tree> defs;
5528 for (unsigned i = 0; i < bool_stmts.length (); ++i)
5529 adjust_bool_pattern (vinfo, gimple_assign_lhs (bool_stmts[i]),
5530 out_type, stmt_info, defs);
5532 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
5533 gimple *pattern_stmt
5534 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
5535 return gimple_assign_lhs (pattern_stmt);
5538 /* Return the proper type for converting bool VAR into
5539 an integer value or NULL_TREE if no such type exists.
5540 The type is chosen so that the converted value has the
5541 same number of elements as VAR's vector type. */
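/* For example (illustrative): if VAR is defined by a comparison of two
32-bit ints, its def records mask_precision 32 and the function returns
a 32-bit unsigned integer type, so that values converted from the mask
have as many lanes as the comparison.  */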
5543 static tree
5544 integer_type_for_mask (tree var, vec_info *vinfo)
5546 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
5547 return NULL_TREE;
5549 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
5550 if (!def_stmt_info || !vect_use_mask_type_p (def_stmt_info))
5551 return NULL_TREE;
5553 return build_nonstandard_integer_type (def_stmt_info->mask_precision, 1);
5556 /* Function vect_recog_bool_pattern
5558 Try to find pattern like following:
5560 bool a_b, b_b, c_b, d_b, e_b;
5561 TYPE f_T;
5562 loop:
5563 S1 a_b = x1 CMP1 y1;
5564 S2 b_b = x2 CMP2 y2;
5565 S3 c_b = a_b & b_b;
5566 S4 d_b = x3 CMP3 y3;
5567 S5 e_b = c_b | d_b;
5568 S6 f_T = (TYPE) e_b;
5570 where type 'TYPE' is an integral type. Or a similar pattern
5571 ending in
5573 S6 f_Y = e_b ? r_Y : s_Y;
5575 as results from if-conversion of a complex condition.
5577 Input:
5579 * STMT_VINFO: The stmt at the end from which the pattern
5580 search begins, i.e. cast of a bool to
5581 an integer type.
5583 Output:
5585 * TYPE_OUT: The type of the output of this pattern.
5587 * Return value: A new stmt that will be used to replace the pattern.
5589 Assuming the size of TYPE is the same as the size of all comparisons
5590 (otherwise some casts would be added where needed), for the above
5591 sequence we create related pattern stmts:
5592 S1' a_T = x1 CMP1 y1 ? 1 : 0;
5593 S3' c_T = x2 CMP2 y2 ? a_T : 0;
5594 S4' d_T = x3 CMP3 y3 ? 1 : 0;
5595 S5' e_T = c_T | d_T;
5596 S6' f_T = e_T;
5598 Instead of the above S3' we could emit:
5599 S2' b_T = x2 CMP2 y2 ? 1 : 0;
5600 S3' c_T = a_T | b_T;
5601 but the above is more efficient. */
5603 static gimple *
5604 vect_recog_bool_pattern (vec_info *vinfo,
5605 stmt_vec_info stmt_vinfo, tree *type_out)
5607 gimple *last_stmt = stmt_vinfo->stmt;
5608 enum tree_code rhs_code;
5609 tree var, lhs, rhs, vectype;
5610 gimple *pattern_stmt;
5612 if (!is_gimple_assign (last_stmt))
5613 return NULL;
5615 var = gimple_assign_rhs1 (last_stmt);
5616 lhs = gimple_assign_lhs (last_stmt);
5617 rhs_code = gimple_assign_rhs_code (last_stmt);
5619 if (rhs_code == VIEW_CONVERT_EXPR)
5620 var = TREE_OPERAND (var, 0);
5622 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
5623 return NULL;
5625 hash_set<gimple *> bool_stmts;
5627 if (CONVERT_EXPR_CODE_P (rhs_code)
5628 || rhs_code == VIEW_CONVERT_EXPR)
5630 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
5631 || VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
5632 return NULL;
5633 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
5635 if (check_bool_pattern (var, vinfo, bool_stmts))
5637 rhs = adjust_bool_stmts (vinfo, bool_stmts,
5638 TREE_TYPE (lhs), stmt_vinfo);
5639 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
5640 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
5641 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
5642 else
5643 pattern_stmt
5644 = gimple_build_assign (lhs, NOP_EXPR, rhs);
5646 else
5648 tree type = integer_type_for_mask (var, vinfo);
5649 tree cst0, cst1, tmp;
5651 if (!type)
5652 return NULL;
5654 /* We may directly use the cond with a narrowed type to avoid
5655 multiple cond exprs followed by result packing, and instead
5656 perform a single cond with a packed mask. In the widening
5657 case it is better to create the cond first and then extract
5658 the results. */
5659 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
5660 type = TREE_TYPE (lhs);
5662 cst0 = build_int_cst (type, 0);
5663 cst1 = build_int_cst (type, 1);
5664 tmp = vect_recog_temp_ssa_var (type, NULL);
5665 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
5667 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
5669 tree new_vectype = get_vectype_for_scalar_type (vinfo, type);
5670 append_pattern_def_seq (vinfo, stmt_vinfo,
5671 pattern_stmt, new_vectype);
5673 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
5674 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
5678 *type_out = vectype;
5679 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
5681 return pattern_stmt;
5683 else if (rhs_code == COND_EXPR
5684 && TREE_CODE (var) == SSA_NAME)
5686 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
5687 if (vectype == NULL_TREE)
5688 return NULL;
5690 /* Build a scalar type for the boolean result that when
5691 vectorized matches the vector type of the result in
5692 size and number of elements. */
5693 unsigned prec
5694 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
5695 TYPE_VECTOR_SUBPARTS (vectype));
5697 tree type
5698 = build_nonstandard_integer_type (prec,
5699 TYPE_UNSIGNED (TREE_TYPE (var)));
5700 if (get_vectype_for_scalar_type (vinfo, type) == NULL_TREE)
5701 return NULL;
5703 if (check_bool_pattern (var, vinfo, bool_stmts))
5704 var = adjust_bool_stmts (vinfo, bool_stmts, type, stmt_vinfo);
5705 else if (integer_type_for_mask (var, vinfo))
5706 return NULL;
5708 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
5709 pattern_stmt
5710 = gimple_build_assign (lhs, COND_EXPR,
5711 build2 (NE_EXPR, boolean_type_node,
5712 var, build_int_cst (TREE_TYPE (var), 0)),
5713 gimple_assign_rhs2 (last_stmt),
5714 gimple_assign_rhs3 (last_stmt));
5715 *type_out = vectype;
5716 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
5718 return pattern_stmt;
5720 else if (rhs_code == SSA_NAME
5721 && STMT_VINFO_DATA_REF (stmt_vinfo))
5723 stmt_vec_info pattern_stmt_info;
5724 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
5725 if (!vectype || !VECTOR_MODE_P (TYPE_MODE (vectype)))
5726 return NULL;
5728 if (check_bool_pattern (var, vinfo, bool_stmts))
5729 rhs = adjust_bool_stmts (vinfo, bool_stmts,
5730 TREE_TYPE (vectype), stmt_vinfo);
5731 else
5733 tree type = integer_type_for_mask (var, vinfo);
5734 tree cst0, cst1, new_vectype;
5736 if (!type)
5737 return NULL;
5739 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
5740 type = TREE_TYPE (vectype);
5742 cst0 = build_int_cst (type, 0);
5743 cst1 = build_int_cst (type, 1);
5744 new_vectype = get_vectype_for_scalar_type (vinfo, type);
5746 rhs = vect_recog_temp_ssa_var (type, NULL);
5747 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
5748 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, new_vectype);
5751 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
5752 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
5754 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
5755 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
5756 append_pattern_def_seq (vinfo, stmt_vinfo, cast_stmt);
5757 rhs = rhs2;
5759 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
5760 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
5761 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
5762 *type_out = vectype;
5763 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
5765 return pattern_stmt;
5767 else
5768 return NULL;
5772 /* A helper for vect_recog_mask_conversion_pattern. Build a
5773 conversion of MASK to a type suitable for masking VECTYPE.
5774 The built statement gets the required vectype and is appended
5775 to STMT_VINFO's pattern def sequence.
5777 Return the converted mask. */
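/* For example (illustrative): if MASK was produced by comparing chars but
has to control a COND_EXPR on ints, this emits

mask.2 = (<mask type for VECTYPE>) mask;

and returns mask.2 (a hypothetical temporary name), with the truth type of
VECTYPE recorded as the vector type of the new statement.  */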
5779 static tree
5780 build_mask_conversion (vec_info *vinfo,
5781 tree mask, tree vectype, stmt_vec_info stmt_vinfo)
5783 gimple *stmt;
5784 tree masktype, tmp;
5786 masktype = truth_type_for (vectype);
5787 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
5788 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
5789 append_pattern_def_seq (vinfo, stmt_vinfo,
5790 stmt, masktype, TREE_TYPE (vectype));
5792 return tmp;
5796 /* Function vect_recog_mask_conversion_pattern
5798 Try to find statements which require boolean type
5799 conversion. Additional conversion statements are
5800 added to handle such cases. For example:
5802 bool m_1, m_2, m_3;
5803 int i_4, i_5;
5804 double d_6, d_7;
5805 char c_1, c_2, c_3;
5807 S1 m_1 = i_4 > i_5;
5808 S2 m_2 = d_6 < d_7;
5809 S3 m_3 = m_1 & m_2;
5810 S4 c_1 = m_3 ? c_2 : c_3;
5812 Will be transformed into:
5814 S1 m_1 = i_4 > i_5;
5815 S2 m_2 = d_6 < d_7;
5816 S3'' m_2' = (_Bool[bitsize=32])m_2
5817 S3' m_3' = m_1 & m_2';
5818 S4'' m_3'' = (_Bool[bitsize=8])m_3'
5819 S4' c_1' = m_3'' ? c_2 : c_3; */
5821 static gimple *
5822 vect_recog_mask_conversion_pattern (vec_info *vinfo,
5823 stmt_vec_info stmt_vinfo, tree *type_out)
5825 gimple *last_stmt = stmt_vinfo->stmt;
5826 enum tree_code rhs_code;
5827 tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
5828 tree vectype1, vectype2;
5829 stmt_vec_info pattern_stmt_info;
5830 tree rhs1_op0 = NULL_TREE, rhs1_op1 = NULL_TREE;
5831 tree rhs1_op0_type = NULL_TREE, rhs1_op1_type = NULL_TREE;
5833 /* Check for MASK_LOAD and MASK_STORE as well as COND_OP calls requiring mask
5834 conversion. */
5835 if (is_gimple_call (last_stmt)
5836 && gimple_call_internal_p (last_stmt))
5838 gcall *pattern_stmt;
5840 internal_fn ifn = gimple_call_internal_fn (last_stmt);
5841 int mask_argno = internal_fn_mask_index (ifn);
5842 if (mask_argno < 0)
5843 return NULL;
5845 bool store_p = internal_store_fn_p (ifn);
5846 bool load_p = internal_load_fn_p (ifn);
5847 if (store_p)
5849 int rhs_index = internal_fn_stored_value_index (ifn);
5850 tree rhs = gimple_call_arg (last_stmt, rhs_index);
5851 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs));
5853 else
5855 lhs = gimple_call_lhs (last_stmt);
5856 if (!lhs)
5857 return NULL;
5858 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
5861 if (!vectype1)
5862 return NULL;
5864 tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
5865 tree mask_arg_type = integer_type_for_mask (mask_arg, vinfo);
5866 if (mask_arg_type)
5868 vectype2 = get_mask_type_for_scalar_type (vinfo, mask_arg_type);
5870 if (!vectype2
5871 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
5872 TYPE_VECTOR_SUBPARTS (vectype2)))
5873 return NULL;
5875 else if (store_p || load_p)
5876 return NULL;
5878 tmp = build_mask_conversion (vinfo, mask_arg, vectype1, stmt_vinfo);
5880 auto_vec<tree, 8> args;
5881 unsigned int nargs = gimple_call_num_args (last_stmt);
5882 args.safe_grow (nargs, true);
5883 for (unsigned int i = 0; i < nargs; ++i)
5884 args[i] = ((int) i == mask_argno
5885 ? tmp
5886 : gimple_call_arg (last_stmt, i));
5887 pattern_stmt = gimple_build_call_internal_vec (ifn, args);
5889 if (!store_p)
5891 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
5892 gimple_call_set_lhs (pattern_stmt, lhs);
5895 if (load_p || store_p)
5896 gimple_call_set_nothrow (pattern_stmt, true);
5898 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
5899 if (STMT_VINFO_DATA_REF (stmt_vinfo))
5900 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
5902 *type_out = vectype1;
5903 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
5905 return pattern_stmt;
5908 if (!is_gimple_assign (last_stmt))
5909 return NULL;
5911 gimple *pattern_stmt;
5912 lhs = gimple_assign_lhs (last_stmt);
5913 rhs1 = gimple_assign_rhs1 (last_stmt);
5914 rhs_code = gimple_assign_rhs_code (last_stmt);
5916 /* Check for cond expression requiring mask conversion. */
5917 if (rhs_code == COND_EXPR)
5919 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
5921 if (TREE_CODE (rhs1) == SSA_NAME)
5923 rhs1_type = integer_type_for_mask (rhs1, vinfo);
5924 if (!rhs1_type)
5925 return NULL;
5927 else if (COMPARISON_CLASS_P (rhs1))
5929 /* Check whether we're comparing scalar booleans and (if so)
5930 whether a better mask type exists than the mask associated
5931 with boolean-sized elements. This avoids unnecessary packs
5932 and unpacks if the booleans are set from comparisons of
5933 wider types. E.g. in:
5935 int x1, x2, x3, x4, y1, y2;
5937 bool b1 = (x1 == x2);
5938 bool b2 = (x3 == x4);
5939 ... = b1 == b2 ? y1 : y2;
5941 it is better for b1 and b2 to use the mask type associated
5942 with int elements rather than bool (byte) elements. */
5943 rhs1_op0 = TREE_OPERAND (rhs1, 0);
5944 rhs1_op1 = TREE_OPERAND (rhs1, 1);
5945 if (!rhs1_op0 || !rhs1_op1)
5946 return NULL;
5947 rhs1_op0_type = integer_type_for_mask (rhs1_op0, vinfo);
5948 rhs1_op1_type = integer_type_for_mask (rhs1_op1, vinfo);
5950 if (!rhs1_op0_type)
5951 rhs1_type = TREE_TYPE (rhs1_op0);
5952 else if (!rhs1_op1_type)
5953 rhs1_type = TREE_TYPE (rhs1_op1);
5954 else if (TYPE_PRECISION (rhs1_op0_type)
5955 != TYPE_PRECISION (rhs1_op1_type))
5957 int tmp0 = (int) TYPE_PRECISION (rhs1_op0_type)
5958 - (int) TYPE_PRECISION (TREE_TYPE (lhs));
5959 int tmp1 = (int) TYPE_PRECISION (rhs1_op1_type)
5960 - (int) TYPE_PRECISION (TREE_TYPE (lhs));
5961 if ((tmp0 > 0 && tmp1 > 0) || (tmp0 < 0 && tmp1 < 0))
5963 if (abs (tmp0) > abs (tmp1))
5964 rhs1_type = rhs1_op1_type;
5965 else
5966 rhs1_type = rhs1_op0_type;
5968 else
5969 rhs1_type = build_nonstandard_integer_type
5970 (TYPE_PRECISION (TREE_TYPE (lhs)), 1);
5972 else
5973 rhs1_type = rhs1_op0_type;
5975 else
5976 return NULL;
5978 vectype2 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
5980 if (!vectype1 || !vectype2)
5981 return NULL;
5983 /* Continue if a conversion is needed. Also continue if we have
5984 a comparison whose vector type would normally be different from
5985 VECTYPE2 when considered in isolation. In that case we'll
5986 replace the comparison with an SSA name (so that we can record
5987 its vector type) and behave as though the comparison was an SSA
5988 name from the outset. */
5989 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
5990 TYPE_VECTOR_SUBPARTS (vectype2))
5991 && !rhs1_op0_type
5992 && !rhs1_op1_type)
5993 return NULL;
5995 /* If rhs1 is invariant and we can promote it, leave the COND_EXPR
5996 in place; we can handle it in vectorizable_condition. This avoids
5997 unnecessary promotion stmts and increased vectorization factor. */
5998 if (COMPARISON_CLASS_P (rhs1)
5999 && INTEGRAL_TYPE_P (rhs1_type)
6000 && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
6001 TYPE_VECTOR_SUBPARTS (vectype2)))
6003 enum vect_def_type dt;
6004 if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
6005 && dt == vect_external_def
6006 && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
6007 && (dt == vect_external_def
6008 || dt == vect_constant_def))
6010 tree wide_scalar_type = build_nonstandard_integer_type
6011 (vector_element_bits (vectype1), TYPE_UNSIGNED (rhs1_type));
6012 tree vectype3 = get_vectype_for_scalar_type (vinfo,
6013 wide_scalar_type);
6014 if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
6015 return NULL;
6019 /* If rhs1 is a comparison we need to move it into a
6020 separate statement. */
6021 if (TREE_CODE (rhs1) != SSA_NAME)
6023 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
6024 if (rhs1_op0_type
6025 && TYPE_PRECISION (rhs1_op0_type) != TYPE_PRECISION (rhs1_type))
6026 rhs1_op0 = build_mask_conversion (vinfo, rhs1_op0,
6027 vectype2, stmt_vinfo);
6028 if (rhs1_op1_type
6029 && TYPE_PRECISION (rhs1_op1_type) != TYPE_PRECISION (rhs1_type))
6030 rhs1_op1 = build_mask_conversion (vinfo, rhs1_op1,
6031 vectype2, stmt_vinfo);
6032 pattern_stmt = gimple_build_assign (tmp, TREE_CODE (rhs1),
6033 rhs1_op0, rhs1_op1);
6034 rhs1 = tmp;
6035 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vectype2,
6036 rhs1_type);
6039 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
6040 TYPE_VECTOR_SUBPARTS (vectype2)))
6041 tmp = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
6042 else
6043 tmp = rhs1;
6045 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
6046 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
6047 gimple_assign_rhs2 (last_stmt),
6048 gimple_assign_rhs3 (last_stmt));
6050 *type_out = vectype1;
6051 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
6053 return pattern_stmt;
6056 /* Now check for binary boolean operations requiring conversion for
6057 one of operands. */
6058 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
6059 return NULL;
6061 if (rhs_code != BIT_IOR_EXPR
6062 && rhs_code != BIT_XOR_EXPR
6063 && rhs_code != BIT_AND_EXPR
6064 && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
6065 return NULL;
6067 rhs2 = gimple_assign_rhs2 (last_stmt);
6069 rhs1_type = integer_type_for_mask (rhs1, vinfo);
6070 rhs2_type = integer_type_for_mask (rhs2, vinfo);
6072 if (!rhs1_type || !rhs2_type
6073 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
6074 return NULL;
6076 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
6078 vectype1 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
6079 if (!vectype1)
6080 return NULL;
6081 rhs2 = build_mask_conversion (vinfo, rhs2, vectype1, stmt_vinfo);
6083 else
6085 vectype1 = get_mask_type_for_scalar_type (vinfo, rhs2_type);
6086 if (!vectype1)
6087 return NULL;
6088 rhs1 = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
6091 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
6092 pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
6094 *type_out = vectype1;
6095 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
6097 return pattern_stmt;
6100 /* STMT_INFO is a load or store. If the load or store is conditional, return
6101 the boolean condition under which it occurs, otherwise return null. */
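/* For example (illustrative): for a conditional load represented as a
.MASK_LOAD internal call the mask argument is returned, whereas a plain
assignment load or store is unconditional and NULL_TREE is returned.  */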
6103 static tree
6104 vect_get_load_store_mask (stmt_vec_info stmt_info)
6106 if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt))
6108 gcc_assert (gimple_assign_single_p (def_assign));
6109 return NULL_TREE;
6112 if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt))
6114 internal_fn ifn = gimple_call_internal_fn (def_call);
6115 int mask_index = internal_fn_mask_index (ifn);
6116 return gimple_call_arg (def_call, mask_index);
6119 gcc_unreachable ();
6122 /* Return MASK if MASK is suitable for masking an operation on vectors
6123 of type VECTYPE, otherwise convert it into such a form and return
6124 the result. Associate any conversion statements with STMT_INFO's
6125 pattern. */
6127 static tree
6128 vect_convert_mask_for_vectype (tree mask, tree vectype,
6129 stmt_vec_info stmt_info, vec_info *vinfo)
6131 tree mask_type = integer_type_for_mask (mask, vinfo);
6132 if (mask_type)
6134 tree mask_vectype = get_mask_type_for_scalar_type (vinfo, mask_type);
6135 if (mask_vectype
6136 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
6137 TYPE_VECTOR_SUBPARTS (mask_vectype)))
6138 mask = build_mask_conversion (vinfo, mask, vectype, stmt_info);
6140 return mask;
6143 /* Return the equivalent of:
6145 fold_convert (TYPE, VALUE)
6147 with the expectation that the operation will be vectorized.
6148 If new statements are needed, add them as pattern statements
6149 to STMT_INFO. */
6151 static tree
6152 vect_add_conversion_to_pattern (vec_info *vinfo,
6153 tree type, tree value, stmt_vec_info stmt_info)
6155 if (useless_type_conversion_p (type, TREE_TYPE (value)))
6156 return value;
6158 tree new_value = vect_recog_temp_ssa_var (type, NULL);
6159 gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
6160 append_pattern_def_seq (vinfo, stmt_info, conversion,
6161 get_vectype_for_scalar_type (vinfo, type));
6162 return new_value;
6165 /* Try to convert STMT_INFO into a call to a gather load or scatter store
6166 internal function. Return the final statement on success and set
6167 *TYPE_OUT to the vector type being loaded or stored.
6169 This function only handles gathers and scatters that were recognized
6170 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
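/* An illustrative example (not from the original comment): an if-converted
indexed load such as

for (int i = 0; i < n; ++i)
res[i] = in[idx[i]];

is, when the target provides a gather internal function, replaced by a
pattern statement along the lines of

res_1 = .GATHER_LOAD (&in, idx_1, scale, 0);

with the offset first converted to the width of the vector elements.  */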
6172 static gimple *
6173 vect_recog_gather_scatter_pattern (vec_info *vinfo,
6174 stmt_vec_info stmt_info, tree *type_out)
6176 /* Currently we only support this for loop vectorization. */
6177 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6178 if (!loop_vinfo)
6179 return NULL;
6181 /* Make sure that we're looking at a gather load or scatter store. */
6182 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
6183 if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6184 return NULL;
6186 /* Get the boolean that controls whether the load or store happens.
6187 This is null if the operation is unconditional. */
6188 tree mask = vect_get_load_store_mask (stmt_info);
6190 /* Make sure that the target supports an appropriate internal
6191 function for the gather/scatter operation. */
6192 gather_scatter_info gs_info;
6193 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info)
6194 || gs_info.ifn == IFN_LAST)
6195 return NULL;
6197 /* Convert the mask to the right form. */
6198 tree gs_vectype = get_vectype_for_scalar_type (loop_vinfo,
6199 gs_info.element_type);
6200 if (mask)
6201 mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
6202 loop_vinfo);
6203 else if (gs_info.ifn == IFN_MASK_SCATTER_STORE
6204 || gs_info.ifn == IFN_MASK_GATHER_LOAD
6205 || gs_info.ifn == IFN_MASK_LEN_SCATTER_STORE
6206 || gs_info.ifn == IFN_MASK_LEN_GATHER_LOAD)
6207 mask = build_int_cst (TREE_TYPE (truth_type_for (gs_vectype)), -1);
6209 /* Get the invariant base and non-invariant offset, converting the
6210 latter to the same width as the vector elements. */
6211 tree base = gs_info.base;
6212 tree offset_type = TREE_TYPE (gs_info.offset_vectype);
6213 tree offset = vect_add_conversion_to_pattern (vinfo, offset_type,
6214 gs_info.offset, stmt_info);
6216 /* Build the new pattern statement. */
6217 tree scale = size_int (gs_info.scale);
6218 gcall *pattern_stmt;
6219 if (DR_IS_READ (dr))
6221 tree zero = build_zero_cst (gs_info.element_type);
6222 if (mask != NULL)
6223 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base,
6224 offset, scale, zero, mask);
6225 else
6226 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
6227 offset, scale, zero);
6228 tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
6229 gimple_call_set_lhs (pattern_stmt, load_lhs);
6231 else
6233 tree rhs = vect_get_store_rhs (stmt_info);
6234 if (mask != NULL)
6235 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5,
6236 base, offset, scale, rhs,
6237 mask);
6238 else
6239 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4,
6240 base, offset, scale, rhs);
6242 gimple_call_set_nothrow (pattern_stmt, true);
6244 /* Copy across relevant vectorization info and associate DR with the
6245 new pattern statement instead of the original statement. */
6246 stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
6247 loop_vinfo->move_dr (pattern_stmt_info, stmt_info);
6249 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6250 *type_out = vectype;
6251 vect_pattern_detected ("gather/scatter pattern", stmt_info->stmt);
6253 return pattern_stmt;
6256 /* Return true if TYPE is a non-boolean integer type. These are the types
6257 that we want to consider for narrowing. */
6259 static bool
6260 vect_narrowable_type_p (tree type)
6262 return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
6265 /* Return true if the operation given by CODE can be truncated to N bits
6266 when only N bits of the output are needed. This is only true if the
6267 upper bits of the inputs have no effect on the low N bits of the result. */
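/* For example (illustrative): the low 8 bits of A + B depend only on the
low 8 bits of A and B, so PLUS_EXPR is truncatable, whereas the low
8 bits of A >> 4 or A / B also depend on higher input bits, so shifts
and divisions are not.  */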
6269 static bool
6270 vect_truncatable_operation_p (tree_code code)
6272 switch (code)
6274 case PLUS_EXPR:
6275 case MINUS_EXPR:
6276 case MULT_EXPR:
6277 case BIT_AND_EXPR:
6278 case BIT_IOR_EXPR:
6279 case BIT_XOR_EXPR:
6280 case COND_EXPR:
6281 return true;
6283 default:
6284 return false;
6288 /* Record that STMT_INFO could be changed from operating on TYPE to
6289 operating on a type with the precision and sign given by PRECISION
6290 and SIGN respectively. PRECISION is an arbitrary bit precision;
6291 it might not be a whole number of bytes. */
6293 static void
6294 vect_set_operation_type (stmt_vec_info stmt_info, tree type,
6295 unsigned int precision, signop sign)
6297 /* Round the precision up to a whole number of bytes. */
6298 precision = vect_element_precision (precision);
6299 if (precision < TYPE_PRECISION (type)
6300 && (!stmt_info->operation_precision
6301 || stmt_info->operation_precision > precision))
6303 stmt_info->operation_precision = precision;
6304 stmt_info->operation_sign = sign;
6308 /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
6309 non-boolean inputs, all of which have type TYPE. MIN_INPUT_PRECISION
6310 is an arbitrary bit precision; it might not be a whole number of bytes. */
6312 static void
6313 vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
6314 unsigned int min_input_precision)
6316 /* This operation in isolation only requires the inputs to have
6317 MIN_INPUT_PRECISION bits of precision. However, that doesn't mean
6318 that MIN_INPUT_PRECISION is a natural precision for the chain
6319 as a whole. E.g. consider something like:
6321 unsigned short *x, *y;
6322 *y = ((*x & 0xf0) >> 4) | (*y << 4);
6324 The right shift can be done on unsigned chars, and only requires the
6325 result of "*x & 0xf0" to be done on unsigned chars. But taking that
6326 approach would mean turning a natural chain of single-vector unsigned
6327 short operations into one that truncates "*x" and then extends
6328 "(*x & 0xf0) >> 4", with two vectors for each unsigned short
6329 operation and one vector for each unsigned char operation.
6330 This would be a significant pessimization.
6332 Instead only propagate the maximum of this precision and the precision
6333 required by the users of the result. This means that we don't pessimize
6334 the case above but continue to optimize things like:
6336 unsigned char *y;
6337 unsigned short *x;
6338 *y = ((*x & 0xf0) >> 4) | (*y << 4);
6340 Here we would truncate two vectors of *x to a single vector of
6341 unsigned chars and use single-vector unsigned char operations for
6342 everything else, rather than doing two unsigned short copies of
6343 "(*x & 0xf0) >> 4" and then truncating the result. */
6344 min_input_precision = MAX (min_input_precision,
6345 stmt_info->min_output_precision);
6347 if (min_input_precision < TYPE_PRECISION (type)
6348 && (!stmt_info->min_input_precision
6349 || stmt_info->min_input_precision > min_input_precision))
6350 stmt_info->min_input_precision = min_input_precision;
6353 /* Subroutine of vect_determine_min_output_precision. Return true if
6354 we can calculate a reduced number of output bits for STMT_INFO,
6355 whose result is LHS. */
6357 static bool
6358 vect_determine_min_output_precision_1 (vec_info *vinfo,
6359 stmt_vec_info stmt_info, tree lhs)
6361 /* Take the maximum precision required by users of the result. */
6362 unsigned int precision = 0;
6363 imm_use_iterator iter;
6364 use_operand_p use;
6365 FOR_EACH_IMM_USE_FAST (use, iter, lhs)
6367 gimple *use_stmt = USE_STMT (use);
6368 if (is_gimple_debug (use_stmt))
6369 continue;
6370 stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
6371 if (!use_stmt_info || !use_stmt_info->min_input_precision)
6372 return false;
6373 /* The input precision recorded for COND_EXPRs applies only to the
6374 "then" and "else" values. */
6375 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
6376 if (assign
6377 && gimple_assign_rhs_code (assign) == COND_EXPR
6378 && use->use != gimple_assign_rhs2_ptr (assign)
6379 && use->use != gimple_assign_rhs3_ptr (assign))
6380 return false;
6381 precision = MAX (precision, use_stmt_info->min_input_precision);
6384 if (dump_enabled_p ())
6385 dump_printf_loc (MSG_NOTE, vect_location,
6386 "only the low %d bits of %T are significant\n",
6387 precision, lhs);
6388 stmt_info->min_output_precision = precision;
6389 return true;
6392 /* Calculate min_output_precision for STMT_INFO. */
6394 static void
6395 vect_determine_min_output_precision (vec_info *vinfo, stmt_vec_info stmt_info)
6397 /* We're only interested in statements with a narrowable result. */
6398 tree lhs = gimple_get_lhs (stmt_info->stmt);
6399 if (!lhs
6400 || TREE_CODE (lhs) != SSA_NAME
6401 || !vect_narrowable_type_p (TREE_TYPE (lhs)))
6402 return;
6404 if (!vect_determine_min_output_precision_1 (vinfo, stmt_info, lhs))
6405 stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
6408 /* Use range information to decide whether STMT (described by STMT_INFO)
6409 could be done in a narrower type. This is effectively a forward
6410 propagation, since it uses context-independent information that applies
6411 to all users of an SSA name. */
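/* For example (illustrative): if range information shows that the result
of an int addition always lies in [0, 200], the addition only needs
8 bits, so an operation precision of 8 (unsigned) is recorded and the
inputs need only supply 8 bits of precision.  */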
6413 static void
6414 vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
6416 tree lhs = gimple_assign_lhs (stmt);
6417 if (!lhs || TREE_CODE (lhs) != SSA_NAME)
6418 return;
6420 tree type = TREE_TYPE (lhs);
6421 if (!vect_narrowable_type_p (type))
6422 return;
6424 /* First see whether we have any useful range information for the result. */
6425 unsigned int precision = TYPE_PRECISION (type);
6426 signop sign = TYPE_SIGN (type);
6427 wide_int min_value, max_value;
6428 if (!vect_get_range_info (lhs, &min_value, &max_value))
6429 return;
6431 tree_code code = gimple_assign_rhs_code (stmt);
6432 unsigned int nops = gimple_num_ops (stmt);
6434 if (!vect_truncatable_operation_p (code))
6435 /* Check that all relevant input operands are compatible, and update
6436 [MIN_VALUE, MAX_VALUE] to include their ranges. */
6437 for (unsigned int i = 1; i < nops; ++i)
6439 tree op = gimple_op (stmt, i);
6440 if (TREE_CODE (op) == INTEGER_CST)
6442 /* Don't require the integer to have RHS_TYPE (which it might
6443 not for things like shift amounts, etc.), but do require it
6444 to fit the type. */
6445 if (!int_fits_type_p (op, type))
6446 return;
6448 min_value = wi::min (min_value, wi::to_wide (op, precision), sign);
6449 max_value = wi::max (max_value, wi::to_wide (op, precision), sign);
6451 else if (TREE_CODE (op) == SSA_NAME)
6453 /* Ignore codes that don't take uniform arguments. */
6454 if (!types_compatible_p (TREE_TYPE (op), type))
6455 return;
6457 wide_int op_min_value, op_max_value;
6458 if (!vect_get_range_info (op, &op_min_value, &op_max_value))
6459 return;
6461 min_value = wi::min (min_value, op_min_value, sign);
6462 max_value = wi::max (max_value, op_max_value, sign);
6464 else
6465 return;
6468 /* Try to switch signed types for unsigned types if we can.
6469 This is better for two reasons. First, unsigned ops tend
6470 to be cheaper than signed ops. Second, it means that we can
6471 handle things like:
6473 signed char c;
6474 int res = (int) c & 0xff00; // range [0x0000, 0xff00]
as:
6478 signed char c;
6479 unsigned short res_1 = (unsigned short) c & 0xff00;
6480 int res = (int) res_1;
6482 where the intermediate result res_1 has unsigned rather than
6483 signed type. */
6484 if (sign == SIGNED && !wi::neg_p (min_value))
6485 sign = UNSIGNED;
6487 /* See what precision is required for MIN_VALUE and MAX_VALUE. */
6488 unsigned int precision1 = wi::min_precision (min_value, sign);
6489 unsigned int precision2 = wi::min_precision (max_value, sign);
6490 unsigned int value_precision = MAX (precision1, precision2);
6491 if (value_precision >= precision)
6492 return;
6494 if (dump_enabled_p ())
6495 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
6496 " without loss of precision: %G",
6497 sign == SIGNED ? "signed" : "unsigned",
6498 value_precision, (gimple *) stmt);
6500 vect_set_operation_type (stmt_info, type, value_precision, sign);
6501 vect_set_min_input_precision (stmt_info, type, value_precision);
6504 /* Use information about the users of STMT's result to decide whether
6505 STMT (described by STMT_INFO) could be done in a narrower type.
6506 This is effectively a backward propagation. */
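/* For example (illustrative): if only the low 8 bits of X >> 4 are needed
by the users, the shift itself must be done in at least 8 + 4 = 12 bits
and therefore needs at least 12 significant input bits; the precision is
later rounded up to a supported element size such as 16.  */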
6508 static void
6509 vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
6511 tree_code code = gimple_assign_rhs_code (stmt);
6512 unsigned int opno = (code == COND_EXPR ? 2 : 1);
6513 tree type = TREE_TYPE (gimple_op (stmt, opno));
6514 if (!vect_narrowable_type_p (type))
6515 return;
6517 unsigned int precision = TYPE_PRECISION (type);
6518 unsigned int operation_precision, min_input_precision;
6519 switch (code)
6521 CASE_CONVERT:
6522 /* Only the bits that contribute to the output matter. Don't change
6523 the precision of the operation itself. */
6524 operation_precision = precision;
6525 min_input_precision = stmt_info->min_output_precision;
6526 break;
6528 case LSHIFT_EXPR:
6529 case RSHIFT_EXPR:
6531 tree shift = gimple_assign_rhs2 (stmt);
6532 if (TREE_CODE (shift) != INTEGER_CST
6533 || !wi::ltu_p (wi::to_widest (shift), precision))
6534 return;
6535 unsigned int const_shift = TREE_INT_CST_LOW (shift);
6536 if (code == LSHIFT_EXPR)
6538 /* Avoid creating an undefined shift.
6540 ??? We could instead use min_output_precision as-is and
6541 optimize out-of-range shifts to zero. However, only
6542 degenerate testcases shift away all their useful input data,
6543 and it isn't natural to drop input operations in the middle
6544 of vectorization. This sort of thing should really be
6545 handled before vectorization. */
6546 operation_precision = MAX (stmt_info->min_output_precision,
6547 const_shift + 1);
6548 /* We need CONST_SHIFT fewer bits of the input. */
6549 min_input_precision = (MAX (operation_precision, const_shift)
6550 - const_shift);
6552 else
6554 /* We need CONST_SHIFT extra bits to do the operation. */
6555 operation_precision = (stmt_info->min_output_precision
6556 + const_shift);
6557 min_input_precision = operation_precision;
6559 break;
6562 default:
6563 if (vect_truncatable_operation_p (code))
6565 /* Input bit N has no effect on output bits N-1 and lower. */
6566 operation_precision = stmt_info->min_output_precision;
6567 min_input_precision = operation_precision;
6568 break;
6570 return;
6573 if (operation_precision < precision)
6575 if (dump_enabled_p ())
6576 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
6577 " without affecting users: %G",
6578 TYPE_UNSIGNED (type) ? "unsigned" : "signed",
6579 operation_precision, (gimple *) stmt);
6580 vect_set_operation_type (stmt_info, type, operation_precision,
6581 TYPE_SIGN (type));
6583 vect_set_min_input_precision (stmt_info, type, min_input_precision);
6586 /* Return true if the statement described by STMT_INFO sets a boolean
6587 SSA_NAME and if we know how to vectorize this kind of statement using
6588 vector mask types. */
6590 static bool
6591 possible_vector_mask_operation_p (stmt_vec_info stmt_info)
6593 tree lhs = gimple_get_lhs (stmt_info->stmt);
6594 if (!lhs
6595 || TREE_CODE (lhs) != SSA_NAME
6596 || !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
6597 return false;
6599 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
6601 tree_code rhs_code = gimple_assign_rhs_code (assign);
6602 switch (rhs_code)
6604 CASE_CONVERT:
6605 case SSA_NAME:
6606 case BIT_NOT_EXPR:
6607 case BIT_IOR_EXPR:
6608 case BIT_XOR_EXPR:
6609 case BIT_AND_EXPR:
6610 return true;
6612 default:
6613 return TREE_CODE_CLASS (rhs_code) == tcc_comparison;
6616 else if (is_a <gphi *> (stmt_info->stmt))
6617 return true;
6618 return false;
6621 /* If STMT_INFO sets a boolean SSA_NAME, see whether we should use
6622 a vector mask type instead of a normal vector type. Record the
6623 result in STMT_INFO->mask_precision. */
6625 static void
6626 vect_determine_mask_precision (vec_info *vinfo, stmt_vec_info stmt_info)
6628 if (!possible_vector_mask_operation_p (stmt_info))
6629 return;
6631 /* If at least one boolean input uses a vector mask type,
6632 pick the mask type with the narrowest elements.
6634 ??? This is the traditional behavior. It should always produce
6635 the smallest number of operations, but isn't necessarily the
6636 optimal choice. For example, if we have:
6638 a = b & c
6640 where:
6642 - the user of a wants it to have a mask type for 16-bit elements (M16)
6643 - b also uses M16
6644 - c uses a mask type for 8-bit elements (M8)
6646 then picking M8 gives:
6648 - 1 M16->M8 pack for b
6649 - 1 M8 AND for a
6650 - 2 M8->M16 unpacks for the user of a
6652 whereas picking M16 would have given:
6654 - 2 M8->M16 unpacks for c
6655 - 2 M16 ANDs for a
6657 The number of operations is the same, but M16 would have given
6658 a shorter dependency chain and allowed more ILP. */
6659 unsigned int precision = ~0U;
6660 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
6662 unsigned int nops = gimple_num_ops (assign);
6663 for (unsigned int i = 1; i < nops; ++i)
6665 tree rhs = gimple_op (assign, i);
6666 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs)))
6667 continue;
6669 stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
6670 if (!def_stmt_info)
6671 /* Don't let external or constant operands influence the choice.
6672 We can convert them to whichever vector type we pick. */
6673 continue;
6675 if (def_stmt_info->mask_precision)
6677 if (precision > def_stmt_info->mask_precision)
6678 precision = def_stmt_info->mask_precision;
6682 /* If the statement compares two values that shouldn't use vector masks,
6683 try comparing the values as normal scalars instead. */
6684 tree_code rhs_code = gimple_assign_rhs_code (assign);
6685 if (precision == ~0U
6686 && TREE_CODE_CLASS (rhs_code) == tcc_comparison)
6688 tree rhs1_type = TREE_TYPE (gimple_assign_rhs1 (assign));
6689 scalar_mode mode;
6690 tree vectype, mask_type;
6691 if (is_a <scalar_mode> (TYPE_MODE (rhs1_type), &mode)
6692 && (vectype = get_vectype_for_scalar_type (vinfo, rhs1_type))
6693 && (mask_type = get_mask_type_for_scalar_type (vinfo, rhs1_type))
6694 && expand_vec_cmp_expr_p (vectype, mask_type, rhs_code))
6695 precision = GET_MODE_BITSIZE (mode);
6698 else
6700 gphi *phi = as_a <gphi *> (stmt_info->stmt);
6701 for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
6703 tree rhs = gimple_phi_arg_def (phi, i);
6705 stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
6706 if (!def_stmt_info)
6707 /* Don't let external or constant operands influence the choice.
6708 We can convert them to whichever vector type we pick. */
6709 continue;
6711 if (def_stmt_info->mask_precision)
6713 if (precision > def_stmt_info->mask_precision)
6714 precision = def_stmt_info->mask_precision;
6719 if (dump_enabled_p ())
6721 if (precision == ~0U)
6722 dump_printf_loc (MSG_NOTE, vect_location,
6723 "using normal nonmask vectors for %G",
6724 stmt_info->stmt);
6725 else
6726 dump_printf_loc (MSG_NOTE, vect_location,
6727 "using boolean precision %d for %G",
6728 precision, stmt_info->stmt);
6731 stmt_info->mask_precision = precision;
6734 /* Handle vect_determine_precisions for STMT_INFO, given that we
6735 have already done so for the users of its result. */
6737 void
6738 vect_determine_stmt_precisions (vec_info *vinfo, stmt_vec_info stmt_info)
6740 vect_determine_min_output_precision (vinfo, stmt_info);
6741 if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
6743 vect_determine_precisions_from_range (stmt_info, stmt);
6744 vect_determine_precisions_from_users (stmt_info, stmt);
6748 /* Walk backwards through the vectorizable region to determine the
6749 values of these fields:
6751 - min_output_precision
6752 - min_input_precision
6753 - operation_precision
6754 - operation_sign. */
6756 void
6757 vect_determine_precisions (vec_info *vinfo)
6759 DUMP_VECT_SCOPE ("vect_determine_precisions");
6761 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
6763 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6764 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
6765 unsigned int nbbs = loop->num_nodes;
6767 for (unsigned int i = 0; i < nbbs; i++)
6769 basic_block bb = bbs[i];
6770 for (auto gsi = gsi_start_phis (bb);
6771 !gsi_end_p (gsi); gsi_next (&gsi))
6773 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
6774 if (stmt_info)
6775 vect_determine_mask_precision (vinfo, stmt_info);
6777 for (auto si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
6778 if (!is_gimple_debug (gsi_stmt (si)))
6779 vect_determine_mask_precision
6780 (vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
6782 for (unsigned int i = 0; i < nbbs; i++)
6784 basic_block bb = bbs[nbbs - i - 1];
6785 for (gimple_stmt_iterator si = gsi_last_bb (bb);
6786 !gsi_end_p (si); gsi_prev (&si))
6787 if (!is_gimple_debug (gsi_stmt (si)))
6788 vect_determine_stmt_precisions
6789 (vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
6790 for (auto gsi = gsi_start_phis (bb);
6791 !gsi_end_p (gsi); gsi_next (&gsi))
6793 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
6794 if (stmt_info)
6795 vect_determine_stmt_precisions (vinfo, stmt_info);
6799 else
6801 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
6802 for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
6804 basic_block bb = bb_vinfo->bbs[i];
6805 for (auto gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
6807 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
6808 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
6809 vect_determine_mask_precision (vinfo, stmt_info);
6811 for (auto gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
6813 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
6814 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
6815 vect_determine_mask_precision (vinfo, stmt_info);
6818 for (int i = bb_vinfo->bbs.length () - 1; i != -1; --i)
6820 for (gimple_stmt_iterator gsi = gsi_last_bb (bb_vinfo->bbs[i]);
6821 !gsi_end_p (gsi); gsi_prev (&gsi))
6823 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
6824 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
6825 vect_determine_stmt_precisions (vinfo, stmt_info);
6827 for (auto gsi = gsi_start_phis (bb_vinfo->bbs[i]);
6828 !gsi_end_p (gsi); gsi_next (&gsi))
6830 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
6831 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
6832 vect_determine_stmt_precisions (vinfo, stmt_info);
6838 typedef gimple *(*vect_recog_func_ptr) (vec_info *, stmt_vec_info, tree *);
6840 struct vect_recog_func
6842 vect_recog_func_ptr fn;
6843 const char *name;
6846 /* Note that ordering matters - the first pattern matching on a stmt is
6847 taken, which means the more complex patterns usually need to precede the
6848 less complex ones (widen_sum only after dot_prod or sad, for example). */
6849 static vect_recog_func vect_vect_recog_func_ptrs[] = {
6850 { vect_recog_bitfield_ref_pattern, "bitfield_ref" },
6851 { vect_recog_bit_insert_pattern, "bit_insert" },
6852 { vect_recog_abd_pattern, "abd" },
6853 { vect_recog_over_widening_pattern, "over_widening" },
6854 /* Must come after over_widening, which narrows the shift as much as
6855 possible beforehand. */
6856 { vect_recog_average_pattern, "average" },
6857 { vect_recog_cond_expr_convert_pattern, "cond_expr_convert" },
6858 { vect_recog_mulhs_pattern, "mult_high" },
6859 { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
6860 { vect_recog_widen_mult_pattern, "widen_mult" },
6861 { vect_recog_dot_prod_pattern, "dot_prod" },
6862 { vect_recog_sad_pattern, "sad" },
6863 { vect_recog_widen_sum_pattern, "widen_sum" },
6864 { vect_recog_pow_pattern, "pow" },
6865 { vect_recog_popcount_clz_ctz_ffs_pattern, "popcount_clz_ctz_ffs" },
6866 { vect_recog_ctz_ffs_pattern, "ctz_ffs" },
6867 { vect_recog_widen_shift_pattern, "widen_shift" },
6868 { vect_recog_rotate_pattern, "rotate" },
6869 { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
6870 { vect_recog_divmod_pattern, "divmod" },
6871 { vect_recog_mult_pattern, "mult" },
6872 { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
6873 { vect_recog_bool_pattern, "bool" },
6874 /* This must come before mask conversion, and includes the parts
6875 of mask conversion that are needed for gather and scatter
6876 internal functions. */
6877 { vect_recog_gather_scatter_pattern, "gather_scatter" },
6878 { vect_recog_mask_conversion_pattern, "mask_conversion" },
6879 { vect_recog_widen_plus_pattern, "widen_plus" },
6880 { vect_recog_widen_minus_pattern, "widen_minus" },
6881 { vect_recog_widen_abd_pattern, "widen_abd" },
6882 /* These must come after the double widening ones. */
6885 const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);
6887 /* Mark statements that are involved in a pattern. */
6889 void
6890 vect_mark_pattern_stmts (vec_info *vinfo,
6891 stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
6892 tree pattern_vectype)
6894 stmt_vec_info orig_stmt_info_saved = orig_stmt_info;
6895 gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
6897 gimple *orig_pattern_stmt = NULL;
6898 if (is_pattern_stmt_p (orig_stmt_info))
6900 /* We're replacing a statement in an existing pattern definition
6901 sequence. */
6902 orig_pattern_stmt = orig_stmt_info->stmt;
6903 if (dump_enabled_p ())
6904 dump_printf_loc (MSG_NOTE, vect_location,
6905 "replacing earlier pattern %G", orig_pattern_stmt);
6907 /* To keep the book-keeping simple, just swap the lhs of the
6908 old and new statements, so that the old one has a valid but
6909 unused lhs. */
6910 tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
6911 gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
6912 gimple_set_lhs (pattern_stmt, old_lhs);
6914 if (dump_enabled_p ())
6915 dump_printf_loc (MSG_NOTE, vect_location, "with %G", pattern_stmt);
6917 /* Switch to the statement that ORIG replaces. */
6918 orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);
6920 /* We shouldn't be replacing the main pattern statement. */
6921 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
6922 != orig_pattern_stmt);
6925 if (def_seq)
6926 for (gimple_stmt_iterator si = gsi_start (def_seq);
6927 !gsi_end_p (si); gsi_next (&si))
6929 if (dump_enabled_p ())
6930 dump_printf_loc (MSG_NOTE, vect_location,
6931 "extra pattern stmt: %G", gsi_stmt (si));
6932 stmt_vec_info pattern_stmt_info
6933 = vect_init_pattern_stmt (vinfo, gsi_stmt (si),
6934 orig_stmt_info, pattern_vectype);
6935 /* Stmts in the def sequence are not vectorizable cycle or
6936 induction defs; instead they should all be vect_internal_def
6937 feeding the main pattern stmt which retains this def type. */
6938 STMT_VINFO_DEF_TYPE (pattern_stmt_info) = vect_internal_def;
6941 if (orig_pattern_stmt)
6943 vect_init_pattern_stmt (vinfo, pattern_stmt,
6944 orig_stmt_info, pattern_vectype);
6946 /* Insert all the new pattern statements before the original one. */
6947 gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
6948 gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
6949 orig_def_seq);
6950 gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
6951 gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);
6953 /* Remove the pattern statement that this new pattern replaces. */
6954 gsi_remove (&gsi, false);
6956 else
6957 vect_set_pattern_stmt (vinfo,
6958 pattern_stmt, orig_stmt_info, pattern_vectype);
6960 /* Transfer reduction path info to the pattern. */
6961 if (STMT_VINFO_REDUC_IDX (orig_stmt_info_saved) != -1)
6963 gimple_match_op op;
6964 if (!gimple_extract_op (orig_stmt_info_saved->stmt, &op))
6965 gcc_unreachable ();
6966 tree lookfor = op.ops[STMT_VINFO_REDUC_IDX (orig_stmt_info)];
6967 /* Search the pattern def sequence and the main pattern stmt. Note
6968 we may have inserted everything into a containing pattern def sequence,
6969 so the following is a bit awkward. */
6970 gimple_stmt_iterator si;
6971 gimple *s;
6972 if (def_seq)
6974 si = gsi_start (def_seq);
6975 s = gsi_stmt (si);
6976 gsi_next (&si);
6978 else
6980 si = gsi_none ();
6981 s = pattern_stmt;
6985 bool found = false;
6986 if (gimple_extract_op (s, &op))
6987 for (unsigned i = 0; i < op.num_ops; ++i)
6988 if (op.ops[i] == lookfor)
6990 STMT_VINFO_REDUC_IDX (vinfo->lookup_stmt (s)) = i;
6991 lookfor = gimple_get_lhs (s);
6992 found = true;
6993 break;
6995 if (s == pattern_stmt)
6997 if (!found && dump_enabled_p ())
6998 dump_printf_loc (MSG_NOTE, vect_location,
6999 "failed to update reduction index.\n");
7000 break;
7002 if (gsi_end_p (si))
7003 s = pattern_stmt;
7004 else
7006 s = gsi_stmt (si);
7007 if (s == pattern_stmt)
7008 /* Found the end inside a bigger pattern def seq. */
7009 si = gsi_none ();
7010 else
7011 gsi_next (&si);
7013 } while (1);
7017 /* Function vect_pattern_recog_1
7019 Input:
7020 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
7021 computation pattern.
7022 STMT_INFO: A stmt from which the pattern search should start.
7024 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
7025 a sequence of statements that has the same functionality and can be
7026 used to replace STMT_INFO. It returns the last statement in the sequence
7027 and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
7028 PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
7029 statement, having first checked that the target supports the new operation
7030 in that type.
7032 This function also does some bookkeeping, as explained in the documentation
7033 for vect_pattern_recog. */
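/* A minimal sketch of the shape such a recognizer takes; the name
   vect_recog_add_self_pattern and the a + a -> a * 2 rewrite are purely
   illustrative (no such pattern is registered below), and the usual
   target-support checks are elided:

     static gimple *
     vect_recog_add_self_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
                                  tree *type_out)
     {
       gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
       if (!last_stmt
           || gimple_assign_rhs_code (last_stmt) != PLUS_EXPR
           || !operand_equal_p (gimple_assign_rhs1 (last_stmt),
                                gimple_assign_rhs2 (last_stmt), 0))
         return NULL;
       tree lhs = gimple_assign_lhs (last_stmt);
       if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
         return NULL;
       tree vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
       if (!vectype)
         return NULL;
       vect_pattern_detected ("vect_recog_add_self_pattern", last_stmt);
       *type_out = vectype;
       tree var = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
       return gimple_build_assign (var, MULT_EXPR,
                                   gimple_assign_rhs1 (last_stmt),
                                   build_int_cst (TREE_TYPE (lhs), 2));
     }  */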
7035 static void
7036 vect_pattern_recog_1 (vec_info *vinfo,
7037 vect_recog_func *recog_func, stmt_vec_info stmt_info)
7039 gimple *pattern_stmt;
7040 loop_vec_info loop_vinfo;
7041 tree pattern_vectype;
7043 /* If this statement has already been replaced with pattern statements,
7044 leave the original statement alone, since the first match wins.
7045 Instead try to match against the definition statements that feed
7046 the main pattern statement. */
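/* For example (illustrative): if a widen-mult pattern has already replaced
   STMT_INFO, later recognizers are tried on the conversion stmts that the
   widen-mult pattern queued in the def sequence rather than on STMT_INFO
   itself.  */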
7047 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
7049 gimple_stmt_iterator gsi;
7050 for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
7051 !gsi_end_p (gsi); gsi_next (&gsi))
7052 vect_pattern_recog_1 (vinfo, recog_func,
7053 vinfo->lookup_stmt (gsi_stmt (gsi)));
7054 return;
7057 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
7058 pattern_stmt = recog_func->fn (vinfo, stmt_info, &pattern_vectype);
7059 if (!pattern_stmt)
7061 /* Clear any half-formed pattern definition sequence. */
7062 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
7063 return;
7066 loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7068 /* Found a vectorizable pattern. */
7069 if (dump_enabled_p ())
7070 dump_printf_loc (MSG_NOTE, vect_location,
7071 "%s pattern recognized: %G",
7072 recog_func->name, pattern_stmt);
7074 /* Mark the stmts that are involved in the pattern. */
7075 vect_mark_pattern_stmts (vinfo, stmt_info, pattern_stmt, pattern_vectype);
7077 /* Patterns cannot be vectorized using SLP, because they change the order of
7078 computation. */
7079 if (loop_vinfo)
7081 unsigned ix, ix2;
7082 stmt_vec_info *elem_ptr;
7083 VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
7084 elem_ptr, *elem_ptr == stmt_info);
7089 /* Function vect_pattern_recog
7091 Input:
7092 VINFO - the vectorization info (a loop_vec_info or bb_vec_info) for the code
7093 in which we want to look for computation idioms.
7095 Output - for each computation idiom that is detected we create a new stmt
7096 that provides the same functionality and that can be vectorized. We
7097 also record some information in the stmt_vec_info of the relevant
7098 stmts, as explained below:
7100 At the entry to this function we have the following stmts, with the
7101 following initial value in the STMT_VINFO fields:
7103 stmt                             in_pattern_p    related_stmt    vec_stmt
7104 S1: a_i = ....                        -               -               -
7105 S2: a_2 = ..use(a_i)..                -               -               -
7106 S3: a_1 = ..use(a_2)..                -               -               -
7107 S4: a_0 = ..use(a_1)..                -               -               -
7108 S5: ... = ..use(a_0)..                -               -               -
7110 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
7111 represented by a single stmt. We then:
7112 - create a new stmt S6 equivalent to the pattern (the stmt is not
7113 inserted into the code)
7114 - fill in the STMT_VINFO fields as follows:
7116                                  in_pattern_p    related_stmt    vec_stmt
7117 S1: a_i = ....                        -               -               -
7118 S2: a_2 = ..use(a_i)..                -               -               -
7119 S3: a_1 = ..use(a_2)..                -               -               -
7120 S4: a_0 = ..use(a_1)..              true              S6              -
7121 '---> S6: a_new = ....                -               S4              -
7122 S5: ... = ..use(a_0)..                -               -               -
7124 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
7125 to each other through the RELATED_STMT field).
7127 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
7128 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
7129 remain irrelevant unless used by stmts other than S4.
7131 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
7132 (because they are marked as irrelevant). It will vectorize S6, and record
7133 a pointer to the new vector stmt VS6 from S6 (as usual).
7134 S4 will be skipped, and S5 will be vectorized as usual:
7136                                  in_pattern_p    related_stmt    vec_stmt
7137 S1: a_i = ....                        -               -               -
7138 S2: a_2 = ..use(a_i)..                -               -               -
7139 S3: a_1 = ..use(a_2)..                -               -               -
7140 > VS6: va_new = ....                  -               -               -
7141 S4: a_0 = ..use(a_1)..              true              S6             VS6
7142 '---> S6: a_new = ....                -               S4             VS6
7143 > VS5: ... = ..vuse(va_new)..         -               -               -
7144 S5: ... = ..use(a_0)..                -               -               -
7146 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
7147 elsewhere), and we'll end up with:
7149 VS6: va_new = ....
7150 VS5: ... = ..vuse(va_new)..
7152 When more than one pattern statement is created, e.g. for widen-mult with
7153 an intermediate type:
7155 S1 a_t = ;
7156 S2 a_T = (TYPE) a_t;
7157 '--> S3: a_it = (interm_type) a_t;
7158 S4 prod_T = a_T * CONST;
7159 '--> S5: prod_T' = a_it w* CONST;
7161 there may be other users of a_T outside the pattern. In that case S2 will
7162 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
7163 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
7164 be recorded in S3. */
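/* Roughly, the source-level shape behind that widen-mult example (a
   hypothetical snippet, not taken from the testsuite) is

     signed char *a;
     int *out;
     for (int i = 0; i < n; i++)
       out[i] = a[i] * 7;

   where a_t is the char load, a_T its promotion to int, and the pattern
   introduces the narrower intermediate type so the target's widening
   multiply can produce the int product directly.  */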
7166 void
7167 vect_pattern_recog (vec_info *vinfo)
7169 class loop *loop;
7170 basic_block *bbs;
7171 unsigned int nbbs;
7172 gimple_stmt_iterator si;
7173 unsigned int i, j;
7175 vect_determine_precisions (vinfo);
7177 DUMP_VECT_SCOPE ("vect_pattern_recog");
7179 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
7181 loop = LOOP_VINFO_LOOP (loop_vinfo);
7182 bbs = LOOP_VINFO_BBS (loop_vinfo);
7183 nbbs = loop->num_nodes;
7185 /* Scan through the loop stmts, applying the pattern recognition
7186 functions starting at each stmt visited: */
7187 for (i = 0; i < nbbs; i++)
7189 basic_block bb = bbs[i];
7190 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
7192 if (is_gimple_debug (gsi_stmt (si)))
7193 continue;
7194 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
7195 /* Scan over all generic vect_recog_xxx_pattern functions. */
7196 for (j = 0; j < NUM_PATTERNS; j++)
7197 vect_pattern_recog_1 (vinfo, &vect_vect_recog_func_ptrs[j],
7198 stmt_info);
7202 else
7204 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
7205 for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
7206 for (gimple_stmt_iterator gsi = gsi_start_bb (bb_vinfo->bbs[i]);
7207 !gsi_end_p (gsi); gsi_next (&gsi))
7209 stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (gsi_stmt (gsi));
7210 if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info))
7211 continue;
7213 /* Scan over all generic vect_recog_xxx_pattern functions. */
7214 for (j = 0; j < NUM_PATTERNS; j++)
7215 vect_pattern_recog_1 (vinfo,
7216 &vect_vect_recog_func_ptrs[j], stmt_info);
7220 /* After this no more add_stmt calls are allowed. */
7221 vinfo->stmt_vec_info_ro = true;
7224 /* Build a GIMPLE_ASSIGN or GIMPLE_CALL with the tree_code or
7225 internal_fn contained in CH, respectively. */
7226 gimple *
7227 vect_gimple_build (tree lhs, code_helper ch, tree op0, tree op1)
7229 gcc_assert (op0 != NULL_TREE);
7230 if (ch.is_tree_code ())
7231 return gimple_build_assign (lhs, (tree_code) ch, op0, op1);
7233 gcc_assert (ch.is_internal_fn ());
7234 gimple* stmt = gimple_build_call_internal (as_internal_fn ((combined_fn) ch),
7235 op1 == NULL_TREE ? 1 : 2,
7236 op0, op1);
7237 gimple_call_set_lhs (stmt, lhs);
7238 return stmt;
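/* Illustrative uses (a hedged sketch, not calls taken from this file):

     vect_gimple_build (lhs, PLUS_EXPR, a, b);   builds  lhs = a + b
     vect_gimple_build (lhs, IFN_FMIN, a, b);    builds  lhs = .FMIN (a, b)

   Passing op1 == NULL_TREE builds a single-operand internal call such as
   .SQRT (op0).  */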