/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006-2023 Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "ssa.h"
#include "expmed.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimple-fold.h"
#include "gimplify-me.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "dumpfile.h"
#include "builtins.h"
#include "internal-fn.h"
#include "case-cfn-macros.h"
#include "fold-const-call.h"
#include "attribs.h"
#include "cgraph.h"
#include "omp-simd-clone.h"
#include "predict.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "gimple-range.h"
/* TODO:  Note the vectorizer still builds COND_EXPRs with GENERIC compares
   in the first operand.  Disentangling this is future work, the
   IL is properly transferred to VEC_COND_EXPRs with separate compares.  */
/* Return true if we have a useful VR_RANGE range for VAR, storing it
   in *MIN_VALUE and *MAX_VALUE if so.  Note the range in the dump files.  */

bool
vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
{
  value_range vr;
  tree vr_min, vr_max;
  get_range_query (cfun)->range_of_expr (vr, var);
  if (vr.undefined_p ())
    vr.set_varying (TREE_TYPE (var));
  value_range_kind vr_type = get_legacy_range (vr, vr_min, vr_max);
  *min_value = wi::to_wide (vr_min);
  *max_value = wi::to_wide (vr_max);
  wide_int nonzero = get_nonzero_bits (var);
  signop sgn = TYPE_SIGN (TREE_TYPE (var));
  if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
					 nonzero, sgn) == VR_RANGE)
    {
      if (dump_enabled_p ())
	{
	  dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
	  dump_printf (MSG_NOTE, " has range [");
	  dump_hex (MSG_NOTE, *min_value);
	  dump_printf (MSG_NOTE, ", ");
	  dump_hex (MSG_NOTE, *max_value);
	  dump_printf (MSG_NOTE, "]\n");
	}
      return true;
    }
  else
    {
      if (dump_enabled_p ())
	{
	  dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
	  dump_printf (MSG_NOTE, " has no range info\n");
	}
      return false;
    }
}
/* Report that we've found an instance of pattern PATTERN in
   statement STMT.  */

static void
vect_pattern_detected (const char *name, gimple *stmt)
{
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt);
}
/* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
   return the pattern statement's stmt_vec_info.  Set its vector type to
   VECTYPE if it doesn't have one already.  */

static stmt_vec_info
vect_init_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
			stmt_vec_info orig_stmt_info, tree vectype)
{
  stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
  if (pattern_stmt_info == NULL)
    pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
  gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));

  pattern_stmt_info->pattern_stmt_p = true;
  STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
  STMT_VINFO_DEF_TYPE (pattern_stmt_info)
    = STMT_VINFO_DEF_TYPE (orig_stmt_info);
  if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
    {
      gcc_assert (!vectype
		  || (VECTOR_BOOLEAN_TYPE_P (vectype)
		      == vect_use_mask_type_p (orig_stmt_info)));
      STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
      pattern_stmt_info->mask_precision = orig_stmt_info->mask_precision;
    }
  return pattern_stmt_info;
}
/* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
   Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
   have one already.  */

static void
vect_set_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
		       stmt_vec_info orig_stmt_info, tree vectype)
{
  STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
  STMT_VINFO_RELATED_STMT (orig_stmt_info)
    = vect_init_pattern_stmt (vinfo, pattern_stmt, orig_stmt_info, vectype);
}
/* Add NEW_STMT to STMT_INFO's pattern definition statements.  If VECTYPE
   is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
   be different from the vector type of the final pattern statement.
   If VECTYPE is a mask type, SCALAR_TYPE_FOR_MASK is the scalar type
   from which it was derived.  */

static inline void
append_pattern_def_seq (vec_info *vinfo,
			stmt_vec_info stmt_info, gimple *new_stmt,
			tree vectype = NULL_TREE,
			tree scalar_type_for_mask = NULL_TREE)
{
  gcc_assert (!scalar_type_for_mask
	      == (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype)));
  if (vectype)
    {
      stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
      STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
      if (scalar_type_for_mask)
	new_stmt_info->mask_precision
	  = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (scalar_type_for_mask));
    }
  gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
				      new_stmt);
}
/* The caller wants to perform new operations on vect_external variable
   VAR, so that the result of the operations would also be vect_external.
   Return the edge on which the operations can be performed, if one exists.
   Return null if the operations should instead be treated as part of
   the pattern that needs them.  */

static edge
vect_get_external_def_edge (vec_info *vinfo, tree var)
{
  edge e = NULL;
  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    {
      e = loop_preheader_edge (loop_vinfo->loop);
      if (!SSA_NAME_IS_DEFAULT_DEF (var))
	{
	  basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
	  if (bb == NULL
	      || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
	    e = NULL;
	}
    }
  return e;
}
/* Return true if the target supports a vector version of CODE,
   where CODE is known to map to a direct optab with the given SUBTYPE.
   ITYPE specifies the type of (some of) the scalar inputs and OTYPE
   specifies the type of the scalar result.

   If CODE allows the inputs and outputs to have different type
   (such as for WIDEN_SUM_EXPR), it is the input mode rather
   than the output mode that determines the appropriate target pattern.
   Operand 0 of the target pattern then specifies the mode that the output
   must have.

   When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
   Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
   is nonnull.  */

static bool
vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code,
				 tree itype, tree *vecotype_out,
				 tree *vecitype_out = NULL,
				 enum optab_subtype subtype = optab_default)
{
  tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
  if (!vecitype)
    return false;

  tree vecotype = get_vectype_for_scalar_type (vinfo, otype);
  if (!vecotype)
    return false;

  optab optab = optab_for_tree_code (code, vecitype, subtype);
  if (!optab)
    return false;

  insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
  if (icode == CODE_FOR_nothing
      || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
    return false;

  *vecotype_out = vecotype;
  if (vecitype_out)
    *vecitype_out = vecitype;
  return true;
}
247 /* Round bit precision PRECISION up to a full element. */
249 static unsigned int
250 vect_element_precision (unsigned int precision)
252 precision = 1 << ceil_log2 (precision);
253 return MAX (precision, BITS_PER_UNIT);
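
/* For example, vect_element_precision (5) yields 8 and
   vect_element_precision (17) yields 32; on targets with 8-bit units
   anything smaller than a byte is rounded up to BITS_PER_UNIT.  */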
/* If OP is defined by a statement that's being considered for vectorization,
   return information about that statement, otherwise return NULL.  */

static stmt_vec_info
vect_get_internal_def (vec_info *vinfo, tree op)
{
  stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
  if (def_stmt_info
      && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
    return def_stmt_info;
  return NULL;
}
/* Check whether NAME, an ssa-name used in STMT_VINFO,
   is a result of a type promotion, such that:
     DEF_STMT: NAME = NOP (name0)
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.  */

static bool
type_conversion_p (vec_info *vinfo, tree name, bool check_sign,
		   tree *orig_type, gimple **def_stmt, bool *promotion)
{
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;

  stmt_vec_info def_stmt_info;
  if (!vect_is_simple_use (name, vinfo, &dt, &def_stmt_info, def_stmt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (!*def_stmt)
    return false;

  if (!is_gimple_assign (*def_stmt))
    return false;

  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *orig_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
    return false;

  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
    *promotion = true;
  else
    *promotion = false;

  if (!vect_is_simple_use (oprnd0, vinfo, &dt))
    return false;

  return true;
}
/* Holds information about an input operand after some sign changes
   and type promotions have been peeled away.  */
class vect_unpromoted_value {
public:
  vect_unpromoted_value ();

  void set_op (tree, vect_def_type, stmt_vec_info = NULL);

  /* The value obtained after peeling away zero or more casts.  */
  tree op;

  /* The type of OP.  */
  tree type;

  /* The definition type of OP.  */
  vect_def_type dt;

  /* If OP is the result of peeling at least one cast, and if the cast
     of OP itself is a vectorizable statement, CASTER identifies that
     statement, otherwise it is null.  */
  stmt_vec_info caster;
};

inline vect_unpromoted_value::vect_unpromoted_value ()
  : op (NULL_TREE),
    type (NULL_TREE),
    dt (vect_uninitialized_def),
    caster (NULL)
{
}
/* Set the operand to OP_IN, its definition type to DT_IN, and the
   statement that casts it to CASTER_IN.  */

inline void
vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
			       stmt_vec_info caster_in)
{
  op = op_in;
  type = TREE_TYPE (op);
  dt = dt_in;
  caster = caster_in;
}
/* If OP is a vectorizable SSA name, strip a sequence of integer conversions
   to reach some vectorizable inner operand OP', continuing as long as it
   is possible to convert OP' back to OP using a possible sign change
   followed by a possible promotion P.  Return this OP', or null if OP is
   not a vectorizable SSA name.  If there is a promotion P, describe its
   input in UNPROM, otherwise describe OP' in UNPROM.  If SINGLE_USE_P
   is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
   have more than one user.

   A successful return means that it is possible to go from OP' to OP
   via UNPROM.  The cast from OP' to UNPROM is at most a sign change,
   whereas the cast from UNPROM to OP might be a promotion, a sign
   change, or a nop.

   E.g. say we have:

       signed short *ptr = ...;
       signed short C = *ptr;
       unsigned short B = (unsigned short) C;    // sign change
       signed int A = (signed int) B;            // unsigned promotion
       ...possible other uses of A...
       unsigned int OP = (unsigned int) A;       // sign change

   In this case it's possible to go directly from C to OP using:

       OP = (unsigned int) (unsigned short) C;
	    +------------+ +--------------+
	       promotion       sign change

   so OP' would be C.  The input to the promotion is B, so UNPROM
   would describe B.  */

static tree
vect_look_through_possible_promotion (vec_info *vinfo, tree op,
				      vect_unpromoted_value *unprom,
				      bool *single_use_p = NULL)
{
  tree res = NULL_TREE;
  tree op_type = TREE_TYPE (op);
  unsigned int orig_precision = TYPE_PRECISION (op_type);
  unsigned int min_precision = orig_precision;
  stmt_vec_info caster = NULL;
  while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
    {
      /* See whether OP is simple enough to vectorize.  */
      stmt_vec_info def_stmt_info;
      gimple *def_stmt;
      vect_def_type dt;
      if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
	break;

      /* If OP is the input of a demotion, skip over it to see whether
	 OP is itself the result of a promotion.  If so, the combined
	 effect of the promotion and the demotion might fit the required
	 pattern, otherwise neither operation fits.

	 This copes with cases such as the result of an arithmetic
	 operation being truncated before being stored, and where that
	 arithmetic operation has been recognized as an over-widened one.  */
      if (TYPE_PRECISION (op_type) <= min_precision)
	{
	  /* Use OP as the UNPROM described above if we haven't yet
	     found a promotion, or if using the new input preserves the
	     sign of the previous promotion.  */
	  if (!res
	      || TYPE_PRECISION (unprom->type) == orig_precision
	      || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type))
	    {
	      unprom->set_op (op, dt, caster);
	      min_precision = TYPE_PRECISION (op_type);
	    }
	  /* Stop if we've already seen a promotion and if this
	     conversion does more than change the sign.  */
	  else if (TYPE_PRECISION (op_type)
		   != TYPE_PRECISION (unprom->type))
	    break;

	  /* The sequence now extends to OP.  */
	  res = op;
	}

      /* See whether OP is defined by a cast.  Record it as CASTER if
	 the cast is potentially vectorizable.  */
      if (!def_stmt)
	break;
      caster = def_stmt_info;

      /* Ignore pattern statements, since we don't link uses for them.  */
      if (caster
	  && single_use_p
	  && !STMT_VINFO_RELATED_STMT (caster)
	  && !has_single_use (res))
	*single_use_p = false;

      gassign *assign = dyn_cast <gassign *> (def_stmt);
      if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
	break;

      /* Continue with the input to the cast.  */
      op = gimple_assign_rhs1 (def_stmt);
      op_type = TREE_TYPE (op);
    }
  return res;
}
/* OP is an integer operand to an operation that returns TYPE, and we
   want to treat the operation as a widening one.  So far we can treat
   it as widening from *COMMON_TYPE.

   Return true if OP is suitable for such a widening operation,
   either widening from *COMMON_TYPE or from some supertype of it.
   Update *COMMON_TYPE to the supertype in the latter case.

   SHIFT_P is true if OP is a shift amount.  */

static bool
vect_joust_widened_integer (tree type, bool shift_p, tree op,
			    tree *common_type)
{
  /* Calculate the minimum precision required by OP, without changing
     the sign of either operand.  */
  unsigned int precision;
  if (shift_p)
    {
      if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
	return false;
      precision = TREE_INT_CST_LOW (op);
    }
  else
    {
      precision = wi::min_precision (wi::to_widest (op),
				     TYPE_SIGN (*common_type));
      if (precision * 2 > TYPE_PRECISION (type))
	return false;
    }

  /* If OP requires a wider type, switch to that type.  The checks
     above ensure that this is still narrower than the result.  */
  precision = vect_element_precision (precision);
  if (TYPE_PRECISION (*common_type) < precision)
    *common_type = build_nonstandard_integer_type
      (precision, TYPE_UNSIGNED (*common_type));
  return true;
}
/* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
   is narrower than type, storing the supertype in *COMMON_TYPE if so.  */

static bool
vect_joust_widened_type (tree type, tree new_type, tree *common_type)
{
  if (types_compatible_p (*common_type, new_type))
    return true;

  /* See if *COMMON_TYPE can hold all values of NEW_TYPE.  */
  if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
      && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
    return true;

  /* See if NEW_TYPE can hold all values of *COMMON_TYPE.  */
  if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
      && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
    {
      *common_type = new_type;
      return true;
    }

  /* We have mismatched signs, with the signed type being
     no wider than the unsigned type.  In this case we need
     a wider signed type.  */
  unsigned int precision = MAX (TYPE_PRECISION (*common_type),
				TYPE_PRECISION (new_type));
  precision *= 2;

  if (precision * 2 > TYPE_PRECISION (type))
    return false;

  *common_type = build_nonstandard_integer_type (precision, false);
  return true;
}
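
/* For example, with a 32-bit result type, joining an unsigned char
   *COMMON_TYPE with a short NEW_TYPE simply switches *COMMON_TYPE to
   short, whereas joining unsigned char with signed char takes the
   mismatched-sign path and widens *COMMON_TYPE to a 16-bit signed
   type.  */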
/* Check whether STMT_INFO can be viewed as a tree of integer operations
   in which each node either performs CODE or WIDENED_CODE, and where
   each leaf operand is narrower than the result of STMT_INFO.  MAX_NOPS
   specifies the maximum number of leaf operands.  SHIFT_P says whether
   CODE and WIDENED_CODE are some sort of shift.

   If STMT_INFO is such a tree, return the number of leaf operands
   and describe them in UNPROM[0] onwards.  Also set *COMMON_TYPE
   to a type that (a) is narrower than the result of STMT_INFO and
   (b) can hold all leaf operand values.

   If SUBTYPE is nonnull, allow the operands to differ in sign but not
   in precision.  SUBTYPE is updated to reflect this.

   Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
   exists.  */
static unsigned int
vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code,
		      tree_code widened_code, bool shift_p,
		      unsigned int max_nops,
		      vect_unpromoted_value *unprom, tree *common_type,
		      enum optab_subtype *subtype = NULL)
{
  /* Check for an integer operation with the right code.  */
  gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
  if (!assign)
    return 0;

  tree_code rhs_code = gimple_assign_rhs_code (assign);
  if (rhs_code != code && rhs_code != widened_code)
    return 0;

  tree type = TREE_TYPE (gimple_assign_lhs (assign));
  if (!INTEGRAL_TYPE_P (type))
    return 0;

  /* Assume that both operands will be leaf operands.  */
  max_nops -= 2;

  /* Check the operands.  */
  unsigned int next_op = 0;
  for (unsigned int i = 0; i < 2; ++i)
    {
      vect_unpromoted_value *this_unprom = &unprom[next_op];
      unsigned int nops = 1;
      tree op = gimple_op (assign, i + 1);
      if (i == 1 && TREE_CODE (op) == INTEGER_CST)
	{
	  /* We already have a common type from earlier operands.
	     Update it to account for OP.  */
	  this_unprom->set_op (op, vect_constant_def);
	  if (!vect_joust_widened_integer (type, shift_p, op, common_type))
	    return 0;
	}
      else
	{
	  /* Only allow shifts by constants.  */
	  if (shift_p && i == 1)
	    return 0;

	  if (rhs_code != code)
	    {
	      /* If rhs_code is widened_code, don't look through further
		 possible promotions, there is a promotion already embedded
		 in the WIDEN_*_EXPR.  */
	      if (TREE_CODE (op) != SSA_NAME
		  || !INTEGRAL_TYPE_P (TREE_TYPE (op)))
		return 0;

	      stmt_vec_info def_stmt_info;
	      gimple *def_stmt;
	      vect_def_type dt;
	      if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info,
				       &def_stmt))
		return 0;
	      this_unprom->set_op (op, dt, NULL);
	    }
	  else if (!vect_look_through_possible_promotion (vinfo, op,
							  this_unprom))
	    return 0;

	  if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
	    {
	      /* The operand isn't widened.  If STMT_INFO has the code
		 for an unwidened operation, recursively check whether
		 this operand is a node of the tree.  */
	      if (rhs_code != code
		  || max_nops == 0
		  || this_unprom->dt != vect_internal_def)
		return 0;

	      /* Give back the leaf slot allocated above now that we're
		 not treating this as a leaf operand.  */
	      max_nops += 1;

	      /* Recursively process the definition of the operand.  */
	      stmt_vec_info def_stmt_info
		= vinfo->lookup_def (this_unprom->op);
	      nops = vect_widened_op_tree (vinfo, def_stmt_info, code,
					   widened_code, shift_p, max_nops,
					   this_unprom, common_type,
					   subtype);
	      if (nops == 0)
		return 0;

	      max_nops -= nops;
	    }
	  else
	    {
	      /* Make sure that the operand is narrower than the result.  */
	      if (TYPE_PRECISION (this_unprom->type) * 2
		  > TYPE_PRECISION (type))
		return 0;

	      /* Update COMMON_TYPE for the new operand.  */
	      if (i == 0)
		*common_type = this_unprom->type;
	      else if (!vect_joust_widened_type (type, this_unprom->type,
						 common_type))
		{
		  if (subtype)
		    {
		      /* See if we can sign extend the smaller type.  */
		      if (TYPE_PRECISION (this_unprom->type)
			  > TYPE_PRECISION (*common_type))
			*common_type = this_unprom->type;
		      *subtype = optab_vector_mixed_sign;
		    }
		  else
		    return 0;
		}
	    }
	}
      next_op += nops;
    }
  return next_op;
}
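
/* For example, given unsigned char variables A and B and the statement

     int res = (int) a + (int) b;

   the PLUS_EXPR forms such a tree with two leaf operands: UNPROM[0]
   and UNPROM[1] describe A and B and *COMMON_TYPE becomes unsigned
   char, which the widening patterns below can then use, subject to
   target support.  */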
683 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
684 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
686 static tree
687 vect_recog_temp_ssa_var (tree type, gimple *stmt = NULL)
689 return make_temp_ssa_name (type, stmt, "patt");
692 /* STMT2_INFO describes a type conversion that could be split into STMT1
693 followed by a version of STMT2_INFO that takes NEW_RHS as its first
694 input. Try to do this using pattern statements, returning true on
695 success. */
697 static bool
698 vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
699 gimple *stmt1, tree vectype)
701 if (is_pattern_stmt_p (stmt2_info))
703 /* STMT2_INFO is part of a pattern. Get the statement to which
704 the pattern is attached. */
705 stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
706 vect_init_pattern_stmt (vinfo, stmt1, orig_stmt2_info, vectype);
708 if (dump_enabled_p ())
709 dump_printf_loc (MSG_NOTE, vect_location,
710 "Splitting pattern statement: %G", stmt2_info->stmt);
712 /* Since STMT2_INFO is a pattern statement, we can change it
713 in-situ without worrying about changing the code for the
714 containing block. */
715 gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);
717 if (dump_enabled_p ())
719 dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
720 dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
721 stmt2_info->stmt);
724 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
725 if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
726 /* STMT2_INFO is the actual pattern statement. Add STMT1
727 to the end of the definition sequence. */
728 gimple_seq_add_stmt_without_update (def_seq, stmt1);
729 else
731 /* STMT2_INFO belongs to the definition sequence. Insert STMT1
732 before it. */
733 gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
734 gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
736 return true;
738 else
740 /* STMT2_INFO doesn't yet have a pattern. Try to create a
741 two-statement pattern now. */
742 gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
743 tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
744 tree lhs_vectype = get_vectype_for_scalar_type (vinfo, lhs_type);
745 if (!lhs_vectype)
746 return false;
748 if (dump_enabled_p ())
749 dump_printf_loc (MSG_NOTE, vect_location,
750 "Splitting statement: %G", stmt2_info->stmt);
752 /* Add STMT1 as a singleton pattern definition sequence. */
753 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
754 vect_init_pattern_stmt (vinfo, stmt1, stmt2_info, vectype);
755 gimple_seq_add_stmt_without_update (def_seq, stmt1);
757 /* Build the second of the two pattern statements. */
758 tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
759 gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
760 vect_set_pattern_stmt (vinfo, new_stmt2, stmt2_info, lhs_vectype);
762 if (dump_enabled_p ())
764 dump_printf_loc (MSG_NOTE, vect_location,
765 "into pattern statements: %G", stmt1);
766 dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
767 (gimple *) new_stmt2);
770 return true;
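
/* For example, if STMT2_INFO is "y = (int) x" with X of type
   unsigned char and the caller wants a 16-bit intermediate, STMT1
   would be "patt = (unsigned short) x" and STMT2_INFO is rewritten to
   take NEW_RHS, giving "y = (int) patt".  vect_convert_input uses this
   to tap into a mid-way point of an existing cast.  */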
774 /* Convert UNPROM to TYPE and return the result, adding new statements
775 to STMT_INFO's pattern definition statements if no better way is
776 available. VECTYPE is the vector form of TYPE.
778 If SUBTYPE then convert the type based on the subtype. */
780 static tree
781 vect_convert_input (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
782 vect_unpromoted_value *unprom, tree vectype,
783 enum optab_subtype subtype = optab_default)
785 /* Update the type if the signs differ. */
786 if (subtype == optab_vector_mixed_sign)
788 gcc_assert (!TYPE_UNSIGNED (type));
789 if (TYPE_UNSIGNED (TREE_TYPE (unprom->op)))
791 type = unsigned_type_for (type);
792 vectype = unsigned_type_for (vectype);
796 /* Check for a no-op conversion. */
797 if (types_compatible_p (type, TREE_TYPE (unprom->op)))
798 return unprom->op;
800 /* Allow the caller to create constant vect_unpromoted_values. */
801 if (TREE_CODE (unprom->op) == INTEGER_CST)
802 return wide_int_to_tree (type, wi::to_widest (unprom->op));
804 tree input = unprom->op;
805 if (unprom->caster)
807 tree lhs = gimple_get_lhs (unprom->caster->stmt);
808 tree lhs_type = TREE_TYPE (lhs);
810 /* If the result of the existing cast is the right width, use it
811 instead of the source of the cast. */
812 if (TYPE_PRECISION (lhs_type) == TYPE_PRECISION (type))
813 input = lhs;
814 /* If the precision we want is between the source and result
815 precisions of the existing cast, try splitting the cast into
816 two and tapping into a mid-way point. */
817 else if (TYPE_PRECISION (lhs_type) > TYPE_PRECISION (type)
818 && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type))
820 /* In order to preserve the semantics of the original cast,
821 give the mid-way point the same signedness as the input value.
823 It would be possible to use a signed type here instead if
824 TYPE is signed and UNPROM->TYPE is unsigned, but that would
825 make the sign of the midtype sensitive to the order in
826 which we process the statements, since the signedness of
827 TYPE is the signedness required by just one of possibly
828 many users. Also, unsigned promotions are usually as cheap
829 as or cheaper than signed ones, so it's better to keep an
830 unsigned promotion. */
831 tree midtype = build_nonstandard_integer_type
832 (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type));
833 tree vec_midtype = get_vectype_for_scalar_type (vinfo, midtype);
834 if (vec_midtype)
836 input = vect_recog_temp_ssa_var (midtype, NULL);
837 gassign *new_stmt = gimple_build_assign (input, NOP_EXPR,
838 unprom->op);
839 if (!vect_split_statement (vinfo, unprom->caster, input, new_stmt,
840 vec_midtype))
841 append_pattern_def_seq (vinfo, stmt_info,
842 new_stmt, vec_midtype);
846 /* See if we can reuse an existing result. */
847 if (types_compatible_p (type, TREE_TYPE (input)))
848 return input;
851 /* We need a new conversion statement. */
852 tree new_op = vect_recog_temp_ssa_var (type, NULL);
853 gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, input);
855 /* If OP is an external value, see if we can insert the new statement
856 on an incoming edge. */
857 if (input == unprom->op && unprom->dt == vect_external_def)
858 if (edge e = vect_get_external_def_edge (vinfo, input))
860 basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
861 gcc_assert (!new_bb);
862 return new_op;
865 /* As a (common) last resort, add the statement to the pattern itself. */
866 append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype);
867 return new_op;
870 /* Invoke vect_convert_input for N elements of UNPROM and store the
871 result in the corresponding elements of RESULT.
873 If SUBTYPE then convert the type based on the subtype. */
875 static void
876 vect_convert_inputs (vec_info *vinfo, stmt_vec_info stmt_info, unsigned int n,
877 tree *result, tree type, vect_unpromoted_value *unprom,
878 tree vectype, enum optab_subtype subtype = optab_default)
880 for (unsigned int i = 0; i < n; ++i)
882 unsigned int j;
883 for (j = 0; j < i; ++j)
884 if (unprom[j].op == unprom[i].op)
885 break;
887 if (j < i)
888 result[i] = result[j];
889 else
890 result[i] = vect_convert_input (vinfo, stmt_info,
891 type, &unprom[i], vectype, subtype);
895 /* The caller has created a (possibly empty) sequence of pattern definition
896 statements followed by a single statement PATTERN_STMT. Cast the result
897 of this final statement to TYPE. If a new statement is needed, add
898 PATTERN_STMT to the end of STMT_INFO's pattern definition statements
899 and return the new statement, otherwise return PATTERN_STMT as-is.
900 VECITYPE is the vector form of PATTERN_STMT's result type. */
902 static gimple *
903 vect_convert_output (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
904 gimple *pattern_stmt, tree vecitype)
906 tree lhs = gimple_get_lhs (pattern_stmt);
907 if (!types_compatible_p (type, TREE_TYPE (lhs)))
909 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vecitype);
910 tree cast_var = vect_recog_temp_ssa_var (type, NULL);
911 pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
913 return pattern_stmt;
916 /* Return true if STMT_VINFO describes a reduction for which reassociation
917 is allowed. If STMT_INFO is part of a group, assume that it's part of
918 a reduction chain and optimistically assume that all statements
919 except the last allow reassociation.
920 Also require it to have code CODE and to be a reduction
921 in the outermost loop. When returning true, store the operands in
922 *OP0_OUT and *OP1_OUT. */
924 static bool
925 vect_reassociating_reduction_p (vec_info *vinfo,
926 stmt_vec_info stmt_info, tree_code code,
927 tree *op0_out, tree *op1_out)
929 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
930 if (!loop_info)
931 return false;
933 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
934 if (!assign || gimple_assign_rhs_code (assign) != code)
935 return false;
937 /* We don't allow changing the order of the computation in the inner-loop
938 when doing outer-loop vectorization. */
939 class loop *loop = LOOP_VINFO_LOOP (loop_info);
940 if (loop && nested_in_vect_loop_p (loop, stmt_info))
941 return false;
943 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
945 if (needs_fold_left_reduction_p (TREE_TYPE (gimple_assign_lhs (assign)),
946 code))
947 return false;
949 else if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) == NULL)
950 return false;
952 *op0_out = gimple_assign_rhs1 (assign);
953 *op1_out = gimple_assign_rhs2 (assign);
954 if (commutative_tree_code (code) && STMT_VINFO_REDUC_IDX (stmt_info) == 0)
955 std::swap (*op0_out, *op1_out);
956 return true;
/* match.pd function to match
   (cond (cmp@3 a b) (convert@1 c) (convert@2 d))
   with conditions:
   1) @1, @2, c, d, a, b are all integral type.
   2) There's single_use for both @1 and @2.
   3) a, c have same precision.
   4) c and @1 have different precision.
   5) c, d are the same type or they can differ in sign when convert is
   truncation.

   record a and c and d and @3.  */
extern bool gimple_cond_expr_convert_p (tree, tree*, tree (*)(tree));
/* Function vect_recog_cond_expr_convert

   Try to find the following pattern:

   TYPE_AB A,B;
   TYPE_CD C,D;
   TYPE_E E;
   TYPE_E op_true = (TYPE_E) A;
   TYPE_E op_false = (TYPE_E) B;

   E = C cmp D ? op_true : op_false;

   where
   TYPE_PRECISION (TYPE_E) != TYPE_PRECISION (TYPE_CD);
   TYPE_PRECISION (TYPE_AB) == TYPE_PRECISION (TYPE_CD);
   single_use of op_true and op_false.
   TYPE_AB could differ in sign when (TYPE_E) A is a truncation.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.
   here it starts with E = C cmp D ? op_true : op_false;

   Output:

   TYPE1 E' = C cmp D ? A : B;
   TYPE3 E = (TYPE3) E';

   There may be an extra nop_convert for A or B to handle different
   signedness.

   * TYPE_OUT: The vector type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
     stmts that constitute the pattern.  In this case it will be:
     E = (TYPE3) E';
     E' = C cmp D ? A : B; is recorded in pattern definition statements;  */
static gimple *
vect_recog_cond_expr_convert_pattern (vec_info *vinfo,
				      stmt_vec_info stmt_vinfo, tree *type_out)
{
  gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
  tree lhs, match[4], temp, type, new_lhs, op2;
  gimple *cond_stmt;
  gimple *pattern_stmt;

  if (!last_stmt)
    return NULL;

  lhs = gimple_assign_lhs (last_stmt);

  /* Find E = C cmp D ? (TYPE3) A : (TYPE3) B;
     TYPE_PRECISION (A) == TYPE_PRECISION (C).  */
  if (!gimple_cond_expr_convert_p (lhs, &match[0], NULL))
    return NULL;

  vect_pattern_detected ("vect_recog_cond_expr_convert_pattern", last_stmt);

  op2 = match[2];
  type = TREE_TYPE (match[1]);
  if (TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (match[2])))
    {
      op2 = vect_recog_temp_ssa_var (type, NULL);
      gimple* nop_stmt = gimple_build_assign (op2, NOP_EXPR, match[2]);
      append_pattern_def_seq (vinfo, stmt_vinfo, nop_stmt,
			      get_vectype_for_scalar_type (vinfo, type));
    }

  temp = vect_recog_temp_ssa_var (type, NULL);
  cond_stmt = gimple_build_assign (temp, build3 (COND_EXPR, type, match[3],
						 match[1], op2));
  append_pattern_def_seq (vinfo, stmt_vinfo, cond_stmt,
			  get_vectype_for_scalar_type (vinfo, type));
  new_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
  pattern_stmt = gimple_build_assign (new_lhs, NOP_EXPR, temp);
  *type_out = STMT_VINFO_VECTYPE (stmt_vinfo);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "created pattern stmt: %G", pattern_stmt);
  return pattern_stmt;
}
/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     type1a x_t;
     type1b y_t;
     TYPE1 prod;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type1a' and 'type1b',
   the sign of 'TYPE1' must be one of 'type1a' or 'type1b' but the sign of
   'type1a' and 'type1b' can differ.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
     stmts that constitute the pattern.  In this case it will be:
     WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
	 vectorized without preserving all the intermediate results.  It
	 produces only N/2 (widened) results (by summing up pairs of
	 intermediate results) rather than all N results.  Therefore, we
	 cannot allow this pattern when we want to get all the results and in
	 the correct order (as is the case when this computation is in an
	 inner-loop nested in an outer-loop that is being vectorized).  */
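
/* A typical source loop matched by the dot-product pattern above is

     signed char a[N], b[N];
     int sum = 0;
     for (int i = 0; i < N; ++i)
       sum += a[i] * b[i];

   where both operands are promoted before the multiplication and the
   products are accumulated into SUM.  */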
static gimple *
vect_recog_dot_prod_pattern (vec_info *vinfo,
			     stmt_vec_info stmt_vinfo, tree *type_out)
{
  tree oprnd0, oprnd1;
  gimple *last_stmt = stmt_vinfo->stmt;
  tree type, half_type;
  gimple *pattern_stmt;
  tree var;

  /* Look for the following pattern
	  DX = (TYPE1) X;
	  DY = (TYPE1) Y;
	  DPROD = DX * DY;
	  DDPROD = (TYPE2) DPROD;
	  sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type but the sign
       between X, Y and DPROD can differ.
     - sum is the same size as DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
				       &oprnd0, &oprnd1))
    return NULL;

  type = TREE_TYPE (gimple_get_lhs (last_stmt));

  vect_unpromoted_value unprom_mult;
  oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     It remains to check that oprnd0 is defined by a (widen_)mult_expr.  */
  if (!oprnd0)
    return NULL;

  stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
  if (!mult_vinfo)
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  vect_unpromoted_value unprom0[2];
  enum optab_subtype subtype = optab_vector;
  if (!vect_widened_op_tree (vinfo, mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
			     false, 2, unprom0, &half_type, &subtype))
    return NULL;

  /* If there are two widening operations, make sure they agree on the sign
     of the extension.  The result of an optab_vector_mixed_sign operation
     is signed; otherwise, the result has the same sign as the operands.  */
  if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
      && (subtype == optab_vector_mixed_sign
	  ? TYPE_UNSIGNED (unprom_mult.type)
	  : TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type)))
    return NULL;

  vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);

  /* If the inputs have mixed signs, canonicalize on using the signed
     input type for analysis.  This also helps when emulating mixed-sign
     operations using signed operations.  */
  if (subtype == optab_vector_mixed_sign)
    half_type = signed_type_for (half_type);

  tree half_vectype;
  if (!vect_supportable_direct_optab_p (vinfo, type, DOT_PROD_EXPR, half_type,
					type_out, &half_vectype, subtype))
    {
      /* We can emulate a mixed-sign dot-product using a sequence of
	 signed dot-products; see vect_emulate_mixed_dot_prod for details.  */
      if (subtype != optab_vector_mixed_sign
	  || !vect_supportable_direct_optab_p (vinfo, signed_type_for (type),
					       DOT_PROD_EXPR, half_type,
					       type_out, &half_vectype,
					       optab_vector))
	return NULL;

      *type_out = signed_or_unsigned_type_for (TYPE_UNSIGNED (type),
					       *type_out);
    }

  /* Get the inputs in the appropriate types.  */
  tree mult_oprnd[2];
  vect_convert_inputs (vinfo, stmt_vinfo, 2, mult_oprnd, half_type,
		       unprom0, half_vectype, subtype);

  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
				      mult_oprnd[0], mult_oprnd[1], oprnd1);

  return pattern_stmt;
}
/* Function vect_recog_sad_pattern

   Try to find the following Sum of Absolute Difference (SAD) pattern:

     type x_t, y_t;
     signed TYPE1 diff, abs_diff;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  diff = x_T - y_T;
     S6  abs_diff = ABS_EXPR <diff>;
     [S7  abs_diff = (TYPE2) abs_diff;  #optional]
     S8  sum_1 = abs_diff + sum_0;

   where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is
   the same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.  In the
   example, when this function is called with S8, the pattern
   {S3,S4,S5,S6,S7,S8} will be detected.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
     stmts that constitute the pattern.  In this case it will be:
     SAD_EXPR <x_t, y_t, sum_0>
  */
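
/* A typical source loop matched by the SAD pattern above is

     unsigned char a[N], b[N];
     int sum = 0;
     for (int i = 0; i < N; ++i)
       sum += __builtin_abs (a[i] - b[i]);

   where the subtraction is performed on the widened inputs, so the
   absolute difference always fits the result type.  */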
static gimple *
vect_recog_sad_pattern (vec_info *vinfo,
			stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  tree half_type;

  /* Look for the following pattern
	  DX = (TYPE1) X;
	  DY = (TYPE1) Y;
	  DDIFF = DX - DY;
	  DAD = ABS_EXPR <DDIFF>;
	  DDPROD = (TYPE2) DPROD;
	  sum_1 = DAD + sum_0;
     In which
     - DX is at least double the size of X
     - DY is at least double the size of Y
     - DX, DY, DDIFF, DAD all have the same type
     - sum is the same size as DAD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD w+ sum_0;    #widen summation
     or
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD + sum_0;     #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  tree plus_oprnd0, plus_oprnd1;
  if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
				       &plus_oprnd0, &plus_oprnd1))
    return NULL;

  tree sum_type = TREE_TYPE (gimple_get_lhs (last_stmt));

  /* Any non-truncating sequence of conversions is OK here, since
     with a successful match, the result of the ABS(U) is known to fit
     within the nonnegative range of the result type.  (It cannot be the
     negative of the minimum signed value due to the range of the widening
     MINUS_EXPR.)  */
  vect_unpromoted_value unprom_abs;
  plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
						      &unprom_abs);

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that plus_oprnd1 is the reduction variable (defined by a
     loop-header phi), and plus_oprnd0 is an ssa-name defined by a stmt in the
     loop body.  Then check that plus_oprnd0 is defined by an abs_expr.  */

  if (!plus_oprnd0)
    return NULL;

  stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
  if (!abs_stmt_vinfo)
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
  if (!abs_stmt
      || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
	  && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
    return NULL;

  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
  tree abs_type = TREE_TYPE (abs_oprnd);
  if (TYPE_UNSIGNED (abs_type))
    return NULL;

  /* Peel off conversions from the ABS input.  This can involve sign
     changes (e.g. from an unsigned subtraction to a signed ABS input)
     or signed promotion, but it can't include unsigned promotion.
     (Note that ABS of an unsigned promotion should have been folded
     away before now anyway.)  */
  vect_unpromoted_value unprom_diff;
  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
						    &unprom_diff);
  if (!abs_oprnd)
    return NULL;
  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
      && TYPE_UNSIGNED (unprom_diff.type))
    return NULL;

  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
  if (!diff_stmt_vinfo)
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  vect_unpromoted_value unprom[2];
  if (!vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR,
			     WIDEN_MINUS_EXPR, false, 2, unprom, &half_type))
    return NULL;

  vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);

  tree half_vectype;
  if (!vect_supportable_direct_optab_p (vinfo, sum_type, SAD_EXPR, half_type,
					type_out, &half_vectype))
    return NULL;

  /* Get the inputs to the SAD_EXPR in the appropriate types.  */
  tree sad_oprnd[2];
  vect_convert_inputs (vinfo, stmt_vinfo, 2, sad_oprnd, half_type,
		       unprom, half_vectype);

  tree var = vect_recog_temp_ssa_var (sum_type, NULL);
  gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
					      sad_oprnd[1], plus_oprnd1);

  return pattern_stmt;
}
/* Recognize an operation that performs ORIG_CODE on widened inputs,
   so that it can be treated as though it had the form:

      A_TYPE a;
      B_TYPE b;
      HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
      HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
    | RES_TYPE a_extend = (RES_TYPE) a_cast;  // promotion from HALF_TYPE
    | RES_TYPE b_extend = (RES_TYPE) b_cast;  // promotion from HALF_TYPE
    | RES_TYPE res = a_extend ORIG_CODE b_extend;

   Try to replace the pattern with:

      A_TYPE a;
      B_TYPE b;
      HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
      HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
    | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
    | RES_TYPE res = (EXT_TYPE) ext;  // possible no-op

   where EXT_TYPE is wider than HALF_TYPE but has the same signedness.

   SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts.  NAME is the
   name of the pattern being matched, for dump purposes.  */
1392 static gimple *
1393 vect_recog_widen_op_pattern (vec_info *vinfo,
1394 stmt_vec_info last_stmt_info, tree *type_out,
1395 tree_code orig_code, tree_code wide_code,
1396 bool shift_p, const char *name)
1398 gimple *last_stmt = last_stmt_info->stmt;
1400 vect_unpromoted_value unprom[2];
1401 tree half_type;
1402 if (!vect_widened_op_tree (vinfo, last_stmt_info, orig_code, orig_code,
1403 shift_p, 2, unprom, &half_type))
1404 return NULL;
1406 /* Pattern detected. */
1407 vect_pattern_detected (name, last_stmt);
1409 tree type = TREE_TYPE (gimple_get_lhs (last_stmt));
1410 tree itype = type;
1411 if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
1412 || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
1413 itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
1414 TYPE_UNSIGNED (half_type));
1416 /* Check target support */
1417 tree vectype = get_vectype_for_scalar_type (vinfo, half_type);
1418 tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
1419 tree ctype = itype;
1420 tree vecctype = vecitype;
1421 if (orig_code == MINUS_EXPR
1422 && TYPE_UNSIGNED (itype)
1423 && TYPE_PRECISION (type) > TYPE_PRECISION (itype))
1425 /* Subtraction is special, even if half_type is unsigned and no matter
1426 whether type is signed or unsigned, if type is wider than itype,
1427 we need to sign-extend from the widening operation result to the
1428 result type.
1429 Consider half_type unsigned char, operand 1 0xfe, operand 2 0xff,
1430 itype unsigned short and type either int or unsigned int.
1431 Widened (unsigned short) 0xfe - (unsigned short) 0xff is
1432 (unsigned short) 0xffff, but for type int we want the result -1
1433 and for type unsigned int 0xffffffff rather than 0xffff. */
1434 ctype = build_nonstandard_integer_type (TYPE_PRECISION (itype), 0);
1435 vecctype = get_vectype_for_scalar_type (vinfo, ctype);
1438 enum tree_code dummy_code;
1439 int dummy_int;
1440 auto_vec<tree> dummy_vec;
1441 if (!vectype
1442 || !vecitype
1443 || !vecctype
1444 || !supportable_widening_operation (vinfo, wide_code, last_stmt_info,
1445 vecitype, vectype,
1446 &dummy_code, &dummy_code,
1447 &dummy_int, &dummy_vec))
1448 return NULL;
1450 *type_out = get_vectype_for_scalar_type (vinfo, type);
1451 if (!*type_out)
1452 return NULL;
1454 tree oprnd[2];
1455 vect_convert_inputs (vinfo, last_stmt_info,
1456 2, oprnd, half_type, unprom, vectype);
1458 tree var = vect_recog_temp_ssa_var (itype, NULL);
1459 gimple *pattern_stmt = gimple_build_assign (var, wide_code,
1460 oprnd[0], oprnd[1]);
1462 if (vecctype != vecitype)
1463 pattern_stmt = vect_convert_output (vinfo, last_stmt_info, ctype,
1464 pattern_stmt, vecitype);
1466 return vect_convert_output (vinfo, last_stmt_info,
1467 type, pattern_stmt, vecctype);
/* Try to detect multiplication on widened inputs, converting MULT_EXPR
   to WIDEN_MULT_EXPR.  See vect_recog_widen_op_pattern for details.  */

static gimple *
vect_recog_widen_mult_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
			       tree *type_out)
{
  return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
				      MULT_EXPR, WIDEN_MULT_EXPR, false,
				      "vect_recog_widen_mult_pattern");
}
/* Try to detect addition on widened inputs, converting PLUS_EXPR
   to WIDEN_PLUS_EXPR.  See vect_recog_widen_op_pattern for details.  */

static gimple *
vect_recog_widen_plus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
			       tree *type_out)
{
  return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
				      PLUS_EXPR, WIDEN_PLUS_EXPR, false,
				      "vect_recog_widen_plus_pattern");
}
/* Try to detect subtraction on widened inputs, converting MINUS_EXPR
   to WIDEN_MINUS_EXPR.  See vect_recog_widen_op_pattern for details.  */
static gimple *
vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
				tree *type_out)
{
  return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
				      MINUS_EXPR, WIDEN_MINUS_EXPR, false,
				      "vect_recog_widen_minus_pattern");
}
/* Function vect_recog_ctz_ffs_pattern

   Try to find the following pattern:

   TYPE1 A;
   TYPE1 B;

   B = __builtin_ctz{,l,ll} (A);

   or

   B = __builtin_ffs{,l,ll} (A);

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.
   here it starts with B = __builtin_* (A);

   Output:

   * TYPE_OUT: The vector type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
     stmts that constitute the pattern, using clz or popcount builtins.  */
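
/* The replacements below rely on the identities (for nonzero X)

     .CTZ (X) = PREC - .CLZ ((X - 1) & ~X)
	      = .POPCOUNT ((X - 1) & ~X)
	      = (PREC - 1) - .CLZ (X & -X)
	      = PREC - .POPCOUNT (X | -X)
     .FFS (X) = PREC - .CLZ (X & -X)
	      = (PREC + 1) - .POPCOUNT (X | -X)

   e.g. for X = 8 and PREC = 8, (X - 1) & ~X = 7, X & -X = 8 and
   X | -X = 0xf8, giving .CTZ (8) = 8 - 5 = 3 and .FFS (8) = 8 - 4 = 4.  */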
static gimple *
vect_recog_ctz_ffs_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
			    tree *type_out)
{
  gimple *call_stmt = stmt_vinfo->stmt;
  gimple *pattern_stmt;
  tree rhs_oprnd, rhs_type, lhs_oprnd, lhs_type, vec_type, vec_rhs_type;
  tree new_var;
  internal_fn ifn = IFN_LAST, ifnnew = IFN_LAST;
  bool defined_at_zero = true, defined_at_zero_new = false;
  int val = 0, val_new = 0;
  int prec;
  int sub = 0, add = 0;
  location_t loc;

  if (!is_gimple_call (call_stmt))
    return NULL;

  if (gimple_call_num_args (call_stmt) != 1)
    return NULL;

  rhs_oprnd = gimple_call_arg (call_stmt, 0);
  rhs_type = TREE_TYPE (rhs_oprnd);
  lhs_oprnd = gimple_call_lhs (call_stmt);
  if (!lhs_oprnd)
    return NULL;
  lhs_type = TREE_TYPE (lhs_oprnd);
  if (!INTEGRAL_TYPE_P (lhs_type)
      || !INTEGRAL_TYPE_P (rhs_type)
      || !type_has_mode_precision_p (rhs_type)
      || TREE_CODE (rhs_oprnd) != SSA_NAME)
    return NULL;

  switch (gimple_call_combined_fn (call_stmt))
    {
    CASE_CFN_CTZ:
      ifn = IFN_CTZ;
      if (!gimple_call_internal_p (call_stmt)
	  || CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (rhs_type),
					val) != 2)
	defined_at_zero = false;
      break;
    CASE_CFN_FFS:
      ifn = IFN_FFS;
      break;
    default:
      return NULL;
    }

  prec = TYPE_PRECISION (rhs_type);
  loc = gimple_location (call_stmt);

  vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);
  if (!vec_type)
    return NULL;

  vec_rhs_type = get_vectype_for_scalar_type (vinfo, rhs_type);
  if (!vec_rhs_type)
    return NULL;

  /* Do it only if the backend doesn't have ctz<vector_mode>2 or
     ffs<vector_mode>2 pattern but does have clz<vector_mode>2 or
     popcount<vector_mode>2.  */
  if (!vec_type
      || direct_internal_fn_supported_p (ifn, vec_rhs_type,
					 OPTIMIZE_FOR_SPEED))
    return NULL;

  if (ifn == IFN_FFS
      && direct_internal_fn_supported_p (IFN_CTZ, vec_rhs_type,
					 OPTIMIZE_FOR_SPEED))
    {
      ifnnew = IFN_CTZ;
      defined_at_zero_new
	= CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (rhs_type),
				     val_new) == 2;
    }
  else if (direct_internal_fn_supported_p (IFN_CLZ, vec_rhs_type,
					   OPTIMIZE_FOR_SPEED))
    {
      ifnnew = IFN_CLZ;
      defined_at_zero_new
	= CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (rhs_type),
				     val_new) == 2;
    }
  if ((ifnnew == IFN_LAST
       || (defined_at_zero && !defined_at_zero_new))
      && direct_internal_fn_supported_p (IFN_POPCOUNT, vec_rhs_type,
					 OPTIMIZE_FOR_SPEED))
    {
      ifnnew = IFN_POPCOUNT;
      defined_at_zero_new = true;
      val_new = prec;
    }
  if (ifnnew == IFN_LAST)
    return NULL;

  vect_pattern_detected ("vec_recog_ctz_ffs_pattern", call_stmt);

  if ((ifnnew == IFN_CLZ
       && defined_at_zero
       && defined_at_zero_new
       && val == prec
       && val_new == prec)
      || (ifnnew == IFN_POPCOUNT && ifn == IFN_CTZ))
    {
      /* .CTZ (X) = PREC - .CLZ ((X - 1) & ~X)
	 .CTZ (X) = .POPCOUNT ((X - 1) & ~X).  */
      if (ifnnew == IFN_CLZ)
	sub = prec;
      val_new = prec;

      if (!TYPE_UNSIGNED (rhs_type))
	{
	  rhs_type = unsigned_type_for (rhs_type);
	  vec_rhs_type = get_vectype_for_scalar_type (vinfo, rhs_type);
	  new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
	  pattern_stmt = gimple_build_assign (new_var, NOP_EXPR, rhs_oprnd);
	  append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt,
				  vec_rhs_type);
	  rhs_oprnd = new_var;
	}

      tree m1 = vect_recog_temp_ssa_var (rhs_type, NULL);
      pattern_stmt = gimple_build_assign (m1, PLUS_EXPR, rhs_oprnd,
					  build_int_cst (rhs_type, -1));
      gimple_set_location (pattern_stmt, loc);
      append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);

      new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
      pattern_stmt = gimple_build_assign (new_var, BIT_NOT_EXPR, rhs_oprnd);
      gimple_set_location (pattern_stmt, loc);
      append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
      rhs_oprnd = new_var;

      new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
      pattern_stmt = gimple_build_assign (new_var, BIT_AND_EXPR,
					  m1, rhs_oprnd);
      gimple_set_location (pattern_stmt, loc);
      append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
      rhs_oprnd = new_var;
    }
  else if (ifnnew == IFN_CLZ)
    {
      /* .CTZ (X) = (PREC - 1) - .CLZ (X & -X)
	 .FFS (X) = PREC - .CLZ (X & -X).  */
      sub = prec - (ifn == IFN_CTZ);
      val_new = sub - val_new;

      tree neg = vect_recog_temp_ssa_var (rhs_type, NULL);
      pattern_stmt = gimple_build_assign (neg, NEGATE_EXPR, rhs_oprnd);
      gimple_set_location (pattern_stmt, loc);
      append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);

      new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
      pattern_stmt = gimple_build_assign (new_var, BIT_AND_EXPR,
					  rhs_oprnd, neg);
      gimple_set_location (pattern_stmt, loc);
      append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
      rhs_oprnd = new_var;
    }
  else if (ifnnew == IFN_POPCOUNT)
    {
      /* .CTZ (X) = PREC - .POPCOUNT (X | -X)
	 .FFS (X) = (PREC + 1) - .POPCOUNT (X | -X).  */
      sub = prec + (ifn == IFN_FFS);
      val_new = sub;

      tree neg = vect_recog_temp_ssa_var (rhs_type, NULL);
      pattern_stmt = gimple_build_assign (neg, NEGATE_EXPR, rhs_oprnd);
      gimple_set_location (pattern_stmt, loc);
      append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);

      new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
      pattern_stmt = gimple_build_assign (new_var, BIT_IOR_EXPR,
					  rhs_oprnd, neg);
      gimple_set_location (pattern_stmt, loc);
      append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
      rhs_oprnd = new_var;
    }
  else if (ifnnew == IFN_CTZ)
    {
      /* .FFS (X) = .CTZ (X) + 1.  */
      add = 1;
      val_new++;
    }

  /* Create B = .IFNNEW (A).  */
  new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
  pattern_stmt = gimple_build_call_internal (ifnnew, 1, rhs_oprnd);
  gimple_call_set_lhs (pattern_stmt, new_var);
  gimple_set_location (pattern_stmt, loc);
  *type_out = vec_type;

  if (sub)
    {
      append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
      tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
      pattern_stmt = gimple_build_assign (ret_var, MINUS_EXPR,
					  build_int_cst (lhs_type, sub),
					  new_var);
      gimple_set_location (pattern_stmt, loc);
      new_var = ret_var;
    }
  else if (add)
    {
      append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
      tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
      pattern_stmt = gimple_build_assign (ret_var, PLUS_EXPR, new_var,
					  build_int_cst (lhs_type, add));
      gimple_set_location (pattern_stmt, loc);
      new_var = ret_var;
    }

  if (defined_at_zero
      && (!defined_at_zero_new || val != val_new))
    {
      append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
      tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
      rhs_oprnd = gimple_call_arg (call_stmt, 0);
      rhs_type = TREE_TYPE (rhs_oprnd);
      tree cmp = build2_loc (loc, NE_EXPR, boolean_type_node,
			     rhs_oprnd, build_zero_cst (rhs_type));
      pattern_stmt = gimple_build_assign (ret_var, COND_EXPR, cmp,
					  new_var,
					  build_int_cst (lhs_type, val));
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "created pattern stmt: %G", pattern_stmt);

  return pattern_stmt;
}
1765 /* Function vect_recog_popcount_clz_ctz_ffs_pattern
1767 Try to find the following pattern:
1769 UTYPE1 A;
1770 TYPE1 B;
1771 UTYPE2 temp_in;
1772 TYPE3 temp_out;
1773 temp_in = (UTYPE2)A;
1775 temp_out = __builtin_popcount{,l,ll} (temp_in);
1776 B = (TYPE1) temp_out;
1778 TYPE2 may or may not be equal to TYPE3.
1780 e.g. TYPE2 is equal to TYPE3 for __builtin_popcount,
1780 but TYPE2 is not equal to TYPE3 for __builtin_popcountll.
1782 Input:
1784 * STMT_VINFO: The stmt from which the pattern search begins.
1785 here it starts with B = (TYPE1) temp_out;
1787 Output:
1789 * TYPE_OUT: The vector type of the output of this pattern.
1791 * Return value: A new stmt that will be used to replace the sequence of
1792 stmts that constitute the pattern. In this case it will be:
1793 B = .POPCOUNT (A);
1795 Similarly for clz, ctz and ffs.
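   As an illustrative sketch (editorial addition, not from the original
   sources; the function and variable names are invented), a loop such as

     void
     count_bits (unsigned int *a, unsigned int *b, int n)
     {
       for (int i = 0; i < n; i++)
	 b[i] = __builtin_popcountll (a[i]);
     }

   has A = a[i], temp_in = the implicit widening of a[i] to the argument
   type of __builtin_popcountll and B = b[i]; the recognizer is expected
   to replace the widening and the call with B = .POPCOUNT (A) when the
   target provides a vector popcount.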
1798 static gimple *
1799 vect_recog_popcount_clz_ctz_ffs_pattern (vec_info *vinfo,
1800 stmt_vec_info stmt_vinfo,
1801 tree *type_out)
1803 gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
1804 gimple *call_stmt, *pattern_stmt;
1805 tree rhs_oprnd, rhs_origin, lhs_oprnd, lhs_type, vec_type, new_var;
1806 internal_fn ifn = IFN_LAST;
1807 int addend = 0;
1809 /* Find B = (TYPE1) temp_out. */
1810 if (!last_stmt)
1811 return NULL;
1812 tree_code code = gimple_assign_rhs_code (last_stmt);
1813 if (!CONVERT_EXPR_CODE_P (code))
1814 return NULL;
1816 lhs_oprnd = gimple_assign_lhs (last_stmt);
1817 lhs_type = TREE_TYPE (lhs_oprnd);
1818 if (!INTEGRAL_TYPE_P (lhs_type))
1819 return NULL;
1821 rhs_oprnd = gimple_assign_rhs1 (last_stmt);
1822 if (TREE_CODE (rhs_oprnd) != SSA_NAME
1823 || !has_single_use (rhs_oprnd))
1824 return NULL;
1825 call_stmt = SSA_NAME_DEF_STMT (rhs_oprnd);
1827 /* Find temp_out = __builtin_popcount{,l,ll} (temp_in); */
1828 if (!is_gimple_call (call_stmt))
1829 return NULL;
1830 switch (gimple_call_combined_fn (call_stmt))
1832 int val;
1833 CASE_CFN_POPCOUNT:
1834 ifn = IFN_POPCOUNT;
1835 break;
1836 CASE_CFN_CLZ:
1837 ifn = IFN_CLZ;
1838 /* Punt if call result is unsigned and defined value at zero
1839 is negative, as the negative value doesn't extend correctly. */
1840 if (TYPE_UNSIGNED (TREE_TYPE (rhs_oprnd))
1841 && gimple_call_internal_p (call_stmt)
1842 && CLZ_DEFINED_VALUE_AT_ZERO
1843 (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd)), val) == 2
1844 && val < 0)
1845 return NULL;
1846 break;
1847 CASE_CFN_CTZ:
1848 ifn = IFN_CTZ;
1849 /* Punt if call result is unsigned and defined value at zero
1850 is negative, as the negative value doesn't extend correctly. */
1851 if (TYPE_UNSIGNED (TREE_TYPE (rhs_oprnd))
1852 && gimple_call_internal_p (call_stmt)
1853 && CTZ_DEFINED_VALUE_AT_ZERO
1854 (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd)), val) == 2
1855 && val < 0)
1856 return NULL;
1857 break;
1858 CASE_CFN_FFS:
1859 ifn = IFN_FFS;
1860 break;
1861 default:
1862 return NULL;
1865 if (gimple_call_num_args (call_stmt) != 1)
1866 return NULL;
1868 rhs_oprnd = gimple_call_arg (call_stmt, 0);
1869 vect_unpromoted_value unprom_diff;
1870 rhs_origin
1871 = vect_look_through_possible_promotion (vinfo, rhs_oprnd, &unprom_diff);
1873 if (!rhs_origin)
1874 return NULL;
1876 /* Input and output of .POPCOUNT should be same-precision integer. */
1877 if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (lhs_type))
1878 return NULL;
1880 /* Also A should be unsigned or same precision as temp_in, otherwise
1881 different builtins/internal functions have different behaviors. */
1882 if (TYPE_PRECISION (unprom_diff.type)
1883 != TYPE_PRECISION (TREE_TYPE (rhs_oprnd)))
1884 switch (ifn)
1886 case IFN_POPCOUNT:
1887 /* For popcount require zero extension, which doesn't add any
1888 further bits to the count. */
1889 if (!TYPE_UNSIGNED (unprom_diff.type))
1890 return NULL;
1891 break;
1892 case IFN_CLZ:
1893 /* clzll (x) == clz (x) + 32 for unsigned x != 0, so ok
1894 if it is undefined at zero or if it matches also for the
1895 defined value there. */
1896 if (!TYPE_UNSIGNED (unprom_diff.type))
1897 return NULL;
1898 if (!type_has_mode_precision_p (lhs_type)
1899 || !type_has_mode_precision_p (TREE_TYPE (rhs_oprnd)))
1900 return NULL;
1901 addend = (TYPE_PRECISION (TREE_TYPE (rhs_oprnd))
1902 - TYPE_PRECISION (lhs_type));
1903 if (gimple_call_internal_p (call_stmt))
1905 int val1, val2;
1906 int d1
1907 = CLZ_DEFINED_VALUE_AT_ZERO
1908 (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd)), val1);
1909 int d2
1910 = CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
1911 val2);
1912 if (d1 != 2)
1913 break;
1914 if (d2 != 2 || val1 != val2 + addend)
1915 return NULL;
1917 break;
1918 case IFN_CTZ:
1919 /* ctzll (x) == ctz (x) for unsigned or signed x != 0, so ok
1920 if it is undefined at zero or if it matches also for the
1921 defined value there. */
1922 if (gimple_call_internal_p (call_stmt))
1924 int val1, val2;
1925 int d1
1926 = CTZ_DEFINED_VALUE_AT_ZERO
1927 (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd)), val1);
1928 int d2
1929 = CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
1930 val2);
1931 if (d1 != 2)
1932 break;
1933 if (d2 != 2 || val1 != val2)
1934 return NULL;
1936 break;
1937 case IFN_FFS:
1938 /* ffsll (x) == ffs (x) for unsigned or signed x. */
1939 break;
1940 default:
1941 gcc_unreachable ();
1944 vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);
1945 /* Do it only if the backend has popcount<vector_mode>2 etc. pattern. */
1946 if (!vec_type)
1947 return NULL;
1949 bool supported
1950 = direct_internal_fn_supported_p (ifn, vec_type, OPTIMIZE_FOR_SPEED);
1951 if (!supported)
1952 switch (ifn)
1954 case IFN_POPCOUNT:
1955 case IFN_CLZ:
1956 return NULL;
1957 case IFN_FFS:
1958 /* vect_recog_ctz_ffs_pattern can implement ffs using ctz. */
1959 if (direct_internal_fn_supported_p (IFN_CTZ, vec_type,
1960 OPTIMIZE_FOR_SPEED))
1961 break;
1962 /* FALLTHRU */
1963 case IFN_CTZ:
1964 /* vect_recog_ctz_ffs_pattern can implement ffs or ctz using
1965 clz or popcount. */
1966 if (direct_internal_fn_supported_p (IFN_CLZ, vec_type,
1967 OPTIMIZE_FOR_SPEED))
1968 break;
1969 if (direct_internal_fn_supported_p (IFN_POPCOUNT, vec_type,
1970 OPTIMIZE_FOR_SPEED))
1971 break;
1972 return NULL;
1973 default:
1974 gcc_unreachable ();
1977 vect_pattern_detected ("vec_recog_popcount_clz_ctz_ffs_pattern",
1978 call_stmt);
1980 /* Create B = .POPCOUNT (A). */
1981 new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
1982 pattern_stmt = gimple_build_call_internal (ifn, 1, unprom_diff.op);
1983 gimple_call_set_lhs (pattern_stmt, new_var);
1984 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1985 *type_out = vec_type;
1987 if (dump_enabled_p ())
1988 dump_printf_loc (MSG_NOTE, vect_location,
1989 "created pattern stmt: %G", pattern_stmt);
1991 if (addend)
1993 gcc_assert (supported);
1994 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
1995 tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
1996 pattern_stmt = gimple_build_assign (ret_var, PLUS_EXPR, new_var,
1997 build_int_cst (lhs_type, addend));
1999 else if (!supported)
2001 stmt_vec_info new_stmt_info = vinfo->add_stmt (pattern_stmt);
2002 STMT_VINFO_VECTYPE (new_stmt_info) = vec_type;
2003 pattern_stmt
2004 = vect_recog_ctz_ffs_pattern (vinfo, new_stmt_info, type_out);
2005 if (pattern_stmt == NULL)
2006 return NULL;
2007 if (gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (new_stmt_info))
2009 gimple_seq *pseq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
2010 gimple_seq_add_seq_without_update (pseq, seq);
2013 return pattern_stmt;
2016 /* Function vect_recog_pow_pattern
2018 Try to find the following pattern:
2020 x = POW (y, N);
2022 with POW being one of pow, powf, powi, powif and N being
2023 either 2 or 0.5.
2025 Input:
2027 * STMT_VINFO: The stmt from which the pattern search begins.
2029 Output:
2031 * TYPE_OUT: The type of the output of this pattern.
2033 * Return value: A new stmt that will be used to replace the sequence of
2034 stmts that constitute the pattern. In this case it will be:
2035 x = x * x
2036 or
2037 x = sqrt (x)
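   As an illustrative sketch (editorial addition, not from the original
   sources; the names are invented), a loop such as

     void
     square_all (double *a, double *b, int n)
     {
       for (int i = 0; i < n; i++)
	 b[i] = __builtin_pow (a[i], 2.0);
     }

   would have each call replaced by a[i] * a[i]; with an exponent of 0.5
   it would instead become a .SQRT call, assuming the target supports the
   corresponding vector operations.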
2040 static gimple *
2041 vect_recog_pow_pattern (vec_info *vinfo,
2042 stmt_vec_info stmt_vinfo, tree *type_out)
2044 gimple *last_stmt = stmt_vinfo->stmt;
2045 tree base, exp;
2046 gimple *stmt;
2047 tree var;
2049 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
2050 return NULL;
2052 switch (gimple_call_combined_fn (last_stmt))
2054 CASE_CFN_POW:
2055 CASE_CFN_POWI:
2056 break;
2058 default:
2059 return NULL;
2062 base = gimple_call_arg (last_stmt, 0);
2063 exp = gimple_call_arg (last_stmt, 1);
2064 if (TREE_CODE (exp) != REAL_CST
2065 && TREE_CODE (exp) != INTEGER_CST)
2067 if (flag_unsafe_math_optimizations
2068 && TREE_CODE (base) == REAL_CST
2069 && gimple_call_builtin_p (last_stmt, BUILT_IN_NORMAL))
2071 combined_fn log_cfn;
2072 built_in_function exp_bfn;
2073 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
2075 case BUILT_IN_POW:
2076 log_cfn = CFN_BUILT_IN_LOG;
2077 exp_bfn = BUILT_IN_EXP;
2078 break;
2079 case BUILT_IN_POWF:
2080 log_cfn = CFN_BUILT_IN_LOGF;
2081 exp_bfn = BUILT_IN_EXPF;
2082 break;
2083 case BUILT_IN_POWL:
2084 log_cfn = CFN_BUILT_IN_LOGL;
2085 exp_bfn = BUILT_IN_EXPL;
2086 break;
2087 default:
2088 return NULL;
2090 tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
2091 tree exp_decl = builtin_decl_implicit (exp_bfn);
2092 /* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd
2093 does that, but if C is a power of 2, we want to use
2094 exp2 (log2 (C) * x) in the non-vectorized version, but for
2095 vectorization we don't have vectorized exp2. */
2096 if (logc
2097 && TREE_CODE (logc) == REAL_CST
2098 && exp_decl
2099 && lookup_attribute ("omp declare simd",
2100 DECL_ATTRIBUTES (exp_decl)))
2102 cgraph_node *node = cgraph_node::get_create (exp_decl);
2103 if (node->simd_clones == NULL)
2105 if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
2106 || node->definition)
2107 return NULL;
2108 expand_simd_clones (node);
2109 if (node->simd_clones == NULL)
2110 return NULL;
2112 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
2113 if (!*type_out)
2114 return NULL;
2115 tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
2116 gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
2117 append_pattern_def_seq (vinfo, stmt_vinfo, g);
2118 tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
2119 g = gimple_build_call (exp_decl, 1, def);
2120 gimple_call_set_lhs (g, res);
2121 return g;
2125 return NULL;
2128 /* We now have a pow or powi builtin function call with a constant
2129 exponent. */
2131 /* Catch squaring. */
2132 if ((tree_fits_shwi_p (exp)
2133 && tree_to_shwi (exp) == 2)
2134 || (TREE_CODE (exp) == REAL_CST
2135 && real_equal (&TREE_REAL_CST (exp), &dconst2)))
2137 if (!vect_supportable_direct_optab_p (vinfo, TREE_TYPE (base), MULT_EXPR,
2138 TREE_TYPE (base), type_out))
2139 return NULL;
2141 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
2142 stmt = gimple_build_assign (var, MULT_EXPR, base, base);
2143 return stmt;
2146 /* Catch square root. */
2147 if (TREE_CODE (exp) == REAL_CST
2148 && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
2150 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
2151 if (*type_out
2152 && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
2153 OPTIMIZE_FOR_SPEED))
2155 gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
2156 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
2157 gimple_call_set_lhs (stmt, var);
2158 gimple_call_set_nothrow (stmt, true);
2159 return stmt;
2163 return NULL;
2167 /* Function vect_recog_widen_sum_pattern
2169 Try to find the following pattern:
2171 type x_t;
2172 TYPE x_T, sum = init;
2173 loop:
2174 sum_0 = phi <init, sum_1>
2175 S1 x_t = *p;
2176 S2 x_T = (TYPE) x_t;
2177 S3 sum_1 = x_T + sum_0;
2179 where type 'TYPE' is at least double the size of type 'type', i.e - we're
2180 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
2181 a special case of a reduction computation.
2183 Input:
2185 * STMT_VINFO: The stmt from which the pattern search begins. In the example,
2186 when this function is called with S3, the pattern {S2,S3} will be detected.
2188 Output:
2190 * TYPE_OUT: The type of the output of this pattern.
2192 * Return value: A new stmt that will be used to replace the sequence of
2193 stmts that constitute the pattern. In this case it will be:
2194 WIDEN_SUM <x_t, sum_0>
2196 Note: The widening-sum idiom is a widening reduction pattern that is
2197 vectorized without preserving all the intermediate results. It
2198 produces only N/2 (widened) results (by summing up pairs of
2199 intermediate results) rather than all N results. Therefore, we
2200 cannot allow this pattern when we want to get all the results and in
2201 the correct order (as is the case when this computation is in an
2202 inner-loop nested in an outer-loop that is being vectorized). */
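/* Illustrative sketch (editorial addition, not from the original sources;
   the names are invented):

     int
     sum_bytes (unsigned char *x, int n)
     {
       int sum = 0;
       for (int i = 0; i < n; i++)
	 sum += x[i];
       return sum;
     }

   corresponds to the description above with type = unsigned char and
   TYPE = int: S2 is the implicit widening of x[i] and S3 is the addition
   into the accumulator, which the pattern replaces with
   WIDEN_SUM <x[i], sum_0> when the target provides a widening-sum optab.  */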
2204 static gimple *
2205 vect_recog_widen_sum_pattern (vec_info *vinfo,
2206 stmt_vec_info stmt_vinfo, tree *type_out)
2208 gimple *last_stmt = stmt_vinfo->stmt;
2209 tree oprnd0, oprnd1;
2210 tree type;
2211 gimple *pattern_stmt;
2212 tree var;
2214 /* Look for the following pattern
2215 DX = (TYPE) X;
2216 sum_1 = DX + sum_0;
2217 In which DX is at least double the size of X, and sum_1 has been
2218 recognized as a reduction variable.
2221 /* Starting from LAST_STMT, follow the defs of its uses in search
2222 of the above pattern. */
2224 if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
2225 &oprnd0, &oprnd1)
2226 || TREE_CODE (oprnd0) != SSA_NAME
2227 || !vinfo->lookup_def (oprnd0))
2228 return NULL;
2230 type = TREE_TYPE (gimple_get_lhs (last_stmt));
2232 /* So far so good. Since last_stmt was detected as a (summation) reduction,
2233 we know that oprnd1 is the reduction variable (defined by a loop-header
2234 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
2235 Left to check that oprnd0 is defined by a cast from type 'type' to type
2236 'TYPE'. */
2238 vect_unpromoted_value unprom0;
2239 if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
2240 || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
2241 return NULL;
2243 vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
2245 if (!vect_supportable_direct_optab_p (vinfo, type, WIDEN_SUM_EXPR,
2246 unprom0.type, type_out))
2247 return NULL;
2249 var = vect_recog_temp_ssa_var (type, NULL);
2250 pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);
2252 return pattern_stmt;
2255 /* Function vect_recog_bitfield_ref_pattern
2257 Try to find the following pattern:
2259 bf_value = BIT_FIELD_REF (container, bitsize, bitpos);
2260 result = (type_out) bf_value;
2262 where type_out is a non-bitfield type, that is to say, its precision matches
2263 2^(TYPE_SIZE(type_out) - (TYPE_UNSIGNED (type_out) ? 1 : 2)).
2265 Input:
2267 * STMT_VINFO: The stmt from which the pattern search begins.
2268 here it starts with:
2269 result = (type_out) bf_value;
2271 Output:
2273 * TYPE_OUT: The vector type of the output of this pattern.
2275 * Return value: A new stmt that will be used to replace the sequence of
2276 stmts that constitute the pattern. If the precision of type_out is bigger
2277 than the precision of the type of bf_value, we perform the widening before the shifting,
2278 since the new precision will be large enough to shift the value and moving
2279 widening operations up the statement chain enables the generation of
2280 widening loads. If we are widening and the operation after the pattern is
2281 an addition then we mask first and shift later, to enable the generation of
2282 shifting adds. In the case of narrowing we will always mask first, shift
2283 last and then perform a narrowing operation. This will enable the
2284 generation of narrowing shifts.
2286 Widening with mask first, shift later:
2287 container = (type_out) container;
2288 masked = container & (((1 << bitsize) - 1) << bitpos);
2289 result = masked >> bitpos;
2291 Widening with shift first, mask last:
2292 container = (type_out) container;
2293 shifted = container >> bitpos;
2294 result = shifted & ((1 << bitsize) - 1);
2296 Narrowing:
2297 masked = container & (((1 << bitsize) - 1) << bitpos);
2298 result = masked >> bitpos;
2299 result = (type_out) result;
2301 The shifting is always optional depending on whether bitpos != 0.
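   As an illustrative sketch (editorial addition, not from the original
   sources; the struct and names are invented), code such as

     struct item { unsigned int tag : 5; unsigned int val : 27; };

     void
     extract_tags (struct item *p, unsigned int *out, int n)
     {
       for (int i = 0; i < n; i++)
	 out[i] = p[i].tag;
     }

   reads the bit-field through a BIT_FIELD_REF of the containing 32-bit
   word (once if-conversion has lowered the access) followed by a
   conversion, which this pattern turns into one of the mask/shift
   sequences above so the loop can be vectorized.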
2305 static gimple *
2306 vect_recog_bitfield_ref_pattern (vec_info *vinfo, stmt_vec_info stmt_info,
2307 tree *type_out)
2309 gassign *first_stmt = dyn_cast <gassign *> (stmt_info->stmt);
2311 if (!first_stmt)
2312 return NULL;
2314 gassign *bf_stmt;
2315 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (first_stmt))
2316 && TREE_CODE (gimple_assign_rhs1 (first_stmt)) == SSA_NAME)
2318 gimple *second_stmt
2319 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (first_stmt));
2320 bf_stmt = dyn_cast <gassign *> (second_stmt);
2321 if (!bf_stmt
2322 || gimple_assign_rhs_code (bf_stmt) != BIT_FIELD_REF)
2323 return NULL;
2325 else
2326 return NULL;
2328 tree bf_ref = gimple_assign_rhs1 (bf_stmt);
2329 tree container = TREE_OPERAND (bf_ref, 0);
2331 if (!bit_field_offset (bf_ref).is_constant ()
2332 || !bit_field_size (bf_ref).is_constant ()
2333 || !tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (container))))
2334 return NULL;
2336 if (!INTEGRAL_TYPE_P (TREE_TYPE (bf_ref))
2337 || !INTEGRAL_TYPE_P (TREE_TYPE (container))
2338 || TYPE_MODE (TREE_TYPE (container)) == E_BLKmode)
2339 return NULL;
2341 gimple *use_stmt, *pattern_stmt;
2342 use_operand_p use_p;
2343 tree ret = gimple_assign_lhs (first_stmt);
2344 tree ret_type = TREE_TYPE (ret);
2345 bool shift_first = true;
2346 tree container_type = TREE_TYPE (container);
2347 tree vectype = get_vectype_for_scalar_type (vinfo, container_type);
2349 /* Calculate shift_n before the adjustments for widening loads, otherwise
2350 the container may change and we have to consider offset change for
2351 widening loads on big endianness. The shift_n calculated here can be
2352 independent of widening. */
2353 unsigned HOST_WIDE_INT shift_n = bit_field_offset (bf_ref).to_constant ();
2354 unsigned HOST_WIDE_INT mask_width = bit_field_size (bf_ref).to_constant ();
2355 unsigned HOST_WIDE_INT prec = tree_to_uhwi (TYPE_SIZE (container_type));
2356 if (BYTES_BIG_ENDIAN)
2357 shift_n = prec - shift_n - mask_width;
2359 /* We move the conversion earlier if the loaded type is smaller than the
2360 return type to enable the use of widening loads. */
2361 if (TYPE_PRECISION (TREE_TYPE (container)) < TYPE_PRECISION (ret_type)
2362 && !useless_type_conversion_p (TREE_TYPE (container), ret_type))
2364 pattern_stmt
2365 = gimple_build_assign (vect_recog_temp_ssa_var (ret_type),
2366 NOP_EXPR, container);
2367 container = gimple_get_lhs (pattern_stmt);
2368 container_type = TREE_TYPE (container);
2369 prec = tree_to_uhwi (TYPE_SIZE (container_type));
2370 vectype = get_vectype_for_scalar_type (vinfo, container_type);
2371 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
2373 else if (!useless_type_conversion_p (TREE_TYPE (container), ret_type))
2374 /* If we are doing the conversion last then also delay the shift as we may
2375 be able to combine the shift and conversion in certain cases. */
2376 shift_first = false;
2378 /* If the only use of the result of this BIT_FIELD_REF + CONVERT is a
2379 PLUS_EXPR then do the shift last as some targets can combine the shift and
2380 add into a single instruction. */
2381 if (single_imm_use (gimple_assign_lhs (first_stmt), &use_p, &use_stmt))
2383 if (gimple_code (use_stmt) == GIMPLE_ASSIGN
2384 && gimple_assign_rhs_code (use_stmt) == PLUS_EXPR)
2385 shift_first = false;
2388 /* If we don't have to shift we only generate the mask, so just fix the
2389 code-path to shift_first. */
2390 if (shift_n == 0)
2391 shift_first = true;
2393 tree result;
2394 if (shift_first)
2396 tree shifted = container;
2397 if (shift_n)
2399 pattern_stmt
2400 = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2401 RSHIFT_EXPR, container,
2402 build_int_cst (sizetype, shift_n));
2403 shifted = gimple_assign_lhs (pattern_stmt);
2404 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
2407 tree mask = wide_int_to_tree (container_type,
2408 wi::mask (mask_width, false, prec));
2410 pattern_stmt
2411 = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2412 BIT_AND_EXPR, shifted, mask);
2413 result = gimple_assign_lhs (pattern_stmt);
2415 else
2417 tree mask = wide_int_to_tree (container_type,
2418 wi::shifted_mask (shift_n, mask_width,
2419 false, prec));
2420 pattern_stmt
2421 = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2422 BIT_AND_EXPR, container, mask);
2423 tree masked = gimple_assign_lhs (pattern_stmt);
2425 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
2426 pattern_stmt
2427 = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2428 RSHIFT_EXPR, masked,
2429 build_int_cst (sizetype, shift_n));
2430 result = gimple_assign_lhs (pattern_stmt);
2433 if (!useless_type_conversion_p (TREE_TYPE (result), ret_type))
2435 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
2436 pattern_stmt
2437 = gimple_build_assign (vect_recog_temp_ssa_var (ret_type),
2438 NOP_EXPR, result);
2441 *type_out = STMT_VINFO_VECTYPE (stmt_info);
2442 vect_pattern_detected ("bitfield_ref pattern", stmt_info->stmt);
2444 return pattern_stmt;
2447 /* Function vect_recog_bit_insert_pattern
2449 Try to find the following pattern:
2451 written = BIT_INSERT_EXPR (container, value, bitpos);
2453 Input:
2455 * STMT_VINFO: The stmt we want to replace.
2457 Output:
2459 * TYPE_OUT: The vector type of the output of this pattern.
2461 * Return value: A new stmt that will be used to replace the sequence of
2462 stmts that constitute the pattern. In this case it will be:
2463 value = (container_type) value; // Make sure value has the container's type
2464 shifted = value << bitpos; // Shift value into place
2465 masked = shifted & (mask << bitpos); // Mask off the non-relevant bits in
2466 // the 'to-write value'.
2467 cleared = container & ~(mask << bitpos); // Clear, in the container, the
2468 // bits that we are about to
2469 // write to.
2470 written = cleared | masked; // Write bits.
2473 where mask = ((1 << TYPE_PRECISION (value)) - 1), i.e. a mask that keeps only
2474 the bits corresponding to the real size of the bitfield value we are writing.
2475 The shifting is always optional depending on whether bitpos != 0.
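   As an illustrative sketch (editorial addition, not from the original
   sources; the struct and names are invented), code such as

     struct item { unsigned int tag : 5; unsigned int val : 27; };

     void
     set_tags (struct item *p, unsigned int *in, int n)
     {
       for (int i = 0; i < n; i++)
	 p[i].tag = in[i];
     }

   stores to the bit-field through a BIT_INSERT_EXPR once if-conversion
   has lowered the access, and this pattern expands that into the
   shift/mask/ior sequence above so the loop can be vectorized.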
2479 static gimple *
2480 vect_recog_bit_insert_pattern (vec_info *vinfo, stmt_vec_info stmt_info,
2481 tree *type_out)
2483 gassign *bf_stmt = dyn_cast <gassign *> (stmt_info->stmt);
2484 if (!bf_stmt || gimple_assign_rhs_code (bf_stmt) != BIT_INSERT_EXPR)
2485 return NULL;
2487 tree container = gimple_assign_rhs1 (bf_stmt);
2488 tree value = gimple_assign_rhs2 (bf_stmt);
2489 tree shift = gimple_assign_rhs3 (bf_stmt);
2491 tree bf_type = TREE_TYPE (value);
2492 tree container_type = TREE_TYPE (container);
2494 if (!INTEGRAL_TYPE_P (container_type)
2495 || !tree_fits_uhwi_p (TYPE_SIZE (container_type)))
2496 return NULL;
2498 gimple *pattern_stmt;
2500 vect_unpromoted_value unprom;
2501 unprom.set_op (value, vect_internal_def);
2502 value = vect_convert_input (vinfo, stmt_info, container_type, &unprom,
2503 get_vectype_for_scalar_type (vinfo,
2504 container_type));
2506 unsigned HOST_WIDE_INT mask_width = TYPE_PRECISION (bf_type);
2507 unsigned HOST_WIDE_INT prec = tree_to_uhwi (TYPE_SIZE (container_type));
2508 unsigned HOST_WIDE_INT shift_n = tree_to_uhwi (shift);
2509 if (BYTES_BIG_ENDIAN)
2511 shift_n = prec - shift_n - mask_width;
2512 shift = build_int_cst (TREE_TYPE (shift), shift_n);
2515 if (!useless_type_conversion_p (TREE_TYPE (value), container_type))
2517 pattern_stmt =
2518 gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2519 NOP_EXPR, value);
2520 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
2521 value = gimple_get_lhs (pattern_stmt);
2524 /* Shift VALUE into place. */
2525 tree shifted = value;
2526 if (shift_n)
2528 gimple_seq stmts = NULL;
2529 shifted
2530 = gimple_build (&stmts, LSHIFT_EXPR, container_type, value, shift);
2531 if (!gimple_seq_empty_p (stmts))
2532 append_pattern_def_seq (vinfo, stmt_info,
2533 gimple_seq_first_stmt (stmts));
2536 tree mask_t
2537 = wide_int_to_tree (container_type,
2538 wi::shifted_mask (shift_n, mask_width, false, prec));
2540 /* Clear bits we don't want to write back from SHIFTED. */
2541 gimple_seq stmts = NULL;
2542 tree masked = gimple_build (&stmts, BIT_AND_EXPR, container_type, shifted,
2543 mask_t);
2544 if (!gimple_seq_empty_p (stmts))
2546 pattern_stmt = gimple_seq_first_stmt (stmts);
2547 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
2550 /* Mask off the bits in the container that we are to write to. */
2551 mask_t = wide_int_to_tree (container_type,
2552 wi::shifted_mask (shift_n, mask_width, true, prec));
2553 tree cleared = vect_recog_temp_ssa_var (container_type);
2554 pattern_stmt = gimple_build_assign (cleared, BIT_AND_EXPR, container, mask_t);
2555 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
2557 /* Write MASKED into CLEARED. */
2558 pattern_stmt
2559 = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2560 BIT_IOR_EXPR, cleared, masked);
2562 *type_out = STMT_VINFO_VECTYPE (stmt_info);
2563 vect_pattern_detected ("bit_insert pattern", stmt_info->stmt);
2565 return pattern_stmt;
2569 /* Recognize cases in which an operation is performed in one type WTYPE
2570 but could be done more efficiently in a narrower type NTYPE. For example,
2571 if we have:
2573 ATYPE a; // narrower than NTYPE
2574 BTYPE b; // narrower than NTYPE
2575 WTYPE aw = (WTYPE) a;
2576 WTYPE bw = (WTYPE) b;
2577 WTYPE res = aw + bw; // only uses of aw and bw
2579 then it would be more efficient to do:
2581 NTYPE an = (NTYPE) a;
2582 NTYPE bn = (NTYPE) b;
2583 NTYPE resn = an + bn;
2584 WTYPE res = (WTYPE) resn;
2586 Other situations include things like:
2588 ATYPE a; // NTYPE or narrower
2589 WTYPE aw = (WTYPE) a;
2590 WTYPE res = aw + b;
2592 when only "(NTYPE) res" is significant. In that case it's more efficient
2593 to truncate "b" and do the operation on NTYPE instead:
2595 NTYPE an = (NTYPE) a;
2596 NTYPE bn = (NTYPE) b; // truncation
2597 NTYPE resn = an + bn;
2598 WTYPE res = (WTYPE) resn;
2600 All users of "res" should then use "resn" instead, making the final
2601 statement dead (not marked as relevant). The final statement is still
2602 needed to maintain the type correctness of the IR.
2604 vect_determine_precisions has already determined the minimum
2605 precision of the operation and the minimum precision required
2606 by users of the result. */
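/* Illustrative sketch (editorial addition, not from the original sources;
   the names are invented):

     void
     add_bytes (unsigned char *a, unsigned char *b, unsigned char *c, int n)
     {
       for (int i = 0; i < n; i++)
	 c[i] = (unsigned char) (a[i] + b[i]);
     }

   In C the addition is performed in int, but since only the low 8 bits of
   each result are used, the pattern can demote the addition to a narrower
   type, keeping more elements per vector.  */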
2608 static gimple *
2609 vect_recog_over_widening_pattern (vec_info *vinfo,
2610 stmt_vec_info last_stmt_info, tree *type_out)
2612 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
2613 if (!last_stmt)
2614 return NULL;
2616 /* See whether we have found that this operation can be done on a
2617 narrower type without changing its semantics. */
2618 unsigned int new_precision = last_stmt_info->operation_precision;
2619 if (!new_precision)
2620 return NULL;
2622 tree lhs = gimple_assign_lhs (last_stmt);
2623 tree type = TREE_TYPE (lhs);
2624 tree_code code = gimple_assign_rhs_code (last_stmt);
2626 /* Punt for reductions where we don't handle the type conversions. */
2627 if (STMT_VINFO_DEF_TYPE (last_stmt_info) == vect_reduction_def)
2628 return NULL;
2630 /* Keep the first operand of a COND_EXPR as-is: only the other two
2631 operands are interesting. */
2632 unsigned int first_op = (code == COND_EXPR ? 2 : 1);
2634 /* Check the operands. */
2635 unsigned int nops = gimple_num_ops (last_stmt) - first_op;
2636 auto_vec <vect_unpromoted_value, 3> unprom (nops);
2637 unprom.quick_grow (nops);
2638 unsigned int min_precision = 0;
2639 bool single_use_p = false;
2640 for (unsigned int i = 0; i < nops; ++i)
2642 tree op = gimple_op (last_stmt, first_op + i);
2643 if (TREE_CODE (op) == INTEGER_CST)
2644 unprom[i].set_op (op, vect_constant_def);
2645 else if (TREE_CODE (op) == SSA_NAME)
2647 bool op_single_use_p = true;
2648 if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
2649 &op_single_use_p))
2650 return NULL;
2651 /* If:
2653 (1) N bits of the result are needed;
2654 (2) all inputs are widened from M<N bits; and
2655 (3) one operand OP is a single-use SSA name
2657 we can shift the M->N widening from OP to the output
2658 without changing the number or type of extensions involved.
2659 This then reduces the number of copies of STMT_INFO.
2661 If instead of (3) more than one operand is a single-use SSA name,
2662 shifting the extension to the output is even more of a win.
2664 If instead:
2666 (1) N bits of the result are needed;
2667 (2) one operand OP2 is widened from M2<N bits;
2668 (3) another operand OP1 is widened from M1<M2 bits; and
2669 (4) both OP1 and OP2 are single-use
2671 the choice is between:
2673 (a) truncating OP2 to M1, doing the operation on M1,
2674 and then widening the result to N
2676 (b) widening OP1 to M2, doing the operation on M2, and then
2677 widening the result to N
2679 Both shift the M2->N widening of the inputs to the output.
2680 (a) additionally shifts the M1->M2 widening to the output;
2681 it requires fewer copies of STMT_INFO but requires an extra
2682 M2->M1 truncation.
2684 Which is better will depend on the complexity and cost of
2685 STMT_INFO, which is hard to predict at this stage. However,
2686 a clear tie-breaker in favor of (b) is the fact that the
2687 truncation in (a) increases the length of the operation chain.
2689 If instead of (4) only one of OP1 or OP2 is single-use,
2690 (b) is still a win over doing the operation in N bits:
2691 it still shifts the M2->N widening on the single-use operand
2692 to the output and reduces the number of STMT_INFO copies.
2694 If neither operand is single-use then operating on fewer than
2695 N bits might lead to more extensions overall. Whether it does
2696 or not depends on global information about the vectorization
2697 region, and whether that's a good trade-off would again
2698 depend on the complexity and cost of the statements involved,
2699 as well as things like register pressure that are not normally
2700 modelled at this stage. We therefore ignore these cases
2701 and just optimize the clear single-use wins above.
2703 Thus we take the maximum precision of the unpromoted operands
2704 and record whether any operand is single-use. */
2705 if (unprom[i].dt == vect_internal_def)
2707 min_precision = MAX (min_precision,
2708 TYPE_PRECISION (unprom[i].type));
2709 single_use_p |= op_single_use_p;
2712 else
2713 return NULL;
2716 /* Although the operation could be done in operation_precision, we have
2717 to balance that against introducing extra truncations or extensions.
2718 Calculate the minimum precision that can be handled efficiently.
2720 The loop above determined that the operation could be handled
2721 efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
2722 extension from the inputs to the output without introducing more
2723 instructions, and would reduce the number of instructions required
2724 for STMT_INFO itself.
2726 vect_determine_precisions has also determined that the result only
2727 needs min_output_precision bits. Truncating by a factor of N times
2728 requires a tree of N - 1 instructions, so if TYPE is N times wider
2729 than min_output_precision, doing the operation in TYPE and truncating
2730 the result requires N + (N - 1) = 2N - 1 instructions per output vector.
2731 In contrast:
2733 - truncating the input to a unary operation and doing the operation
2734 in the new type requires at most N - 1 + 1 = N instructions per
2735 output vector
2737 - doing the same for a binary operation requires at most
2738 (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
2740 Both unary and binary operations require fewer instructions than
2741 this if the operands were extended from a suitable truncated form.
2742 Thus there is usually nothing to lose by doing operations in
2743 min_output_precision bits, but there can be something to gain. */
2744 if (!single_use_p)
2745 min_precision = last_stmt_info->min_output_precision;
2746 else
2747 min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
2749 /* Apply the minimum efficient precision we just calculated. */
2750 if (new_precision < min_precision)
2751 new_precision = min_precision;
2752 new_precision = vect_element_precision (new_precision);
2753 if (new_precision >= TYPE_PRECISION (type))
2754 return NULL;
2756 vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);
2758 *type_out = get_vectype_for_scalar_type (vinfo, type);
2759 if (!*type_out)
2760 return NULL;
2762 /* We've found a viable pattern. Get the new type of the operation. */
2763 bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
2764 tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
2766 /* If we're truncating an operation, we need to make sure that we
2767 don't introduce new undefined overflow. The codes tested here are
2768 a subset of those accepted by vect_truncatable_operation_p. */
2769 tree op_type = new_type;
2770 if (TYPE_OVERFLOW_UNDEFINED (new_type)
2771 && (code == PLUS_EXPR || code == MINUS_EXPR || code == MULT_EXPR))
2772 op_type = build_nonstandard_integer_type (new_precision, true);
2774 /* We specifically don't check here whether the target supports the
2775 new operation, since it might be something that a later pattern
2776 wants to rewrite anyway. If targets have a minimum element size
2777 for some optabs, we should pattern-match smaller ops to larger ops
2778 where beneficial. */
2779 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
2780 tree op_vectype = get_vectype_for_scalar_type (vinfo, op_type);
2781 if (!new_vectype || !op_vectype)
2782 return NULL;
2784 if (dump_enabled_p ())
2785 dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
2786 type, new_type);
2788 /* Calculate the rhs operands for an operation on OP_TYPE. */
2789 tree ops[3] = {};
2790 for (unsigned int i = 1; i < first_op; ++i)
2791 ops[i - 1] = gimple_op (last_stmt, i);
2792 vect_convert_inputs (vinfo, last_stmt_info, nops, &ops[first_op - 1],
2793 op_type, &unprom[0], op_vectype);
2795 /* Use the operation to produce a result of type OP_TYPE. */
2796 tree new_var = vect_recog_temp_ssa_var (op_type, NULL);
2797 gimple *pattern_stmt = gimple_build_assign (new_var, code,
2798 ops[0], ops[1], ops[2]);
2799 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
2801 if (dump_enabled_p ())
2802 dump_printf_loc (MSG_NOTE, vect_location,
2803 "created pattern stmt: %G", pattern_stmt);
2805 /* Convert back to the original signedness, if OP_TYPE is different
2806 from NEW_TYPE. */
2807 if (op_type != new_type)
2808 pattern_stmt = vect_convert_output (vinfo, last_stmt_info, new_type,
2809 pattern_stmt, op_vectype);
2811 /* Promote the result to the original type. */
2812 pattern_stmt = vect_convert_output (vinfo, last_stmt_info, type,
2813 pattern_stmt, new_vectype);
2815 return pattern_stmt;
2818 /* Recognize the following patterns:
2820 ATYPE a; // narrower than TYPE
2821 BTYPE b; // narrower than TYPE
2823 1) Multiply high with scaling
2824 TYPE res = ((TYPE) a * (TYPE) b) >> c;
2825 Here, c is bitsize (TYPE) / 2 - 1.
2827 2) ... or also with rounding
2828 TYPE res = ((((TYPE) a * (TYPE) b) >> d) + 1) >> 1;
2829 Here, d is bitsize (TYPE) / 2 - 2.
2831 3) Normal multiply high
2832 TYPE res = ((TYPE) a * (TYPE) b) >> e;
2833 Here, e is bitsize (TYPE) / 2.
2835 where only the bottom half of res is used. */
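/* Illustrative sketch (editorial addition, not from the original sources;
   the names are invented): a Q15-style fixed-point multiplication such as

     void
     mul_q15 (short *a, short *b, short *c, int n)
     {
       for (int i = 0; i < n; i++)
	 c[i] = (short) (((int) a[i] * b[i]) >> 15);
     }

   matches form 1) with c = bitsize (int) / 2 - 1 = 15 and can use
   IFN_MULHS where the target supports it; shifting by 16 instead would
   match form 3) and map to IFN_MULH.  */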
2837 static gimple *
2838 vect_recog_mulhs_pattern (vec_info *vinfo,
2839 stmt_vec_info last_stmt_info, tree *type_out)
2841 /* Check for a right shift. */
2842 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
2843 if (!last_stmt
2844 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR)
2845 return NULL;
2847 /* Check that the shift result is wider than the users of the
2848 result need (i.e. that narrowing would be a natural choice). */
2849 tree lhs_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
2850 unsigned int target_precision
2851 = vect_element_precision (last_stmt_info->min_output_precision);
2852 if (!INTEGRAL_TYPE_P (lhs_type)
2853 || target_precision >= TYPE_PRECISION (lhs_type))
2854 return NULL;
2856 /* Look through any change in sign on the outer shift input. */
2857 vect_unpromoted_value unprom_rshift_input;
2858 tree rshift_input = vect_look_through_possible_promotion
2859 (vinfo, gimple_assign_rhs1 (last_stmt), &unprom_rshift_input);
2860 if (!rshift_input
2861 || TYPE_PRECISION (TREE_TYPE (rshift_input))
2862 != TYPE_PRECISION (lhs_type))
2863 return NULL;
2865 /* Get the definition of the shift input. */
2866 stmt_vec_info rshift_input_stmt_info
2867 = vect_get_internal_def (vinfo, rshift_input);
2868 if (!rshift_input_stmt_info)
2869 return NULL;
2870 gassign *rshift_input_stmt
2871 = dyn_cast <gassign *> (rshift_input_stmt_info->stmt);
2872 if (!rshift_input_stmt)
2873 return NULL;
2875 stmt_vec_info mulh_stmt_info;
2876 tree scale_term;
2877 bool rounding_p = false;
2879 /* Check for the presence of the rounding term. */
2880 if (gimple_assign_rhs_code (rshift_input_stmt) == PLUS_EXPR)
2882 /* Check that the outer shift was by 1. */
2883 if (!integer_onep (gimple_assign_rhs2 (last_stmt)))
2884 return NULL;
2886 /* Check that the second operand of the PLUS_EXPR is 1. */
2887 if (!integer_onep (gimple_assign_rhs2 (rshift_input_stmt)))
2888 return NULL;
2890 /* Look through any change in sign on the addition input. */
2891 vect_unpromoted_value unprom_plus_input;
2892 tree plus_input = vect_look_through_possible_promotion
2893 (vinfo, gimple_assign_rhs1 (rshift_input_stmt), &unprom_plus_input);
2894 if (!plus_input
2895 || TYPE_PRECISION (TREE_TYPE (plus_input))
2896 != TYPE_PRECISION (TREE_TYPE (rshift_input)))
2897 return NULL;
2899 /* Get the definition of the multiply-high-scale part. */
2900 stmt_vec_info plus_input_stmt_info
2901 = vect_get_internal_def (vinfo, plus_input);
2902 if (!plus_input_stmt_info)
2903 return NULL;
2904 gassign *plus_input_stmt
2905 = dyn_cast <gassign *> (plus_input_stmt_info->stmt);
2906 if (!plus_input_stmt
2907 || gimple_assign_rhs_code (plus_input_stmt) != RSHIFT_EXPR)
2908 return NULL;
2910 /* Look through any change in sign on the scaling input. */
2911 vect_unpromoted_value unprom_scale_input;
2912 tree scale_input = vect_look_through_possible_promotion
2913 (vinfo, gimple_assign_rhs1 (plus_input_stmt), &unprom_scale_input);
2914 if (!scale_input
2915 || TYPE_PRECISION (TREE_TYPE (scale_input))
2916 != TYPE_PRECISION (TREE_TYPE (plus_input)))
2917 return NULL;
2919 /* Get the definition of the multiply-high part. */
2920 mulh_stmt_info = vect_get_internal_def (vinfo, scale_input);
2921 if (!mulh_stmt_info)
2922 return NULL;
2924 /* Get the scaling term. */
2925 scale_term = gimple_assign_rhs2 (plus_input_stmt);
2926 rounding_p = true;
2928 else
2930 mulh_stmt_info = rshift_input_stmt_info;
2931 scale_term = gimple_assign_rhs2 (last_stmt);
2934 /* Check that the scaling factor is constant. */
2935 if (TREE_CODE (scale_term) != INTEGER_CST)
2936 return NULL;
2938 /* Check whether the scaling input term can be seen as two widened
2939 inputs multiplied together. */
2940 vect_unpromoted_value unprom_mult[2];
2941 tree new_type;
2942 unsigned int nops
2943 = vect_widened_op_tree (vinfo, mulh_stmt_info, MULT_EXPR, WIDEN_MULT_EXPR,
2944 false, 2, unprom_mult, &new_type);
2945 if (nops != 2)
2946 return NULL;
2948 /* Adjust output precision. */
2949 if (TYPE_PRECISION (new_type) < target_precision)
2950 new_type = build_nonstandard_integer_type
2951 (target_precision, TYPE_UNSIGNED (new_type));
2953 unsigned mult_precision = TYPE_PRECISION (new_type);
2954 internal_fn ifn;
2955 /* Check that the scaling factor is expected. Instead of
2956 target_precision, we should use the one that we actually
2957 use for internal function. */
2958 if (rounding_p)
2960 /* Check pattern 2). */
2961 if (wi::to_widest (scale_term) + mult_precision + 2
2962 != TYPE_PRECISION (lhs_type))
2963 return NULL;
2965 ifn = IFN_MULHRS;
2967 else
2969 /* Check for pattern 1). */
2970 if (wi::to_widest (scale_term) + mult_precision + 1
2971 == TYPE_PRECISION (lhs_type))
2972 ifn = IFN_MULHS;
2973 /* Check for pattern 3). */
2974 else if (wi::to_widest (scale_term) + mult_precision
2975 == TYPE_PRECISION (lhs_type))
2976 ifn = IFN_MULH;
2977 else
2978 return NULL;
2981 vect_pattern_detected ("vect_recog_mulhs_pattern", last_stmt);
2983 /* Check for target support. */
2984 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
2985 if (!new_vectype
2986 || !direct_internal_fn_supported_p
2987 (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
2988 return NULL;
2990 /* The IR requires a valid vector type for the cast result, even though
2991 it's likely to be discarded. */
2992 *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
2993 if (!*type_out)
2994 return NULL;
2996 /* Generate the IFN_MULHRS call. */
2997 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
2998 tree new_ops[2];
2999 vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
3000 unprom_mult, new_vectype);
3001 gcall *mulhrs_stmt
3002 = gimple_build_call_internal (ifn, 2, new_ops[0], new_ops[1]);
3003 gimple_call_set_lhs (mulhrs_stmt, new_var);
3004 gimple_set_location (mulhrs_stmt, gimple_location (last_stmt));
3006 if (dump_enabled_p ())
3007 dump_printf_loc (MSG_NOTE, vect_location,
3008 "created pattern stmt: %G", (gimple *) mulhrs_stmt);
3010 return vect_convert_output (vinfo, last_stmt_info, lhs_type,
3011 mulhrs_stmt, new_vectype);
3014 /* Recognize the patterns:
3016 ATYPE a; // narrower than TYPE
3017 BTYPE b; // narrower than TYPE
3018 (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
3019 or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
3021 where only the bottom half of avg is used. Try to transform them into:
3023 (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
3024 or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
3026 followed by:
3028 TYPE avg = (TYPE) avg';
3030 where NTYPE is no wider than half of TYPE. Since only the bottom half
3031 of avg is used, all or part of the cast of avg' should become redundant.
3033 If there is no target support available, generate code to distribute rshift
3034 over plus and add a carry. */
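/* Illustrative sketch (editorial addition, not from the original sources;
   the names are invented):

     void
     blend (unsigned char *a, unsigned char *b, unsigned char *c, int n)
     {
       for (int i = 0; i < n; i++)
	 c[i] = (a[i] + b[i] + 1) >> 1;
     }

   The additions happen in int, but only the low 8 bits of each result are
   stored, so form (2) applies and the loop can use .AVG_CEIL on unsigned
   char elements where the target has such an instruction, or the
   shift-and-carry fallback otherwise.  */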
3036 static gimple *
3037 vect_recog_average_pattern (vec_info *vinfo,
3038 stmt_vec_info last_stmt_info, tree *type_out)
3040 /* Check for a shift right by one bit. */
3041 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
3042 if (!last_stmt
3043 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
3044 || !integer_onep (gimple_assign_rhs2 (last_stmt)))
3045 return NULL;
3047 /* Check that the shift result is wider than the users of the
3048 result need (i.e. that narrowing would be a natural choice). */
3049 tree lhs = gimple_assign_lhs (last_stmt);
3050 tree type = TREE_TYPE (lhs);
3051 unsigned int target_precision
3052 = vect_element_precision (last_stmt_info->min_output_precision);
3053 if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
3054 return NULL;
3056 /* Look through any change in sign on the shift input. */
3057 tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
3058 vect_unpromoted_value unprom_plus;
3059 rshift_rhs = vect_look_through_possible_promotion (vinfo, rshift_rhs,
3060 &unprom_plus);
3061 if (!rshift_rhs
3062 || TYPE_PRECISION (TREE_TYPE (rshift_rhs)) != TYPE_PRECISION (type))
3063 return NULL;
3065 /* Get the definition of the shift input. */
3066 stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
3067 if (!plus_stmt_info)
3068 return NULL;
3070 /* Check whether the shift input can be seen as a tree of additions on
3071 2 or 3 widened inputs.
3073 Note that the pattern should be a win even if the result of one or
3074 more additions is reused elsewhere: if the pattern matches, we'd be
3075 replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s. */
3076 internal_fn ifn = IFN_AVG_FLOOR;
3077 vect_unpromoted_value unprom[3];
3078 tree new_type;
3079 unsigned int nops = vect_widened_op_tree (vinfo, plus_stmt_info, PLUS_EXPR,
3080 WIDEN_PLUS_EXPR, false, 3,
3081 unprom, &new_type);
3082 if (nops == 0)
3083 return NULL;
3084 if (nops == 3)
3086 /* Check that one operand is 1. */
3087 unsigned int i;
3088 for (i = 0; i < 3; ++i)
3089 if (integer_onep (unprom[i].op))
3090 break;
3091 if (i == 3)
3092 return NULL;
3093 /* Throw away the 1 operand and keep the other two. */
3094 if (i < 2)
3095 unprom[i] = unprom[2];
3096 ifn = IFN_AVG_CEIL;
3099 vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
3101 /* We know that:
3103 (a) the operation can be viewed as:
3105 TYPE widened0 = (TYPE) UNPROM[0];
3106 TYPE widened1 = (TYPE) UNPROM[1];
3107 TYPE tmp1 = widened0 + widened1 {+ 1};
3108 TYPE tmp2 = tmp1 >> 1; // LAST_STMT_INFO
3110 (b) the first two statements are equivalent to:
3112 TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
3113 TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
3115 (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
3116 where sensible;
3118 (d) all the operations can be performed correctly at twice the width of
3119 NEW_TYPE, due to the nature of the average operation; and
3121 (e) users of the result of the right shift need only TARGET_PRECISION
3122 bits, where TARGET_PRECISION is no more than half of TYPE's
3123 precision.
3125 Under these circumstances, the only situation in which NEW_TYPE
3126 could be narrower than TARGET_PRECISION is if widened0, widened1
3127 and an addition result are all used more than once. Thus we can
3128 treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
3129 as "free", whereas widening the result of the average instruction
3130 from NEW_TYPE to TARGET_PRECISION would be a new operation. It's
3131 therefore better not to go narrower than TARGET_PRECISION. */
3132 if (TYPE_PRECISION (new_type) < target_precision)
3133 new_type = build_nonstandard_integer_type (target_precision,
3134 TYPE_UNSIGNED (new_type));
3136 /* Check for target support. */
3137 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
3138 if (!new_vectype)
3139 return NULL;
3141 bool fallback_p = false;
3143 if (direct_internal_fn_supported_p (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
3145 else if (TYPE_UNSIGNED (new_type)
3146 && optab_for_tree_code (RSHIFT_EXPR, new_vectype, optab_scalar)
3147 && optab_for_tree_code (PLUS_EXPR, new_vectype, optab_default)
3148 && optab_for_tree_code (BIT_IOR_EXPR, new_vectype, optab_default)
3149 && optab_for_tree_code (BIT_AND_EXPR, new_vectype, optab_default))
3150 fallback_p = true;
3151 else
3152 return NULL;
3154 /* The IR requires a valid vector type for the cast result, even though
3155 it's likely to be discarded. */
3156 *type_out = get_vectype_for_scalar_type (vinfo, type);
3157 if (!*type_out)
3158 return NULL;
3160 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
3161 tree new_ops[2];
3162 vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
3163 unprom, new_vectype);
3165 if (fallback_p)
3167 /* As a fallback, generate code for the following sequence:
3169 shifted_op0 = new_ops[0] >> 1;
3170 shifted_op1 = new_ops[1] >> 1;
3171 sum_of_shifted = shifted_op0 + shifted_op1;
3172 unmasked_carry = new_ops[0] and/or new_ops[1];
3173 carry = unmasked_carry & 1;
3174 new_var = sum_of_shifted + carry;
3177 tree one_cst = build_one_cst (new_type);
3178 gassign *g;
3180 tree shifted_op0 = vect_recog_temp_ssa_var (new_type, NULL);
3181 g = gimple_build_assign (shifted_op0, RSHIFT_EXPR, new_ops[0], one_cst);
3182 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
3184 tree shifted_op1 = vect_recog_temp_ssa_var (new_type, NULL);
3185 g = gimple_build_assign (shifted_op1, RSHIFT_EXPR, new_ops[1], one_cst);
3186 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
3188 tree sum_of_shifted = vect_recog_temp_ssa_var (new_type, NULL);
3189 g = gimple_build_assign (sum_of_shifted, PLUS_EXPR,
3190 shifted_op0, shifted_op1);
3191 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
3193 tree unmasked_carry = vect_recog_temp_ssa_var (new_type, NULL);
3194 tree_code c = (ifn == IFN_AVG_CEIL) ? BIT_IOR_EXPR : BIT_AND_EXPR;
3195 g = gimple_build_assign (unmasked_carry, c, new_ops[0], new_ops[1]);
3196 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
3198 tree carry = vect_recog_temp_ssa_var (new_type, NULL);
3199 g = gimple_build_assign (carry, BIT_AND_EXPR, unmasked_carry, one_cst);
3200 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
3202 g = gimple_build_assign (new_var, PLUS_EXPR, sum_of_shifted, carry);
3203 return vect_convert_output (vinfo, last_stmt_info, type, g, new_vectype);
3206 /* Generate the IFN_AVG* call. */
3207 gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
3208 new_ops[1]);
3209 gimple_call_set_lhs (average_stmt, new_var);
3210 gimple_set_location (average_stmt, gimple_location (last_stmt));
3212 if (dump_enabled_p ())
3213 dump_printf_loc (MSG_NOTE, vect_location,
3214 "created pattern stmt: %G", (gimple *) average_stmt);
3216 return vect_convert_output (vinfo, last_stmt_info,
3217 type, average_stmt, new_vectype);
3220 /* Recognize cases in which the input to a cast is wider than its
3221 output, and the input is fed by a widening operation. Fold this
3222 by removing the unnecessary intermediate widening. E.g.:
3224 unsigned char a;
3225 unsigned int b = (unsigned int) a;
3226 unsigned short c = (unsigned short) b;
3228 -->
3230 unsigned short c = (unsigned short) a;
3232 Although this is rare in input IR, it is an expected side-effect
3233 of the over-widening pattern above.
3235 This is beneficial also for integer-to-float conversions, if the
3236 widened integer has more bits than the float, and if the unwidened
3237 input doesn't. */
3239 static gimple *
3240 vect_recog_cast_forwprop_pattern (vec_info *vinfo,
3241 stmt_vec_info last_stmt_info, tree *type_out)
3243 /* Check for a cast, including an integer-to-float conversion. */
3244 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
3245 if (!last_stmt)
3246 return NULL;
3247 tree_code code = gimple_assign_rhs_code (last_stmt);
3248 if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
3249 return NULL;
3251 /* Make sure that the rhs is a scalar with a natural bitsize. */
3252 tree lhs = gimple_assign_lhs (last_stmt);
3253 if (!lhs)
3254 return NULL;
3255 tree lhs_type = TREE_TYPE (lhs);
3256 scalar_mode lhs_mode;
3257 if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
3258 || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
3259 return NULL;
3261 /* Check for a narrowing operation (from a vector point of view). */
3262 tree rhs = gimple_assign_rhs1 (last_stmt);
3263 tree rhs_type = TREE_TYPE (rhs);
3264 if (!INTEGRAL_TYPE_P (rhs_type)
3265 || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
3266 || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
3267 return NULL;
3269 /* Try to find an unpromoted input. */
3270 vect_unpromoted_value unprom;
3271 if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
3272 || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
3273 return NULL;
3275 /* If the bits above RHS_TYPE matter, make sure that they're the
3276 same when extending from UNPROM as they are when extending from RHS. */
3277 if (!INTEGRAL_TYPE_P (lhs_type)
3278 && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
3279 return NULL;
3281 /* We can get the same result by casting UNPROM directly, to avoid
3282 the unnecessary widening and narrowing. */
3283 vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
3285 *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
3286 if (!*type_out)
3287 return NULL;
3289 tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
3290 gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
3291 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
3293 return pattern_stmt;
3296 /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
3297 to WIDEN_LSHIFT_EXPR. See vect_recog_widen_op_pattern for details. */
3299 static gimple *
3300 vect_recog_widen_shift_pattern (vec_info *vinfo,
3301 stmt_vec_info last_stmt_info, tree *type_out)
3303 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
3304 LSHIFT_EXPR, WIDEN_LSHIFT_EXPR, true,
3305 "vect_recog_widen_shift_pattern");
3308 /* Detect a rotate pattern that wouldn't otherwise be vectorized:
3310 type a_t, b_t, c_t;
3312 S0 a_t = b_t r<< c_t;
3314 Input/Output:
3316 * STMT_VINFO: The stmt from which the pattern search begins,
3317 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
3318 with a sequence:
3320 S1 d_t = -c_t;
3321 S2 e_t = d_t & (B - 1);
3322 S3 f_t = b_t << c_t;
3323 S4 g_t = b_t >> e_t;
3324 S0 a_t = f_t | g_t;
3326 where B is the element bitsize of type.
3328 Output:
3330 * TYPE_OUT: The type of the output of this pattern.
3332 * Return value: A new stmt that will be used to replace the rotate
3333 S0 stmt. */
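/* Illustrative sketch (editorial addition, not from the original sources;
   the names are invented):

     void
     rot8 (unsigned int *a, unsigned int *b, int n)
     {
       for (int i = 0; i < n; i++)
	 a[i] = (b[i] << 8) | (b[i] >> 24);
     }

   is folded to a rotate (b[i] r<< 8) earlier in the compiler; if the
   target lacks a vector rotate but has vector shifts, this pattern
   rewrites the rotate back into a shift/ior sequence along the lines of
   S1-S0 above (the negate and mask steps are only needed for variable
   rotate amounts).  */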
3335 static gimple *
3336 vect_recog_rotate_pattern (vec_info *vinfo,
3337 stmt_vec_info stmt_vinfo, tree *type_out)
3339 gimple *last_stmt = stmt_vinfo->stmt;
3340 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
3341 gimple *pattern_stmt, *def_stmt;
3342 enum tree_code rhs_code;
3343 enum vect_def_type dt;
3344 optab optab1, optab2;
3345 edge ext_def = NULL;
3346 bool bswap16_p = false;
3348 if (is_gimple_assign (last_stmt))
3350 rhs_code = gimple_assign_rhs_code (last_stmt);
3351 switch (rhs_code)
3353 case LROTATE_EXPR:
3354 case RROTATE_EXPR:
3355 break;
3356 default:
3357 return NULL;
3360 lhs = gimple_assign_lhs (last_stmt);
3361 oprnd0 = gimple_assign_rhs1 (last_stmt);
3362 type = TREE_TYPE (oprnd0);
3363 oprnd1 = gimple_assign_rhs2 (last_stmt);
3365 else if (gimple_call_builtin_p (last_stmt, BUILT_IN_BSWAP16))
3367 /* __builtin_bswap16 (x) is another form of x r>> 8.
3368 The vectorizer has bswap support, but only if the argument isn't
3369 promoted. */
3370 lhs = gimple_call_lhs (last_stmt);
3371 oprnd0 = gimple_call_arg (last_stmt, 0);
3372 type = TREE_TYPE (oprnd0);
3373 if (!lhs
3374 || TYPE_PRECISION (TREE_TYPE (lhs)) != 16
3375 || TYPE_PRECISION (type) <= 16
3376 || TREE_CODE (oprnd0) != SSA_NAME
3377 || BITS_PER_UNIT != 8)
3378 return NULL;
3380 stmt_vec_info def_stmt_info;
3381 if (!vect_is_simple_use (oprnd0, vinfo, &dt, &def_stmt_info, &def_stmt))
3382 return NULL;
3384 if (dt != vect_internal_def)
3385 return NULL;
3387 if (gimple_assign_cast_p (def_stmt))
3389 def = gimple_assign_rhs1 (def_stmt);
3390 if (INTEGRAL_TYPE_P (TREE_TYPE (def))
3391 && TYPE_PRECISION (TREE_TYPE (def)) == 16)
3392 oprnd0 = def;
3395 type = TREE_TYPE (lhs);
3396 vectype = get_vectype_for_scalar_type (vinfo, type);
3397 if (vectype == NULL_TREE)
3398 return NULL;
3400 if (tree char_vectype = get_same_sized_vectype (char_type_node, vectype))
3402 /* The encoding uses one stepped pattern for each byte in the
3403 16-bit word. */
3404 vec_perm_builder elts (TYPE_VECTOR_SUBPARTS (char_vectype), 2, 3);
3405 for (unsigned i = 0; i < 3; ++i)
3406 for (unsigned j = 0; j < 2; ++j)
3407 elts.quick_push ((i + 1) * 2 - j - 1);
3409 vec_perm_indices indices (elts, 1,
3410 TYPE_VECTOR_SUBPARTS (char_vectype));
3411 machine_mode vmode = TYPE_MODE (char_vectype);
3412 if (can_vec_perm_const_p (vmode, vmode, indices))
3414 /* vectorizable_bswap can handle the __builtin_bswap16 if we
3415 undo the argument promotion. */
3416 if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
3418 def = vect_recog_temp_ssa_var (type, NULL);
3419 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
3420 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3421 oprnd0 = def;
3424 /* Pattern detected. */
3425 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
3427 *type_out = vectype;
3429 /* Pattern supported. Create a stmt to be used to replace the
3430 pattern, with the unpromoted argument. */
3431 var = vect_recog_temp_ssa_var (type, NULL);
3432 pattern_stmt = gimple_build_call (gimple_call_fndecl (last_stmt),
3433 1, oprnd0);
3434 gimple_call_set_lhs (pattern_stmt, var);
3435 gimple_call_set_fntype (as_a <gcall *> (pattern_stmt),
3436 gimple_call_fntype (last_stmt));
3437 return pattern_stmt;
3441 oprnd1 = build_int_cst (integer_type_node, 8);
3442 rhs_code = LROTATE_EXPR;
3443 bswap16_p = true;
3445 else
3446 return NULL;
3448 if (TREE_CODE (oprnd0) != SSA_NAME
3449 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
3450 || !INTEGRAL_TYPE_P (type))
3451 return NULL;
3453 stmt_vec_info def_stmt_info;
3454 if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt))
3455 return NULL;
3457 if (dt != vect_internal_def
3458 && dt != vect_constant_def
3459 && dt != vect_external_def)
3460 return NULL;
3462 vectype = get_vectype_for_scalar_type (vinfo, type);
3463 if (vectype == NULL_TREE)
3464 return NULL;
3466 /* If vector/vector or vector/scalar rotate is supported by the target,
3467 don't do anything here. */
3468 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
3469 if (optab1
3470 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
3472 use_rotate:
3473 if (bswap16_p)
3475 if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
3477 def = vect_recog_temp_ssa_var (type, NULL);
3478 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
3479 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3480 oprnd0 = def;
3483 /* Pattern detected. */
3484 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
3486 *type_out = vectype;
3488 /* Pattern supported. Create a stmt to be used to replace the
3489 pattern. */
3490 var = vect_recog_temp_ssa_var (type, NULL);
3491 pattern_stmt = gimple_build_assign (var, LROTATE_EXPR, oprnd0,
3492 oprnd1);
3493 return pattern_stmt;
3495 return NULL;
3498 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
3500 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
3501 if (optab2
3502 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
3503 goto use_rotate;
3506 tree utype = unsigned_type_for (type);
3507 tree uvectype = get_vectype_for_scalar_type (vinfo, utype);
3508 if (!uvectype)
3509 return NULL;
3511 /* If vector/vector or vector/scalar shifts aren't supported by the target,
3512 don't do anything here either. */
3513 optab1 = optab_for_tree_code (LSHIFT_EXPR, uvectype, optab_vector);
3514 optab2 = optab_for_tree_code (RSHIFT_EXPR, uvectype, optab_vector);
3515 if (!optab1
3516 || optab_handler (optab1, TYPE_MODE (uvectype)) == CODE_FOR_nothing
3517 || !optab2
3518 || optab_handler (optab2, TYPE_MODE (uvectype)) == CODE_FOR_nothing)
3520 if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
3521 return NULL;
3522 optab1 = optab_for_tree_code (LSHIFT_EXPR, uvectype, optab_scalar);
3523 optab2 = optab_for_tree_code (RSHIFT_EXPR, uvectype, optab_scalar);
3524 if (!optab1
3525 || optab_handler (optab1, TYPE_MODE (uvectype)) == CODE_FOR_nothing
3526 || !optab2
3527 || optab_handler (optab2, TYPE_MODE (uvectype)) == CODE_FOR_nothing)
3528 return NULL;
3531 *type_out = vectype;
3533 if (!useless_type_conversion_p (utype, TREE_TYPE (oprnd0)))
3535 def = vect_recog_temp_ssa_var (utype, NULL);
3536 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
3537 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
3538 oprnd0 = def;
3541 if (dt == vect_external_def && TREE_CODE (oprnd1) == SSA_NAME)
3542 ext_def = vect_get_external_def_edge (vinfo, oprnd1);
3544 def = NULL_TREE;
3545 scalar_int_mode mode = SCALAR_INT_TYPE_MODE (utype);
3546 if (dt != vect_internal_def || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
3547 def = oprnd1;
3548 else if (def_stmt && gimple_assign_cast_p (def_stmt))
3550 tree rhs1 = gimple_assign_rhs1 (def_stmt);
3551 if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
3552 && TYPE_PRECISION (TREE_TYPE (rhs1))
3553 == TYPE_PRECISION (type))
3554 def = rhs1;
3557 if (def == NULL_TREE)
3559 def = vect_recog_temp_ssa_var (utype, NULL);
3560 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
3561 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
3563 stype = TREE_TYPE (def);
3565 if (TREE_CODE (def) == INTEGER_CST)
3567 if (!tree_fits_uhwi_p (def)
3568 || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
3569 || integer_zerop (def))
3570 return NULL;
3571 def2 = build_int_cst (stype,
3572 GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
3574 else
3576 tree vecstype = get_vectype_for_scalar_type (vinfo, stype);
3578 if (vecstype == NULL_TREE)
3579 return NULL;
3580 def2 = vect_recog_temp_ssa_var (stype, NULL);
3581 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
3582 if (ext_def)
3584 basic_block new_bb
3585 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
3586 gcc_assert (!new_bb);
3588 else
3589 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
3591 def2 = vect_recog_temp_ssa_var (stype, NULL);
3592 tree mask = build_int_cst (stype, GET_MODE_PRECISION (mode) - 1);
3593 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
3594 gimple_assign_lhs (def_stmt), mask);
3595 if (ext_def)
3597 basic_block new_bb
3598 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
3599 gcc_assert (!new_bb);
3601 else
3602 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
3605 var1 = vect_recog_temp_ssa_var (utype, NULL);
3606 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
3607 ? LSHIFT_EXPR : RSHIFT_EXPR,
3608 oprnd0, def);
3609 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
3611 var2 = vect_recog_temp_ssa_var (utype, NULL);
3612 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
3613 ? RSHIFT_EXPR : LSHIFT_EXPR,
3614 oprnd0, def2);
3615 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
3617 /* Pattern detected. */
3618 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
3620 /* Pattern supported. Create a stmt to be used to replace the pattern. */
3621 var = vect_recog_temp_ssa_var (utype, NULL);
3622 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
3624 if (!useless_type_conversion_p (type, utype))
3626 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, uvectype);
3627 tree result = vect_recog_temp_ssa_var (type, NULL);
3628 pattern_stmt = gimple_build_assign (result, NOP_EXPR, var);
3630 return pattern_stmt;
3633 /* Detect a vector by vector shift pattern that wouldn't be otherwise
3634 vectorized:
3636 type a_t;
3637 TYPE b_T, res_T;
3639 S1 a_t = ;
3640 S2 b_T = ;
3641 S3 res_T = b_T op a_t;
3643 where type 'TYPE' is a type with different size than 'type',
3644 and op is <<, >> or rotate.
3646 Also detect cases:
3648 type a_t;
3649 TYPE b_T, c_T, res_T;
3651 S0 c_T = ;
3652 S1 a_t = (type) c_T;
3653 S2 b_T = ;
3654 S3 res_T = b_T op a_t;
3656 Input/Output:
3658 * STMT_VINFO: The stmt from which the pattern search begins,
3659 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
3660 with a shift/rotate which has same type on both operands, in the
3661 second case just b_T op c_T, in the first case with added cast
3662 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
3664 Output:
3666 * TYPE_OUT: The type of the output of this pattern.
3668 * Return value: A new stmt that will be used to replace the shift/rotate
3669 S3 stmt. */
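/* Editorial sketch (assumed example): with 'type' = unsigned char and
   'TYPE' = int, the statement

     res_T = b_T << a_t;       // shift amount is narrower than b_T

   is rewritten by adding  a_T = (TYPE) a_t;  to the pattern def
   sequence and shifting by a_T instead, so both operands of the shift
   have the same vector type.  */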
3671 static gimple *
3672 vect_recog_vector_vector_shift_pattern (vec_info *vinfo,
3673 stmt_vec_info stmt_vinfo,
3674 tree *type_out)
3676 gimple *last_stmt = stmt_vinfo->stmt;
3677 tree oprnd0, oprnd1, lhs, var;
3678 gimple *pattern_stmt;
3679 enum tree_code rhs_code;
3681 if (!is_gimple_assign (last_stmt))
3682 return NULL;
3684 rhs_code = gimple_assign_rhs_code (last_stmt);
3685 switch (rhs_code)
3687 case LSHIFT_EXPR:
3688 case RSHIFT_EXPR:
3689 case LROTATE_EXPR:
3690 case RROTATE_EXPR:
3691 break;
3692 default:
3693 return NULL;
3696 lhs = gimple_assign_lhs (last_stmt);
3697 oprnd0 = gimple_assign_rhs1 (last_stmt);
3698 oprnd1 = gimple_assign_rhs2 (last_stmt);
3699 if (TREE_CODE (oprnd0) != SSA_NAME
3700 || TREE_CODE (oprnd1) != SSA_NAME
3701 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
3702 || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
3703 || TYPE_PRECISION (TREE_TYPE (lhs))
3704 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
3705 return NULL;
3707 stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
3708 if (!def_vinfo)
3709 return NULL;
3711 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (oprnd0));
3712 if (*type_out == NULL_TREE)
3713 return NULL;
3715 tree def = NULL_TREE;
3716 gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
3717 if (def_stmt && gimple_assign_cast_p (def_stmt))
3719 tree rhs1 = gimple_assign_rhs1 (def_stmt);
3720 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
3721 && TYPE_PRECISION (TREE_TYPE (rhs1))
3722 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
3724 if (TYPE_PRECISION (TREE_TYPE (oprnd1))
3725 >= TYPE_PRECISION (TREE_TYPE (rhs1)))
3726 def = rhs1;
3727 else
3729 tree mask
3730 = build_low_bits_mask (TREE_TYPE (rhs1),
3731 TYPE_PRECISION (TREE_TYPE (oprnd1)));
3732 def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
3733 def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
3734 tree vecstype = get_vectype_for_scalar_type (vinfo,
3735 TREE_TYPE (rhs1));
3736 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
3741 if (def == NULL_TREE)
3743 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
3744 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
3745 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3748 /* Pattern detected. */
3749 vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
3751 /* Pattern supported. Create a stmt to be used to replace the pattern. */
3752 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
3753 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
3755 return pattern_stmt;
3758 /* Return true iff the target has a vector optab implementing the operation
3759 CODE on type VECTYPE. */
3761 static bool
3762 target_has_vecop_for_code (tree_code code, tree vectype)
3764 optab voptab = optab_for_tree_code (code, vectype, optab_vector);
3765 return voptab
3766 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
3769 /* Verify that the target has optabs of VECTYPE to perform all the steps
3770 needed by the multiplication-by-immediate synthesis algorithm described by
3771 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
3772 present. Return true iff the target supports all the steps. */
3774 static bool
3775 target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
3776 tree vectype, bool synth_shift_p)
3778 if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
3779 return false;
3781 bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
3782 bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
3784 if (var == negate_variant
3785 && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
3786 return false;
3788 /* If we must synthesize shifts with additions make sure that vector
3789 addition is available. */
3790 if ((var == add_variant || synth_shift_p) && !supports_vplus)
3791 return false;
3793 for (int i = 1; i < alg->ops; i++)
3795 switch (alg->op[i])
3797 case alg_shift:
3798 break;
3799 case alg_add_t_m2:
3800 case alg_add_t2_m:
3801 case alg_add_factor:
3802 if (!supports_vplus)
3803 return false;
3804 break;
3805 case alg_sub_t_m2:
3806 case alg_sub_t2_m:
3807 case alg_sub_factor:
3808 if (!supports_vminus)
3809 return false;
3810 break;
3811 case alg_unknown:
3812 case alg_m:
3813 case alg_zero:
3814 case alg_impossible:
3815 return false;
3816 default:
3817 gcc_unreachable ();
3821 return true;
3824 /* Synthesize a left shift of OP by AMNT bits using a series of additions and
3825 putting the final result in DEST. Append all statements but the last into
3826 VINFO. Return the last statement. */
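/* For example (editorial note), a left shift by 3 is emitted as three
   doubling additions:

     t1 = op + op;
     t2 = t1 + t1;
     dest = t2 + t2;       // dest == op << 3

   with only the final addition returned to the caller.  */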
3828 static gimple *
3829 synth_lshift_by_additions (vec_info *vinfo,
3830 tree dest, tree op, HOST_WIDE_INT amnt,
3831 stmt_vec_info stmt_info)
3833 HOST_WIDE_INT i;
3834 tree itype = TREE_TYPE (op);
3835 tree prev_res = op;
3836 gcc_assert (amnt >= 0);
3837 for (i = 0; i < amnt; i++)
3839 tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
3840 : dest;
3841 gimple *stmt
3842 = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
3843 prev_res = tmp_var;
3844 if (i < amnt - 1)
3845 append_pattern_def_seq (vinfo, stmt_info, stmt);
3846 else
3847 return stmt;
3849 gcc_unreachable ();
3850 return NULL;
3853 /* Helper for vect_synth_mult_by_constant. Apply a binary operation
3854 CODE to operands OP1 and OP2, creating a new temporary SSA var in
3855 the process if necessary. Append the resulting assignment statements
3856 to the sequence in STMT_VINFO. Return the SSA variable that holds the
3857 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
3858 left shifts using additions. */
3860 static tree
3861 apply_binop_and_append_stmt (vec_info *vinfo,
3862 tree_code code, tree op1, tree op2,
3863 stmt_vec_info stmt_vinfo, bool synth_shift_p)
3865 if (integer_zerop (op2)
3866 && (code == LSHIFT_EXPR
3867 || code == PLUS_EXPR))
3869 gcc_assert (TREE_CODE (op1) == SSA_NAME);
3870 return op1;
3873 gimple *stmt;
3874 tree itype = TREE_TYPE (op1);
3875 tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
3877 if (code == LSHIFT_EXPR
3878 && synth_shift_p)
3880 stmt = synth_lshift_by_additions (vinfo, tmp_var, op1,
3881 TREE_INT_CST_LOW (op2), stmt_vinfo);
3882 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3883 return tmp_var;
3886 stmt = gimple_build_assign (tmp_var, code, op1, op2);
3887 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3888 return tmp_var;
3891 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
3892 and simple arithmetic operations to be vectorized. Record the statements
3893 produced in STMT_VINFO and return the last statement in the sequence or
3894 NULL if it's not possible to synthesize such a multiplication.
3895 This function mirrors the behavior of expand_mult_const in expmed.cc but
3896 works on tree-ssa form. */
3898 static gimple *
3899 vect_synth_mult_by_constant (vec_info *vinfo, tree op, tree val,
3900 stmt_vec_info stmt_vinfo)
3902 tree itype = TREE_TYPE (op);
3903 machine_mode mode = TYPE_MODE (itype);
3904 struct algorithm alg;
3905 mult_variant variant;
3906 if (!tree_fits_shwi_p (val))
3907 return NULL;
3909 /* Multiplication synthesis by shifts, adds and subs can introduce
3910 signed overflow where the original operation didn't. Perform the
3911 operations on an unsigned type and cast back to avoid this.
3912 In the future we may want to relax this for synthesis algorithms
3913 that we can prove do not cause unexpected overflow. */
3914 bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
3916 tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
3917 tree vectype = get_vectype_for_scalar_type (vinfo, multtype);
3918 if (!vectype)
3919 return NULL;
3921 /* Targets that don't support vector shifts but support vector additions
3922 can synthesize shifts that way. */
3923 bool synth_shift_p = !vect_supportable_shift (vinfo, LSHIFT_EXPR, multtype);
3925 HOST_WIDE_INT hwval = tree_to_shwi (val);
3926 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
3927 The vectorizer's benefit analysis will decide whether it's beneficial
3928 to do this. */
3929 bool possible = choose_mult_variant (VECTOR_MODE_P (TYPE_MODE (vectype))
3930 ? TYPE_MODE (vectype) : mode,
3931 hwval, &alg, &variant, MAX_COST);
3932 if (!possible)
3933 return NULL;
3935 if (!target_supports_mult_synth_alg (&alg, variant, vectype, synth_shift_p))
3936 return NULL;
3938 tree accumulator;
3940 /* Clear out the sequence of statements so we can populate it below. */
3941 gimple *stmt = NULL;
3943 if (cast_to_unsigned_p)
3945 tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
3946 stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
3947 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3948 op = tmp_op;
3951 if (alg.op[0] == alg_zero)
3952 accumulator = build_int_cst (multtype, 0);
3953 else
3954 accumulator = op;
3956 bool needs_fixup = (variant == negate_variant)
3957 || (variant == add_variant);
3959 for (int i = 1; i < alg.ops; i++)
3961 tree shft_log = build_int_cst (multtype, alg.log[i]);
3962 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
3963 tree tmp_var = NULL_TREE;
3965 switch (alg.op[i])
3967 case alg_shift:
3968 if (synth_shift_p)
3969 stmt
3970 = synth_lshift_by_additions (vinfo, accum_tmp, accumulator,
3971 alg.log[i], stmt_vinfo);
3972 else
3973 stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
3974 shft_log);
3975 break;
3976 case alg_add_t_m2:
3977 tmp_var
3978 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op, shft_log,
3979 stmt_vinfo, synth_shift_p);
3980 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
3981 tmp_var);
3982 break;
3983 case alg_sub_t_m2:
3984 tmp_var = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op,
3985 shft_log, stmt_vinfo,
3986 synth_shift_p);
3987 /* In some algorithms the first step involves zeroing the
3988 accumulator. If subtracting from such an accumulator
3989 just emit the negation directly. */
3990 if (integer_zerop (accumulator))
3991 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
3992 else
3993 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
3994 tmp_var);
3995 break;
3996 case alg_add_t2_m:
3997 tmp_var
3998 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
3999 shft_log, stmt_vinfo, synth_shift_p);
4000 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
4001 break;
4002 case alg_sub_t2_m:
4003 tmp_var
4004 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
4005 shft_log, stmt_vinfo, synth_shift_p);
4006 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
4007 break;
4008 case alg_add_factor:
4009 tmp_var
4010 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
4011 shft_log, stmt_vinfo, synth_shift_p);
4012 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
4013 tmp_var);
4014 break;
4015 case alg_sub_factor:
4016 tmp_var
4017 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
4018 shft_log, stmt_vinfo, synth_shift_p);
4019 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
4020 accumulator);
4021 break;
4022 default:
4023 gcc_unreachable ();
4025 /* We don't want to append the last stmt in the sequence to stmt_vinfo
4026 but rather return it directly. */
4028 if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
4029 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
4030 accumulator = accum_tmp;
4032 if (variant == negate_variant)
4034 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
4035 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
4036 accumulator = accum_tmp;
4037 if (cast_to_unsigned_p)
4038 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
4040 else if (variant == add_variant)
4042 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
4043 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
4044 accumulator = accum_tmp;
4045 if (cast_to_unsigned_p)
4046 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
4048 /* Move back to a signed type if needed. */
4049 if (cast_to_unsigned_p)
4051 tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
4052 stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
4055 return stmt;
4058 /* Detect multiplication by constant and convert it into a sequence of
4059 shifts and additions, subtractions, negations. We reuse the
4060 choose_mult_variant algorithms from expmed.cc
4062 Input/Output:
4064 STMT_VINFO: The stmt from which the pattern search begins,
4065 i.e. the mult stmt.
4067 Output:
4069 * TYPE_OUT: The type of the output of this pattern.
4071 * Return value: A new stmt that will be used to replace
4072 the multiplication. */
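/* Editorial sketch (assumed example): a multiplication by the constant 9
   on a target without a native vector multiply can be synthesized as

     t = x << 3;        // or three additions if vector shifts are absent
     r = t + x;         // r == x * 9

   choose_mult_variant below picks such a sequence; whether it is
   actually profitable is left to the vectorizer's cost analysis.  */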
4074 static gimple *
4075 vect_recog_mult_pattern (vec_info *vinfo,
4076 stmt_vec_info stmt_vinfo, tree *type_out)
4078 gimple *last_stmt = stmt_vinfo->stmt;
4079 tree oprnd0, oprnd1, vectype, itype;
4080 gimple *pattern_stmt;
4082 if (!is_gimple_assign (last_stmt))
4083 return NULL;
4085 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
4086 return NULL;
4088 oprnd0 = gimple_assign_rhs1 (last_stmt);
4089 oprnd1 = gimple_assign_rhs2 (last_stmt);
4090 itype = TREE_TYPE (oprnd0);
4092 if (TREE_CODE (oprnd0) != SSA_NAME
4093 || TREE_CODE (oprnd1) != INTEGER_CST
4094 || !INTEGRAL_TYPE_P (itype)
4095 || !type_has_mode_precision_p (itype))
4096 return NULL;
4098 vectype = get_vectype_for_scalar_type (vinfo, itype);
4099 if (vectype == NULL_TREE)
4100 return NULL;
4102 /* If the target can handle vectorized multiplication natively,
4103 don't attempt to optimize this. */
4104 optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
4105 if (mul_optab != unknown_optab)
4107 machine_mode vec_mode = TYPE_MODE (vectype);
4108 int icode = (int) optab_handler (mul_optab, vec_mode);
4109 if (icode != CODE_FOR_nothing)
4110 return NULL;
4113 pattern_stmt = vect_synth_mult_by_constant (vinfo,
4114 oprnd0, oprnd1, stmt_vinfo);
4115 if (!pattern_stmt)
4116 return NULL;
4118 /* Pattern detected. */
4119 vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
4121 *type_out = vectype;
4123 return pattern_stmt;
4126 /* Detect a signed division by a constant that wouldn't be
4127 otherwise vectorized:
4129 type a_t, b_t;
4131 S1 a_t = b_t / N;
4133 where type 'type' is an integral type and N is a constant.
4135 Similarly handle modulo by a constant:
4137 S4 a_t = b_t % N;
4139 Input/Output:
4141 * STMT_VINFO: The stmt from which the pattern search begins,
4142 i.e. the division stmt. If N is a power of two constant
4143 and type is signed, S1 is replaced with:
4144 S3 y_t = b_t < 0 ? N - 1 : 0;
4145 S2 x_t = b_t + y_t;
4146 S1' a_t = x_t >> log2 (N);
4148 If N is a power of two constant and type is signed, S4 is
4149 replaced with (where *_T temporaries have unsigned type):
4150 S9 y_T = b_t < 0 ? -1U : 0U;
4151 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
4152 S7 z_t = (type) z_T;
4153 S6 w_t = b_t + z_t;
4154 S5 x_t = w_t & (N - 1);
4155 S4' a_t = x_t - z_t;
4157 Output:
4159 * TYPE_OUT: The type of the output of this pattern.
4161 * Return value: A new stmt that will be used to replace the division
4162 S1 or modulo S4 stmt. */
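/* Editorial example, instantiating the signed power-of-two case above
   with N == 4:

     a_t = b_t / 4;

   becomes

     y_t = b_t < 0 ? 3 : 0;
     x_t = b_t + y_t;
     a_t = x_t >> 2;

   which rounds towards zero like the original signed division.  */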
4164 static gimple *
4165 vect_recog_divmod_pattern (vec_info *vinfo,
4166 stmt_vec_info stmt_vinfo, tree *type_out)
4168 gimple *last_stmt = stmt_vinfo->stmt;
4169 tree oprnd0, oprnd1, vectype, itype, cond;
4170 gimple *pattern_stmt, *def_stmt;
4171 enum tree_code rhs_code;
4172 optab optab;
4173 tree q, cst;
4174 int dummy_int, prec;
4176 if (!is_gimple_assign (last_stmt))
4177 return NULL;
4179 rhs_code = gimple_assign_rhs_code (last_stmt);
4180 switch (rhs_code)
4182 case TRUNC_DIV_EXPR:
4183 case EXACT_DIV_EXPR:
4184 case TRUNC_MOD_EXPR:
4185 break;
4186 default:
4187 return NULL;
4190 oprnd0 = gimple_assign_rhs1 (last_stmt);
4191 oprnd1 = gimple_assign_rhs2 (last_stmt);
4192 itype = TREE_TYPE (oprnd0);
4193 if (TREE_CODE (oprnd0) != SSA_NAME
4194 || TREE_CODE (oprnd1) != INTEGER_CST
4195 || TREE_CODE (itype) != INTEGER_TYPE
4196 || !type_has_mode_precision_p (itype))
4197 return NULL;
4199 scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
4200 vectype = get_vectype_for_scalar_type (vinfo, itype);
4201 if (vectype == NULL_TREE)
4202 return NULL;
4204 if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
4206 /* If the target can handle vectorized division or modulo natively,
4207 don't attempt to optimize this, since native division is likely
4208 to give smaller code. */
4209 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
4210 if (optab != unknown_optab)
4212 machine_mode vec_mode = TYPE_MODE (vectype);
4213 int icode = (int) optab_handler (optab, vec_mode);
4214 if (icode != CODE_FOR_nothing)
4215 return NULL;
4219 prec = TYPE_PRECISION (itype);
4220 if (integer_pow2p (oprnd1))
4222 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
4223 return NULL;
4225 /* Pattern detected. */
4226 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
4228 *type_out = vectype;
4230 /* Check if the target supports this internal function. */
4231 internal_fn ifn = IFN_DIV_POW2;
4232 if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
4234 tree shift = build_int_cst (itype, tree_log2 (oprnd1));
4236 tree var_div = vect_recog_temp_ssa_var (itype, NULL);
4237 gimple *div_stmt = gimple_build_call_internal (ifn, 2, oprnd0, shift);
4238 gimple_call_set_lhs (div_stmt, var_div);
4240 if (rhs_code == TRUNC_MOD_EXPR)
4242 append_pattern_def_seq (vinfo, stmt_vinfo, div_stmt);
4243 def_stmt
4244 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4245 LSHIFT_EXPR, var_div, shift);
4246 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4247 pattern_stmt
4248 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4249 MINUS_EXPR, oprnd0,
4250 gimple_assign_lhs (def_stmt));
4252 else
4253 pattern_stmt = div_stmt;
4254 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
4256 return pattern_stmt;
4259 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
4260 build_int_cst (itype, 0));
4261 if (rhs_code == TRUNC_DIV_EXPR
4262 || rhs_code == EXACT_DIV_EXPR)
4264 tree var = vect_recog_temp_ssa_var (itype, NULL);
4265 tree shift;
4266 def_stmt
4267 = gimple_build_assign (var, COND_EXPR, cond,
4268 fold_build2 (MINUS_EXPR, itype, oprnd1,
4269 build_int_cst (itype, 1)),
4270 build_int_cst (itype, 0));
4271 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4272 var = vect_recog_temp_ssa_var (itype, NULL);
4273 def_stmt
4274 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
4275 gimple_assign_lhs (def_stmt));
4276 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4278 shift = build_int_cst (itype, tree_log2 (oprnd1));
4279 pattern_stmt
4280 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4281 RSHIFT_EXPR, var, shift);
4283 else
4285 tree signmask;
4286 if (compare_tree_int (oprnd1, 2) == 0)
4288 signmask = vect_recog_temp_ssa_var (itype, NULL);
4289 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
4290 build_int_cst (itype, 1),
4291 build_int_cst (itype, 0));
4292 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4294 else
4296 tree utype
4297 = build_nonstandard_integer_type (prec, 1);
4298 tree vecutype = get_vectype_for_scalar_type (vinfo, utype);
4299 tree shift
4300 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
4301 - tree_log2 (oprnd1));
4302 tree var = vect_recog_temp_ssa_var (utype, NULL);
4304 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
4305 build_int_cst (utype, -1),
4306 build_int_cst (utype, 0));
4307 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
4308 var = vect_recog_temp_ssa_var (utype, NULL);
4309 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
4310 gimple_assign_lhs (def_stmt),
4311 shift);
4312 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
4313 signmask = vect_recog_temp_ssa_var (itype, NULL);
4314 def_stmt
4315 = gimple_build_assign (signmask, NOP_EXPR, var);
4316 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4318 def_stmt
4319 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4320 PLUS_EXPR, oprnd0, signmask);
4321 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4322 def_stmt
4323 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4324 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
4325 fold_build2 (MINUS_EXPR, itype, oprnd1,
4326 build_int_cst (itype, 1)));
4327 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4329 pattern_stmt
4330 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4331 MINUS_EXPR, gimple_assign_lhs (def_stmt),
4332 signmask);
4335 return pattern_stmt;
4338 if ((cst = uniform_integer_cst_p (oprnd1))
4339 && TYPE_UNSIGNED (itype)
4340 && rhs_code == TRUNC_DIV_EXPR
4341 && vectype
4342 && targetm.vectorize.preferred_div_as_shifts_over_mult (vectype))
4344 /* We can use the relationship:
4346 x // N == ((x+N+2) // (N+1) + x) // (N+1) for 0 <= x < N(N+3)
4348 to optimize cases where N+1 is a power of 2, and where // (N+1)
4349 is therefore a shift right. When operating in modes that are
4350 multiples of a byte in size, there are two cases:
4352 (1) N(N+3) is not representable, in which case the question
4353 becomes whether the replacement expression overflows.
4354 It is enough to test that x+N+2 does not overflow,
4355 i.e. that x < MAX-(N+1).
4357 (2) N(N+3) is representable, in which case it is the (only)
4358 bound that we need to check.
4360 ??? For now we just handle the case where // (N+1) is a shift
4361 right by half the precision, since some architectures can
4362 optimize the associated addition and shift combinations
4363 into single instructions. */
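/* Editorial example (assumed values): with a 16-bit unsigned type
   (prec == 16) and N == 255, so that N + 1 == 256 == 1 << 8, and with
   range information showing that x + 257 cannot overflow, x / 255 is
   rewritten as

     t1 = x + 257;
     t2 = t1 >> 8;
     t3 = t2 + x;
     q  = t3 >> 8;  */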
4365 auto wcst = wi::to_wide (cst);
4366 int pow = wi::exact_log2 (wcst + 1);
4367 if (pow == prec / 2)
4369 gimple *stmt = SSA_NAME_DEF_STMT (oprnd0);
4371 gimple_ranger ranger;
4372 int_range_max r;
4374 /* Check that no overflow will occur. If we don't have range
4375 information we can't perform the optimization. */
4377 if (ranger.range_of_expr (r, oprnd0, stmt) && !r.undefined_p ())
4379 wide_int max = r.upper_bound ();
4380 wide_int one = wi::shwi (1, prec);
4381 wide_int adder = wi::add (one, wi::lshift (one, pow));
4382 wi::overflow_type ovf;
4383 wi::add (max, adder, UNSIGNED, &ovf);
4384 if (ovf == wi::OVF_NONE)
4386 *type_out = vectype;
4387 tree tadder = wide_int_to_tree (itype, adder);
4388 tree rshift = wide_int_to_tree (itype, pow);
4390 tree new_lhs1 = vect_recog_temp_ssa_var (itype, NULL);
4391 gassign *patt1
4392 = gimple_build_assign (new_lhs1, PLUS_EXPR, oprnd0, tadder);
4393 append_pattern_def_seq (vinfo, stmt_vinfo, patt1, vectype);
4395 tree new_lhs2 = vect_recog_temp_ssa_var (itype, NULL);
4396 patt1 = gimple_build_assign (new_lhs2, RSHIFT_EXPR, new_lhs1,
4397 rshift);
4398 append_pattern_def_seq (vinfo, stmt_vinfo, patt1, vectype);
4400 tree new_lhs3 = vect_recog_temp_ssa_var (itype, NULL);
4401 patt1 = gimple_build_assign (new_lhs3, PLUS_EXPR, new_lhs2,
4402 oprnd0);
4403 append_pattern_def_seq (vinfo, stmt_vinfo, patt1, vectype);
4405 tree new_lhs4 = vect_recog_temp_ssa_var (itype, NULL);
4406 pattern_stmt = gimple_build_assign (new_lhs4, RSHIFT_EXPR,
4407 new_lhs3, rshift);
4409 return pattern_stmt;
4415 if (prec > HOST_BITS_PER_WIDE_INT
4416 || integer_zerop (oprnd1))
4417 return NULL;
4419 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
4420 return NULL;
4422 if (TYPE_UNSIGNED (itype))
4424 unsigned HOST_WIDE_INT mh, ml;
4425 int pre_shift, post_shift;
4426 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
4427 & GET_MODE_MASK (itype_mode));
4428 tree t1, t2, t3, t4;
4430 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
4431 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
4432 return NULL;
4434 /* Find a suitable multiplier and right shift count
4435 instead of multiplying with D. */
4436 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
4438 /* If the suggested multiplier is more than SIZE bits, we can do better
4439 for even divisors, using an initial right shift. */
4440 if (mh != 0 && (d & 1) == 0)
4442 pre_shift = ctz_or_zero (d);
4443 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
4444 &ml, &post_shift, &dummy_int);
4445 gcc_assert (!mh);
4447 else
4448 pre_shift = 0;
4450 if (mh != 0)
4452 if (post_shift - 1 >= prec)
4453 return NULL;
4455 /* t1 = oprnd0 h* ml;
4456 t2 = oprnd0 - t1;
4457 t3 = t2 >> 1;
4458 t4 = t1 + t3;
4459 q = t4 >> (post_shift - 1); */
4460 t1 = vect_recog_temp_ssa_var (itype, NULL);
4461 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
4462 build_int_cst (itype, ml));
4463 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4465 t2 = vect_recog_temp_ssa_var (itype, NULL);
4466 def_stmt
4467 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
4468 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4470 t3 = vect_recog_temp_ssa_var (itype, NULL);
4471 def_stmt
4472 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
4473 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4475 t4 = vect_recog_temp_ssa_var (itype, NULL);
4476 def_stmt
4477 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
4479 if (post_shift != 1)
4481 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4483 q = vect_recog_temp_ssa_var (itype, NULL);
4484 pattern_stmt
4485 = gimple_build_assign (q, RSHIFT_EXPR, t4,
4486 build_int_cst (itype, post_shift - 1));
4488 else
4490 q = t4;
4491 pattern_stmt = def_stmt;
4494 else
4496 if (pre_shift >= prec || post_shift >= prec)
4497 return NULL;
4499 /* t1 = oprnd0 >> pre_shift;
4500 t2 = t1 h* ml;
4501 q = t2 >> post_shift; */
4502 if (pre_shift)
4504 t1 = vect_recog_temp_ssa_var (itype, NULL);
4505 def_stmt
4506 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
4507 build_int_cst (NULL, pre_shift));
4508 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4510 else
4511 t1 = oprnd0;
4513 t2 = vect_recog_temp_ssa_var (itype, NULL);
4514 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
4515 build_int_cst (itype, ml));
4517 if (post_shift)
4519 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4521 q = vect_recog_temp_ssa_var (itype, NULL);
4522 def_stmt
4523 = gimple_build_assign (q, RSHIFT_EXPR, t2,
4524 build_int_cst (itype, post_shift));
4526 else
4527 q = t2;
4529 pattern_stmt = def_stmt;
4532 else
4534 unsigned HOST_WIDE_INT ml;
4535 int post_shift;
4536 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
4537 unsigned HOST_WIDE_INT abs_d;
4538 bool add = false;
4539 tree t1, t2, t3, t4;
4541 /* Give up for -1. */
4542 if (d == -1)
4543 return NULL;
4545 /* Since d might be INT_MIN, we have to cast to
4546 unsigned HOST_WIDE_INT before negating to avoid
4547 undefined signed overflow. */
4548 abs_d = (d >= 0
4549 ? (unsigned HOST_WIDE_INT) d
4550 : - (unsigned HOST_WIDE_INT) d);
4552 /* n rem d = n rem -d */
4553 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
4555 d = abs_d;
4556 oprnd1 = build_int_cst (itype, abs_d);
4558 if (HOST_BITS_PER_WIDE_INT >= prec
4559 && abs_d == HOST_WIDE_INT_1U << (prec - 1))
4560 /* This case is not handled correctly below. */
4561 return NULL;
4563 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
4564 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
4566 add = true;
4567 ml |= HOST_WIDE_INT_M1U << (prec - 1);
4569 if (post_shift >= prec)
4570 return NULL;
4572 /* t1 = oprnd0 h* ml; */
4573 t1 = vect_recog_temp_ssa_var (itype, NULL);
4574 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
4575 build_int_cst (itype, ml));
4577 if (add)
4579 /* t2 = t1 + oprnd0; */
4580 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4581 t2 = vect_recog_temp_ssa_var (itype, NULL);
4582 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
4584 else
4585 t2 = t1;
4587 if (post_shift)
4589 /* t3 = t2 >> post_shift; */
4590 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4591 t3 = vect_recog_temp_ssa_var (itype, NULL);
4592 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
4593 build_int_cst (itype, post_shift));
4595 else
4596 t3 = t2;
4598 int msb = 1;
4599 value_range r;
4600 get_range_query (cfun)->range_of_expr (r, oprnd0);
4601 if (!r.varying_p () && !r.undefined_p ())
4603 if (!wi::neg_p (r.lower_bound (), TYPE_SIGN (itype)))
4604 msb = 0;
4605 else if (wi::neg_p (r.upper_bound (), TYPE_SIGN (itype)))
4606 msb = -1;
4609 if (msb == 0 && d >= 0)
4611 /* q = t3; */
4612 q = t3;
4613 pattern_stmt = def_stmt;
4615 else
4617 /* t4 = oprnd0 >> (prec - 1);
4618 or if we know from VRP that oprnd0 >= 0
4619 t4 = 0;
4620 or if we know from VRP that oprnd0 < 0
4621 t4 = -1; */
4622 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4623 t4 = vect_recog_temp_ssa_var (itype, NULL);
4624 if (msb != 1)
4625 def_stmt = gimple_build_assign (t4, INTEGER_CST,
4626 build_int_cst (itype, msb));
4627 else
4628 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
4629 build_int_cst (itype, prec - 1));
4630 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4632 /* q = t3 - t4; or q = t4 - t3; */
4633 q = vect_recog_temp_ssa_var (itype, NULL);
4634 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
4635 d < 0 ? t3 : t4);
4639 if (rhs_code == TRUNC_MOD_EXPR)
4641 tree r, t1;
4643 /* We divided. Now finish by:
4644 t1 = q * oprnd1;
4645 r = oprnd0 - t1; */
4646 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
4648 t1 = vect_recog_temp_ssa_var (itype, NULL);
4649 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
4650 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4652 r = vect_recog_temp_ssa_var (itype, NULL);
4653 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
4656 /* Pattern detected. */
4657 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
4659 *type_out = vectype;
4660 return pattern_stmt;
4663 /* Function vect_recog_mixed_size_cond_pattern
4665 Try to find the following pattern:
4667 type x_t, y_t;
4668 TYPE a_T, b_T, c_T;
4669 loop:
4670 S1 a_T = x_t CMP y_t ? b_T : c_T;
4672 where type 'TYPE' is an integral type which has different size
4673 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
4674 than 'type', the constants need to fit into an integer type
4675 with the same width as 'type') or results of conversion from 'type'.
4677 Input:
4679 * STMT_VINFO: The stmt from which the pattern search begins.
4681 Output:
4683 * TYPE_OUT: The type of the output of this pattern.
4685 * Return value: A new stmt that will be used to replace the pattern.
4686 Additionally a def_stmt is added.
4688 a_it = x_t CMP y_t ? b_it : c_it;
4689 a_T = (TYPE) a_it; */
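/* Editorial sketch (assumed example): with 'type' = signed char and
   'TYPE' = int,

     a_T = x_t < y_t ? 10 : 20;     // x_t, y_t are signed chars

   is rewritten with a comparison-sized intermediate

     a_it = x_t < y_t ? 10 : 20;    // 8-bit COND_EXPR
     a_T = (TYPE) a_it;

   so the COND_EXPR operates on elements of the same size as the
   comparison and only the final cast widens the result.  */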
4691 static gimple *
4692 vect_recog_mixed_size_cond_pattern (vec_info *vinfo,
4693 stmt_vec_info stmt_vinfo, tree *type_out)
4695 gimple *last_stmt = stmt_vinfo->stmt;
4696 tree cond_expr, then_clause, else_clause;
4697 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
4698 gimple *pattern_stmt, *def_stmt;
4699 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
4700 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
4701 bool promotion;
4702 tree comp_scalar_type;
4704 if (!is_gimple_assign (last_stmt)
4705 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
4706 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
4707 return NULL;
4709 cond_expr = gimple_assign_rhs1 (last_stmt);
4710 then_clause = gimple_assign_rhs2 (last_stmt);
4711 else_clause = gimple_assign_rhs3 (last_stmt);
4713 if (!COMPARISON_CLASS_P (cond_expr))
4714 return NULL;
4716 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
4717 comp_vectype = get_vectype_for_scalar_type (vinfo, comp_scalar_type);
4718 if (comp_vectype == NULL_TREE)
4719 return NULL;
4721 type = TREE_TYPE (gimple_assign_lhs (last_stmt));
4722 if (types_compatible_p (type, comp_scalar_type)
4723 || ((TREE_CODE (then_clause) != INTEGER_CST
4724 || TREE_CODE (else_clause) != INTEGER_CST)
4725 && !INTEGRAL_TYPE_P (comp_scalar_type))
4726 || !INTEGRAL_TYPE_P (type))
4727 return NULL;
4729 if ((TREE_CODE (then_clause) != INTEGER_CST
4730 && !type_conversion_p (vinfo, then_clause, false,
4731 &orig_type0, &def_stmt0, &promotion))
4732 || (TREE_CODE (else_clause) != INTEGER_CST
4733 && !type_conversion_p (vinfo, else_clause, false,
4734 &orig_type1, &def_stmt1, &promotion)))
4735 return NULL;
4737 if (orig_type0 && orig_type1
4738 && !types_compatible_p (orig_type0, orig_type1))
4739 return NULL;
4741 if (orig_type0)
4743 if (!types_compatible_p (orig_type0, comp_scalar_type))
4744 return NULL;
4745 then_clause = gimple_assign_rhs1 (def_stmt0);
4746 itype = orig_type0;
4749 if (orig_type1)
4751 if (!types_compatible_p (orig_type1, comp_scalar_type))
4752 return NULL;
4753 else_clause = gimple_assign_rhs1 (def_stmt1);
4754 itype = orig_type1;
4758 HOST_WIDE_INT cmp_mode_size
4759 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
4761 scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
4762 if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
4763 return NULL;
4765 vectype = get_vectype_for_scalar_type (vinfo, type);
4766 if (vectype == NULL_TREE)
4767 return NULL;
4769 if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
4770 return NULL;
4772 if (itype == NULL_TREE)
4773 itype = build_nonstandard_integer_type (cmp_mode_size,
4774 TYPE_UNSIGNED (type));
4776 if (itype == NULL_TREE
4777 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
4778 return NULL;
4780 vecitype = get_vectype_for_scalar_type (vinfo, itype);
4781 if (vecitype == NULL_TREE)
4782 return NULL;
4784 if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
4785 return NULL;
4787 if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
4789 if ((TREE_CODE (then_clause) == INTEGER_CST
4790 && !int_fits_type_p (then_clause, itype))
4791 || (TREE_CODE (else_clause) == INTEGER_CST
4792 && !int_fits_type_p (else_clause, itype)))
4793 return NULL;
4796 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4797 COND_EXPR, unshare_expr (cond_expr),
4798 fold_convert (itype, then_clause),
4799 fold_convert (itype, else_clause));
4800 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
4801 NOP_EXPR, gimple_assign_lhs (def_stmt));
4803 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecitype);
4804 *type_out = vectype;
4806 vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt);
4808 return pattern_stmt;
4812 /* Helper function of vect_recog_bool_pattern. Called recursively, return
4813 true if bool VAR can and should be optimized that way. Assume it shouldn't
4814 in case it's a result of a comparison which can be directly vectorized into
4815 a vector comparison. Fills in STMTS with all stmts visited during the
4816 walk. */
4818 static bool
4819 check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
4821 tree rhs1;
4822 enum tree_code rhs_code;
4824 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
4825 if (!def_stmt_info)
4826 return false;
4828 gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
4829 if (!def_stmt)
4830 return false;
4832 if (stmts.contains (def_stmt))
4833 return true;
4835 rhs1 = gimple_assign_rhs1 (def_stmt);
4836 rhs_code = gimple_assign_rhs_code (def_stmt);
4837 switch (rhs_code)
4839 case SSA_NAME:
4840 if (! check_bool_pattern (rhs1, vinfo, stmts))
4841 return false;
4842 break;
4844 CASE_CONVERT:
4845 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
4846 return false;
4847 if (! check_bool_pattern (rhs1, vinfo, stmts))
4848 return false;
4849 break;
4851 case BIT_NOT_EXPR:
4852 if (! check_bool_pattern (rhs1, vinfo, stmts))
4853 return false;
4854 break;
4856 case BIT_AND_EXPR:
4857 case BIT_IOR_EXPR:
4858 case BIT_XOR_EXPR:
4859 if (! check_bool_pattern (rhs1, vinfo, stmts)
4860 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
4861 return false;
4862 break;
4864 default:
4865 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
4867 tree vecitype, comp_vectype;
4869 /* If the comparison can throw, then is_gimple_condexpr will be
4870 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
4871 if (stmt_could_throw_p (cfun, def_stmt))
4872 return false;
4874 comp_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1));
4875 if (comp_vectype == NULL_TREE)
4876 return false;
4878 tree mask_type = get_mask_type_for_scalar_type (vinfo,
4879 TREE_TYPE (rhs1));
4880 if (mask_type
4881 && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
4882 return false;
4884 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
4886 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
4887 tree itype
4888 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
4889 vecitype = get_vectype_for_scalar_type (vinfo, itype);
4890 if (vecitype == NULL_TREE)
4891 return false;
4893 else
4894 vecitype = comp_vectype;
4895 if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
4896 return false;
4898 else
4899 return false;
4900 break;
4903 bool res = stmts.add (def_stmt);
4904 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
4905 gcc_assert (!res);
4907 return true;
4911 /* Helper function of adjust_bool_pattern. Add a cast to TYPE of the value
4912 defined by a previous stmt (SSA_NAME_DEF_STMT of VAR), appending the cast
4913 to STMT_INFO's pattern sequence. */
4915 static tree
4916 adjust_bool_pattern_cast (vec_info *vinfo,
4917 tree type, tree var, stmt_vec_info stmt_info)
4919 gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
4920 NOP_EXPR, var);
4921 append_pattern_def_seq (vinfo, stmt_info, cast_stmt,
4922 get_vectype_for_scalar_type (vinfo, type));
4923 return gimple_assign_lhs (cast_stmt);
4926 /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
4927 VAR is an SSA_NAME that should be transformed from bool to a wider integer
4928 type, OUT_TYPE is the desired final integer type of the whole pattern.
4929 STMT_INFO is the info of the pattern root and is where pattern stmts should
4930 be associated with. DEFS is a map of pattern defs. */
4932 static void
4933 adjust_bool_pattern (vec_info *vinfo, tree var, tree out_type,
4934 stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
4936 gimple *stmt = SSA_NAME_DEF_STMT (var);
4937 enum tree_code rhs_code, def_rhs_code;
4938 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
4939 location_t loc;
4940 gimple *pattern_stmt, *def_stmt;
4941 tree trueval = NULL_TREE;
4943 rhs1 = gimple_assign_rhs1 (stmt);
4944 rhs2 = gimple_assign_rhs2 (stmt);
4945 rhs_code = gimple_assign_rhs_code (stmt);
4946 loc = gimple_location (stmt);
4947 switch (rhs_code)
4949 case SSA_NAME:
4950 CASE_CONVERT:
4951 irhs1 = *defs.get (rhs1);
4952 itype = TREE_TYPE (irhs1);
4953 pattern_stmt
4954 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4955 SSA_NAME, irhs1);
4956 break;
4958 case BIT_NOT_EXPR:
4959 irhs1 = *defs.get (rhs1);
4960 itype = TREE_TYPE (irhs1);
4961 pattern_stmt
4962 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4963 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
4964 break;
4966 case BIT_AND_EXPR:
4967 /* Try to optimize x = y & (a < b ? 1 : 0); into
4968 x = (a < b ? y : 0);
4970 E.g. for:
4971 bool a_b, b_b, c_b;
4972 TYPE d_T;
4974 S1 a_b = x1 CMP1 y1;
4975 S2 b_b = x2 CMP2 y2;
4976 S3 c_b = a_b & b_b;
4977 S4 d_T = (TYPE) c_b;
4979 we would normally emit:
4981 S1' a_T = x1 CMP1 y1 ? 1 : 0;
4982 S2' b_T = x2 CMP2 y2 ? 1 : 0;
4983 S3' c_T = a_T & b_T;
4984 S4' d_T = c_T;
4986 but we can save one stmt by using the
4987 result of one of the COND_EXPRs in the other COND_EXPR and leave
4988 BIT_AND_EXPR stmt out:
4990 S1' a_T = x1 CMP1 y1 ? 1 : 0;
4991 S3' c_T = x2 CMP2 y2 ? a_T : 0;
4992 S4' d_T = c_T;
4994 At least when VEC_COND_EXPR is implemented using masks
4995 cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases it
4996 computes the comparison mask and ANDs it, in one case with an
4997 all-ones vector, in the other case with a vector register.
4998 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
4999 often more expensive. */
5000 def_stmt = SSA_NAME_DEF_STMT (rhs2);
5001 def_rhs_code = gimple_assign_rhs_code (def_stmt);
5002 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
5004 irhs1 = *defs.get (rhs1);
5005 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
5006 if (TYPE_PRECISION (TREE_TYPE (irhs1))
5007 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
5009 rhs_code = def_rhs_code;
5010 rhs1 = def_rhs1;
5011 rhs2 = gimple_assign_rhs2 (def_stmt);
5012 trueval = irhs1;
5013 goto do_compare;
5015 else
5016 irhs2 = *defs.get (rhs2);
5017 goto and_ior_xor;
5019 def_stmt = SSA_NAME_DEF_STMT (rhs1);
5020 def_rhs_code = gimple_assign_rhs_code (def_stmt);
5021 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
5023 irhs2 = *defs.get (rhs2);
5024 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
5025 if (TYPE_PRECISION (TREE_TYPE (irhs2))
5026 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
5028 rhs_code = def_rhs_code;
5029 rhs1 = def_rhs1;
5030 rhs2 = gimple_assign_rhs2 (def_stmt);
5031 trueval = irhs2;
5032 goto do_compare;
5034 else
5035 irhs1 = *defs.get (rhs1);
5036 goto and_ior_xor;
5038 /* FALLTHRU */
5039 case BIT_IOR_EXPR:
5040 case BIT_XOR_EXPR:
5041 irhs1 = *defs.get (rhs1);
5042 irhs2 = *defs.get (rhs2);
5043 and_ior_xor:
5044 if (TYPE_PRECISION (TREE_TYPE (irhs1))
5045 != TYPE_PRECISION (TREE_TYPE (irhs2)))
5047 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
5048 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
5049 int out_prec = TYPE_PRECISION (out_type);
5050 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
5051 irhs2 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs1), irhs2,
5052 stmt_info);
5053 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
5054 irhs1 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs2), irhs1,
5055 stmt_info);
5056 else
5058 irhs1 = adjust_bool_pattern_cast (vinfo,
5059 out_type, irhs1, stmt_info);
5060 irhs2 = adjust_bool_pattern_cast (vinfo,
5061 out_type, irhs2, stmt_info);
5064 itype = TREE_TYPE (irhs1);
5065 pattern_stmt
5066 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
5067 rhs_code, irhs1, irhs2);
5068 break;
5070 default:
5071 do_compare:
5072 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
5073 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
5074 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
5075 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
5076 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
5078 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
5079 itype
5080 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
5082 else
5083 itype = TREE_TYPE (rhs1);
5084 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
5085 if (trueval == NULL_TREE)
5086 trueval = build_int_cst (itype, 1);
5087 else
5088 gcc_checking_assert (useless_type_conversion_p (itype,
5089 TREE_TYPE (trueval)));
5090 pattern_stmt
5091 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
5092 COND_EXPR, cond_expr, trueval,
5093 build_int_cst (itype, 0));
5094 break;
5097 gimple_set_location (pattern_stmt, loc);
5098 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt,
5099 get_vectype_for_scalar_type (vinfo, itype));
5100 defs.put (var, gimple_assign_lhs (pattern_stmt));
5103 /* Comparison function to qsort a vector of gimple stmts by UID. */
5105 static int
5106 sort_after_uid (const void *p1, const void *p2)
5108 const gimple *stmt1 = *(const gimple * const *)p1;
5109 const gimple *stmt2 = *(const gimple * const *)p2;
5110 return gimple_uid (stmt1) - gimple_uid (stmt2);
5113 /* Create pattern stmts for all stmts participating in the bool pattern
5114 specified by BOOL_STMT_SET and its root STMT_INFO with the desired type
5115 OUT_TYPE. Return the def of the pattern root. */
5117 static tree
5118 adjust_bool_stmts (vec_info *vinfo, hash_set <gimple *> &bool_stmt_set,
5119 tree out_type, stmt_vec_info stmt_info)
5121 /* Gather original stmts in the bool pattern in their order of appearance
5122 in the IL. */
5123 auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
5124 for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
5125 i != bool_stmt_set.end (); ++i)
5126 bool_stmts.quick_push (*i);
5127 bool_stmts.qsort (sort_after_uid);
5129 /* Now process them in that order, producing pattern stmts. */
5130 hash_map <tree, tree> defs;
5131 for (unsigned i = 0; i < bool_stmts.length (); ++i)
5132 adjust_bool_pattern (vinfo, gimple_assign_lhs (bool_stmts[i]),
5133 out_type, stmt_info, defs);
5135 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
5136 gimple *pattern_stmt
5137 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
5138 return gimple_assign_lhs (pattern_stmt);
5141 /* Return the proper type for converting bool VAR into
5142 an integer value or NULL_TREE if no such type exists.
5143 The type is chosen so that the converted value has the
5144 same number of elements as VAR's vector type. */
5146 static tree
5147 integer_type_for_mask (tree var, vec_info *vinfo)
5149 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
5150 return NULL_TREE;
5152 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
5153 if (!def_stmt_info || !vect_use_mask_type_p (def_stmt_info))
5154 return NULL_TREE;
5156 return build_nonstandard_integer_type (def_stmt_info->mask_precision, 1);
5159 /* Function vect_recog_bool_pattern
5161 Try to find pattern like following:
5163 bool a_b, b_b, c_b, d_b, e_b;
5164 TYPE f_T;
5165 loop:
5166 S1 a_b = x1 CMP1 y1;
5167 S2 b_b = x2 CMP2 y2;
5168 S3 c_b = a_b & b_b;
5169 S4 d_b = x3 CMP3 y3;
5170 S5 e_b = c_b | d_b;
5171 S6 f_T = (TYPE) e_b;
5173 where type 'TYPE' is an integral type. Or a similar pattern
5174 ending in
5176 S6 f_Y = e_b ? r_Y : s_Y;
5178 as results from if-conversion of a complex condition.
5180 Input:
5182 * STMT_VINFO: The stmt at the end from which the pattern
5183 search begins, i.e. cast of a bool to
5184 an integer type.
5186 Output:
5188 * TYPE_OUT: The type of the output of this pattern.
5190 * Return value: A new stmt that will be used to replace the pattern.
5192 Assuming size of TYPE is the same as size of all comparisons
5193 (otherwise some casts would be added where needed), for the above
5194 sequence we create related pattern stmts:
5195 S1' a_T = x1 CMP1 y1 ? 1 : 0;
5196 S3' c_T = x2 CMP2 y2 ? a_T : 0;
5197 S4' d_T = x3 CMP3 y3 ? 1 : 0;
5198 S5' e_T = c_T | d_T;
5199 S6' f_T = e_T;
5201 Instead of the above S3' we could emit:
5202 S2' b_T = x2 CMP2 y2 ? 1 : 0;
5203 S3' c_T = a_T | b_T;
5204 but the above is more efficient. */
5206 static gimple *
5207 vect_recog_bool_pattern (vec_info *vinfo,
5208 stmt_vec_info stmt_vinfo, tree *type_out)
5210 gimple *last_stmt = stmt_vinfo->stmt;
5211 enum tree_code rhs_code;
5212 tree var, lhs, rhs, vectype;
5213 gimple *pattern_stmt;
5215 if (!is_gimple_assign (last_stmt))
5216 return NULL;
5218 var = gimple_assign_rhs1 (last_stmt);
5219 lhs = gimple_assign_lhs (last_stmt);
5220 rhs_code = gimple_assign_rhs_code (last_stmt);
5222 if (rhs_code == VIEW_CONVERT_EXPR)
5223 var = TREE_OPERAND (var, 0);
5225 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
5226 return NULL;
5228 hash_set<gimple *> bool_stmts;
5230 if (CONVERT_EXPR_CODE_P (rhs_code)
5231 || rhs_code == VIEW_CONVERT_EXPR)
5233 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
5234 || VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
5235 return NULL;
5236 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
5238 if (check_bool_pattern (var, vinfo, bool_stmts))
5240 rhs = adjust_bool_stmts (vinfo, bool_stmts,
5241 TREE_TYPE (lhs), stmt_vinfo);
5242 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
5243 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
5244 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
5245 else
5246 pattern_stmt
5247 = gimple_build_assign (lhs, NOP_EXPR, rhs);
5249 else
5251 tree type = integer_type_for_mask (var, vinfo);
5252 tree cst0, cst1, tmp;
5254 if (!type)
5255 return NULL;
5257 /* We may directly use the cond with a narrowed type to avoid
5258 multiple cond exprs with subsequent result packing and
5259 perform a single cond with a packed mask instead. In case
5260 of widening it is better to do the cond first and then
5261 extract the results. */
5262 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
5263 type = TREE_TYPE (lhs);
5265 cst0 = build_int_cst (type, 0);
5266 cst1 = build_int_cst (type, 1);
5267 tmp = vect_recog_temp_ssa_var (type, NULL);
5268 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
5270 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
5272 tree new_vectype = get_vectype_for_scalar_type (vinfo, type);
5273 append_pattern_def_seq (vinfo, stmt_vinfo,
5274 pattern_stmt, new_vectype);
5276 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
5277 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
5281 *type_out = vectype;
5282 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
5284 return pattern_stmt;
5286 else if (rhs_code == COND_EXPR
5287 && TREE_CODE (var) == SSA_NAME)
5289 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
5290 if (vectype == NULL_TREE)
5291 return NULL;
5293 /* Build a scalar type for the boolean result that when
5294 vectorized matches the vector type of the result in
5295 size and number of elements. */
5296 unsigned prec
5297 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
5298 TYPE_VECTOR_SUBPARTS (vectype));
5300 tree type
5301 = build_nonstandard_integer_type (prec,
5302 TYPE_UNSIGNED (TREE_TYPE (var)));
5303 if (get_vectype_for_scalar_type (vinfo, type) == NULL_TREE)
5304 return NULL;
5306 if (check_bool_pattern (var, vinfo, bool_stmts))
5307 var = adjust_bool_stmts (vinfo, bool_stmts, type, stmt_vinfo);
5308 else if (integer_type_for_mask (var, vinfo))
5309 return NULL;
5311 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
5312 pattern_stmt
5313 = gimple_build_assign (lhs, COND_EXPR,
5314 build2 (NE_EXPR, boolean_type_node,
5315 var, build_int_cst (TREE_TYPE (var), 0)),
5316 gimple_assign_rhs2 (last_stmt),
5317 gimple_assign_rhs3 (last_stmt));
5318 *type_out = vectype;
5319 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
5321 return pattern_stmt;
5323 else if (rhs_code == SSA_NAME
5324 && STMT_VINFO_DATA_REF (stmt_vinfo))
5326 stmt_vec_info pattern_stmt_info;
5327 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
5328 if (!vectype || !VECTOR_MODE_P (TYPE_MODE (vectype)))
5329 return NULL;
5331 if (check_bool_pattern (var, vinfo, bool_stmts))
5332 rhs = adjust_bool_stmts (vinfo, bool_stmts,
5333 TREE_TYPE (vectype), stmt_vinfo);
5334 else
5336 tree type = integer_type_for_mask (var, vinfo);
5337 tree cst0, cst1, new_vectype;
5339 if (!type)
5340 return NULL;
5342 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
5343 type = TREE_TYPE (vectype);
5345 cst0 = build_int_cst (type, 0);
5346 cst1 = build_int_cst (type, 1);
5347 new_vectype = get_vectype_for_scalar_type (vinfo, type);
5349 rhs = vect_recog_temp_ssa_var (type, NULL);
5350 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
5351 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, new_vectype);
5354 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
5355 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
5357 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
5358 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
5359 append_pattern_def_seq (vinfo, stmt_vinfo, cast_stmt);
5360 rhs = rhs2;
5362 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
5363 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
5364 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
5365 *type_out = vectype;
5366 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
5368 return pattern_stmt;
5370 else
5371 return NULL;
5375 /* A helper for vect_recog_mask_conversion_pattern. Build a
5376 conversion of MASK to a type suitable for masking VECTYPE.
5377 The built statement gets the required vectype and is appended
5378 to STMT_VINFO's pattern definition sequence.
5380 Return the converted mask. */
5382 static tree
5383 build_mask_conversion (vec_info *vinfo,
5384 tree mask, tree vectype, stmt_vec_info stmt_vinfo)
5386 gimple *stmt;
5387 tree masktype, tmp;
5389 masktype = truth_type_for (vectype);
5390 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
5391 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
5392 append_pattern_def_seq (vinfo, stmt_vinfo,
5393 stmt, masktype, TREE_TYPE (vectype));
5395 return tmp;
5399 /* Function vect_recog_mask_conversion_pattern
5401 Try to find statements which require boolean type
5402 conversion. Additional conversion statements are
5403 added to handle such cases. For example:
5405 bool m_1, m_2, m_3;
5406 int i_4, i_5;
5407 double d_6, d_7;
5408 char c_1, c_2, c_3;
5410 S1 m_1 = i_4 > i_5;
5411 S2 m_2 = d_6 < d_7;
5412 S3 m_3 = m_1 & m_2;
5413 S4 c_1 = m_3 ? c_2 : c_3;
5415 Will be transformed into:
5417 S1 m_1 = i_4 > i_5;
5418 S2 m_2 = d_6 < d_7;
5419 S3'' m_2' = (_Bool[bitsize=32])m_2
5420 S3' m_3' = m_1 & m_2';
5421 S4'' m_3'' = (_Bool[bitsize=8])m_3'
5422 S4' c_1' = m_3'' ? c_2 : c_3; */
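/* The extra conversions are needed because, in vector form, m_1 is a mask
   whose elements correspond to 32-bit ints while m_2 is a mask for 64-bit
   doubles, and the final COND_EXPR selects 8-bit chars; the operands of a
   vector operation must agree on the number of elements, so the narrower
   masks are produced explicitly.  The element sizes quoted here assume
   typical int/double/char sizes and are only illustrative.  */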
5424 static gimple *
5425 vect_recog_mask_conversion_pattern (vec_info *vinfo,
5426 stmt_vec_info stmt_vinfo, tree *type_out)
5428 gimple *last_stmt = stmt_vinfo->stmt;
5429 enum tree_code rhs_code;
5430 tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
5431 tree vectype1, vectype2;
5432 stmt_vec_info pattern_stmt_info;
5433 tree rhs1_op0 = NULL_TREE, rhs1_op1 = NULL_TREE;
5434 tree rhs1_op0_type = NULL_TREE, rhs1_op1_type = NULL_TREE;
5436 /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion. */
5437 if (is_gimple_call (last_stmt)
5438 && gimple_call_internal_p (last_stmt))
5440 gcall *pattern_stmt;
5442 internal_fn ifn = gimple_call_internal_fn (last_stmt);
5443 int mask_argno = internal_fn_mask_index (ifn);
5444 if (mask_argno < 0)
5445 return NULL;
5447 bool store_p = internal_store_fn_p (ifn);
5448 if (store_p)
5450 int rhs_index = internal_fn_stored_value_index (ifn);
5451 tree rhs = gimple_call_arg (last_stmt, rhs_index);
5452 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs));
5454 else
5456 lhs = gimple_call_lhs (last_stmt);
5457 if (!lhs)
5458 return NULL;
5459 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
5462 tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
5463 tree mask_arg_type = integer_type_for_mask (mask_arg, vinfo);
5464 if (!mask_arg_type)
5465 return NULL;
5466 vectype2 = get_mask_type_for_scalar_type (vinfo, mask_arg_type);
5468 if (!vectype1 || !vectype2
5469 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
5470 TYPE_VECTOR_SUBPARTS (vectype2)))
5471 return NULL;
5473 tmp = build_mask_conversion (vinfo, mask_arg, vectype1, stmt_vinfo);
5475 auto_vec<tree, 8> args;
5476 unsigned int nargs = gimple_call_num_args (last_stmt);
5477 args.safe_grow (nargs, true);
5478 for (unsigned int i = 0; i < nargs; ++i)
5479 args[i] = ((int) i == mask_argno
5480 ? tmp
5481 : gimple_call_arg (last_stmt, i));
5482 pattern_stmt = gimple_build_call_internal_vec (ifn, args);
5484 if (!store_p)
5486 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
5487 gimple_call_set_lhs (pattern_stmt, lhs);
5489 gimple_call_set_nothrow (pattern_stmt, true);
5491 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
5492 if (STMT_VINFO_DATA_REF (stmt_vinfo))
5493 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
5495 *type_out = vectype1;
5496 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
5498 return pattern_stmt;
5501 if (!is_gimple_assign (last_stmt))
5502 return NULL;
5504 gimple *pattern_stmt;
5505 lhs = gimple_assign_lhs (last_stmt);
5506 rhs1 = gimple_assign_rhs1 (last_stmt);
5507 rhs_code = gimple_assign_rhs_code (last_stmt);
5509 /* Check for cond expression requiring mask conversion. */
5510 if (rhs_code == COND_EXPR)
5512 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
5514 if (TREE_CODE (rhs1) == SSA_NAME)
5516 rhs1_type = integer_type_for_mask (rhs1, vinfo);
5517 if (!rhs1_type)
5518 return NULL;
5520 else if (COMPARISON_CLASS_P (rhs1))
5522 /* Check whether we're comparing scalar booleans and (if so)
5523 whether a better mask type exists than the mask associated
5524 with boolean-sized elements. This avoids unnecessary packs
5525 and unpacks if the booleans are set from comparisons of
5526 wider types. E.g. in:
5528 int x1, x2, x3, x4, y1, y2;
5530 bool b1 = (x1 == x2);
5531 bool b2 = (x3 == x4);
5532 ... = b1 == b2 ? y1 : y2;
5534 it is better for b1 and b2 to use the mask type associated
5535 with int elements rather than bool (byte) elements. */
5536 rhs1_op0 = TREE_OPERAND (rhs1, 0);
5537 rhs1_op1 = TREE_OPERAND (rhs1, 1);
5538 if (!rhs1_op0 || !rhs1_op1)
5539 return NULL;
5540 rhs1_op0_type = integer_type_for_mask (rhs1_op0, vinfo);
5541 rhs1_op1_type = integer_type_for_mask (rhs1_op1, vinfo);
5543 if (!rhs1_op0_type)
5544 rhs1_type = TREE_TYPE (rhs1_op0);
5545 else if (!rhs1_op1_type)
5546 rhs1_type = TREE_TYPE (rhs1_op1);
5547 else if (TYPE_PRECISION (rhs1_op0_type)
5548 != TYPE_PRECISION (rhs1_op1_type))
5550 int tmp0 = (int) TYPE_PRECISION (rhs1_op0_type)
5551 - (int) TYPE_PRECISION (TREE_TYPE (lhs));
5552 int tmp1 = (int) TYPE_PRECISION (rhs1_op1_type)
5553 - (int) TYPE_PRECISION (TREE_TYPE (lhs));
5554 if ((tmp0 > 0 && tmp1 > 0) || (tmp0 < 0 && tmp1 < 0))
5556 if (abs (tmp0) > abs (tmp1))
5557 rhs1_type = rhs1_op1_type;
5558 else
5559 rhs1_type = rhs1_op0_type;
5561 else
5562 rhs1_type = build_nonstandard_integer_type
5563 (TYPE_PRECISION (TREE_TYPE (lhs)), 1);
5565 else
5566 rhs1_type = rhs1_op0_type;
5568 else
5569 return NULL;
5571 vectype2 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
5573 if (!vectype1 || !vectype2)
5574 return NULL;
5576 /* Continue if a conversion is needed. Also continue if we have
5577 a comparison whose vector type would normally be different from
5578 VECTYPE2 when considered in isolation. In that case we'll
5579 replace the comparison with an SSA name (so that we can record
5580 its vector type) and behave as though the comparison was an SSA
5581 name from the outset. */
5582 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
5583 TYPE_VECTOR_SUBPARTS (vectype2))
5584 && !rhs1_op0_type
5585 && !rhs1_op1_type)
5586 return NULL;
5588 /* If rhs1 is invariant and we can promote it, leave the COND_EXPR
5589 in place, we can handle it in vectorizable_condition. This avoids
5590 unnecessary promotion stmts and increased vectorization factor. */
5591 if (COMPARISON_CLASS_P (rhs1)
5592 && INTEGRAL_TYPE_P (rhs1_type)
5593 && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
5594 TYPE_VECTOR_SUBPARTS (vectype2)))
5596 enum vect_def_type dt;
5597 if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
5598 && dt == vect_external_def
5599 && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
5600 && (dt == vect_external_def
5601 || dt == vect_constant_def))
5603 tree wide_scalar_type = build_nonstandard_integer_type
5604 (vector_element_bits (vectype1), TYPE_UNSIGNED (rhs1_type));
5605 tree vectype3 = get_vectype_for_scalar_type (vinfo,
5606 wide_scalar_type);
5607 if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
5608 return NULL;
5612 /* If rhs1 is a comparison we need to move it into a
5613 separate statement. */
5614 if (TREE_CODE (rhs1) != SSA_NAME)
5616 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
5617 if (rhs1_op0_type
5618 && TYPE_PRECISION (rhs1_op0_type) != TYPE_PRECISION (rhs1_type))
5619 rhs1_op0 = build_mask_conversion (vinfo, rhs1_op0,
5620 vectype2, stmt_vinfo);
5621 if (rhs1_op1_type
5622 && TYPE_PRECISION (rhs1_op1_type) != TYPE_PRECISION (rhs1_type))
5623 rhs1_op1 = build_mask_conversion (vinfo, rhs1_op1,
5624 vectype2, stmt_vinfo);
5625 pattern_stmt = gimple_build_assign (tmp, TREE_CODE (rhs1),
5626 rhs1_op0, rhs1_op1);
5627 rhs1 = tmp;
5628 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vectype2,
5629 rhs1_type);
5632 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
5633 TYPE_VECTOR_SUBPARTS (vectype2)))
5634 tmp = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
5635 else
5636 tmp = rhs1;
5638 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
5639 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
5640 gimple_assign_rhs2 (last_stmt),
5641 gimple_assign_rhs3 (last_stmt));
5643 *type_out = vectype1;
5644 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
5646 return pattern_stmt;
5649 /* Now check for binary boolean operations requiring conversion for
5650 one of the operands. */
5651 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
5652 return NULL;
5654 if (rhs_code != BIT_IOR_EXPR
5655 && rhs_code != BIT_XOR_EXPR
5656 && rhs_code != BIT_AND_EXPR
5657 && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
5658 return NULL;
5660 rhs2 = gimple_assign_rhs2 (last_stmt);
5662 rhs1_type = integer_type_for_mask (rhs1, vinfo);
5663 rhs2_type = integer_type_for_mask (rhs2, vinfo);
5665 if (!rhs1_type || !rhs2_type
5666 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
5667 return NULL;
5669 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
5671 vectype1 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
5672 if (!vectype1)
5673 return NULL;
5674 rhs2 = build_mask_conversion (vinfo, rhs2, vectype1, stmt_vinfo);
5676 else
5678 vectype1 = get_mask_type_for_scalar_type (vinfo, rhs2_type);
5679 if (!vectype1)
5680 return NULL;
5681 rhs1 = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
5684 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
5685 pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
5687 *type_out = vectype1;
5688 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
5690 return pattern_stmt;
5693 /* STMT_INFO is a load or store. If the load or store is conditional, return
5694 the boolean condition under which it occurs, otherwise return null. */
5696 static tree
5697 vect_get_load_store_mask (stmt_vec_info stmt_info)
5699 if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt))
5701 gcc_assert (gimple_assign_single_p (def_assign));
5702 return NULL_TREE;
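    /* A conditional access is represented as an internal-function call
       (e.g. IFN_MASK_LOAD or IFN_MASK_STORE), so the mask, if any, is one
       of the call arguments.  */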
5705 if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt))
5707 internal_fn ifn = gimple_call_internal_fn (def_call);
5708 int mask_index = internal_fn_mask_index (ifn);
5709 return gimple_call_arg (def_call, mask_index);
5712 gcc_unreachable ();
5715 /* Return MASK if MASK is suitable for masking an operation on vectors
5716 of type VECTYPE, otherwise convert it into such a form and return
5717 the result. Associate any conversion statements with STMT_INFO's
5718 pattern. */
5720 static tree
5721 vect_convert_mask_for_vectype (tree mask, tree vectype,
5722 stmt_vec_info stmt_info, vec_info *vinfo)
5724 tree mask_type = integer_type_for_mask (mask, vinfo);
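  /* Only masks defined inside the vectorized region with a recorded mask
     precision can be converted; if the mask already has the same number
     of elements as VECTYPE, it is used unchanged.  */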
5725 if (mask_type)
5727 tree mask_vectype = get_mask_type_for_scalar_type (vinfo, mask_type);
5728 if (mask_vectype
5729 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
5730 TYPE_VECTOR_SUBPARTS (mask_vectype)))
5731 mask = build_mask_conversion (vinfo, mask, vectype, stmt_info);
5733 return mask;
5736 /* Return the equivalent of:
5738 fold_convert (TYPE, VALUE)
5740 with the expectation that the operation will be vectorized.
5741 If new statements are needed, add them as pattern statements
5742 to STMT_INFO. */
5744 static tree
5745 vect_add_conversion_to_pattern (vec_info *vinfo,
5746 tree type, tree value, stmt_vec_info stmt_info)
5748 if (useless_type_conversion_p (type, TREE_TYPE (value)))
5749 return value;
5751 tree new_value = vect_recog_temp_ssa_var (type, NULL);
5752 gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
5753 append_pattern_def_seq (vinfo, stmt_info, conversion,
5754 get_vectype_for_scalar_type (vinfo, type));
5755 return new_value;
5758 /* Try to convert STMT_INFO into a call to a gather load or scatter store
5759 internal function. Return the final statement on success and set
5760 *TYPE_OUT to the vector type being loaded or stored.
5762 This function only handles gathers and scatters that were recognized
5763 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
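/* An illustrative source form (names hypothetical) is an indexed access
   such as:

     for (int i = 0; i < n; i++)
       dst[i] = src[idx[i]];

   which, when the target provides a suitable internal function such as
   IFN_GATHER_LOAD or IFN_MASK_GATHER_LOAD, is rewritten into a call
   taking the invariant base, the offset vector and a scale.  */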
5765 static gimple *
5766 vect_recog_gather_scatter_pattern (vec_info *vinfo,
5767 stmt_vec_info stmt_info, tree *type_out)
5769 /* Currently we only support this for loop vectorization. */
5770 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5771 if (!loop_vinfo)
5772 return NULL;
5774 /* Make sure that we're looking at a gather load or scatter store. */
5775 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
5776 if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
5777 return NULL;
5779 /* Get the boolean that controls whether the load or store happens.
5780 This is null if the operation is unconditional. */
5781 tree mask = vect_get_load_store_mask (stmt_info);
5783 /* Make sure that the target supports an appropriate internal
5784 function for the gather/scatter operation. */
5785 gather_scatter_info gs_info;
5786 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info)
5787 || gs_info.ifn == IFN_LAST)
5788 return NULL;
5790 /* Convert the mask to the right form. */
5791 tree gs_vectype = get_vectype_for_scalar_type (loop_vinfo,
5792 gs_info.element_type);
5793 if (mask)
5794 mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
5795 loop_vinfo);
5796 else if (gs_info.ifn == IFN_MASK_SCATTER_STORE
5797 || gs_info.ifn == IFN_MASK_GATHER_LOAD)
5798 mask = build_int_cst (TREE_TYPE (truth_type_for (gs_vectype)), -1);
5800 /* Get the invariant base and non-invariant offset, converting the
5801 latter to the same width as the vector elements. */
5802 tree base = gs_info.base;
5803 tree offset_type = TREE_TYPE (gs_info.offset_vectype);
5804 tree offset = vect_add_conversion_to_pattern (vinfo, offset_type,
5805 gs_info.offset, stmt_info);
5807 /* Build the new pattern statement. */
5808 tree scale = size_int (gs_info.scale);
5809 gcall *pattern_stmt;
5810 if (DR_IS_READ (dr))
5812 tree zero = build_zero_cst (gs_info.element_type);
5813 if (mask != NULL)
5814 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base,
5815 offset, scale, zero, mask);
5816 else
5817 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
5818 offset, scale, zero);
5819 tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
5820 gimple_call_set_lhs (pattern_stmt, load_lhs);
5822 else
5824 tree rhs = vect_get_store_rhs (stmt_info);
5825 if (mask != NULL)
5826 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5,
5827 base, offset, scale, rhs,
5828 mask);
5829 else
5830 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4,
5831 base, offset, scale, rhs);
5833 gimple_call_set_nothrow (pattern_stmt, true);
5835 /* Copy across relevant vectorization info and associate DR with the
5836 new pattern statement instead of the original statement. */
5837 stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
5838 loop_vinfo->move_dr (pattern_stmt_info, stmt_info);
5840 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5841 *type_out = vectype;
5842 vect_pattern_detected ("gather/scatter pattern", stmt_info->stmt);
5844 return pattern_stmt;
5847 /* Return true if TYPE is a non-boolean integer type. These are the types
5848 that we want to consider for narrowing. */
5850 static bool
5851 vect_narrowable_type_p (tree type)
5853 return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
5856 /* Return true if the operation given by CODE can be truncated to N bits
5857 when only N bits of the output are needed. This is only true if bit N+1
5858 of the inputs has no effect on the low N bits of the result. */
5860 static bool
5861 vect_truncatable_operation_p (tree_code code)
5863 switch (code)
5865 case PLUS_EXPR:
5866 case MINUS_EXPR:
5867 case MULT_EXPR:
5868 case BIT_AND_EXPR:
5869 case BIT_IOR_EXPR:
5870 case BIT_XOR_EXPR:
5871 case COND_EXPR:
5872 return true;
5874 default:
5875 return false;
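/* For example, addition is truncatable because the low N bits of a + b
   depend only on the low N bits of a and b:

     (unsigned char) (a + b)
       == (unsigned char) ((unsigned char) a + (unsigned char) b)

   whereas right shifts and divisions are not, since higher input bits can
   affect the low output bits.  */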
5879 /* Record that STMT_INFO could be changed from operating on TYPE to
5880 operating on a type with the precision and sign given by PRECISION
5881 and SIGN respectively. PRECISION is an arbitrary bit precision;
5882 it might not be a whole number of bytes. */
5884 static void
5885 vect_set_operation_type (stmt_vec_info stmt_info, tree type,
5886 unsigned int precision, signop sign)
5888 /* Round the precision up to a whole number of bytes. */
5889 precision = vect_element_precision (precision);
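  /* For instance, a requested precision of 11 bits is recorded as 16.  */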
5890 if (precision < TYPE_PRECISION (type)
5891 && (!stmt_info->operation_precision
5892 || stmt_info->operation_precision > precision))
5894 stmt_info->operation_precision = precision;
5895 stmt_info->operation_sign = sign;
5899 /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
5900 non-boolean inputs, all of which have type TYPE. MIN_INPUT_PRECISION
5901 is an arbitrary bit precision; it might not be a whole number of bytes. */
5903 static void
5904 vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
5905 unsigned int min_input_precision)
5907 /* This operation in isolation only requires the inputs to have
5908 MIN_INPUT_PRECISION of precision. However, that doesn't mean
5909 that MIN_INPUT_PRECISION is a natural precision for the chain
5910 as a whole. E.g. consider something like:
5912 unsigned short *x, *y;
5913 *y = ((*x & 0xf0) >> 4) | (*y << 4);
5915 The right shift can be done on unsigned chars, and only requires the
5916 result of "*x & 0xf0" to be done on unsigned chars. But taking that
5917 approach would mean turning a natural chain of single-vector unsigned
5918 short operations into one that truncates "*x" and then extends
5919 "(*x & 0xf0) >> 4", with two vectors for each unsigned short
5920 operation and one vector for each unsigned char operation.
5921 This would be a significant pessimization.
5923 Instead only propagate the maximum of this precision and the precision
5924 required by the users of the result. This means that we don't pessimize
5925 the case above but continue to optimize things like:
5927 unsigned char *y;
5928 unsigned short *x;
5929 *y = ((*x & 0xf0) >> 4) | (*y << 4);
5931 Here we would truncate two vectors of *x to a single vector of
5932 unsigned chars and use single-vector unsigned char operations for
5933 everything else, rather than doing two unsigned short copies of
5934 "(*x & 0xf0) >> 4" and then truncating the result. */
5935 min_input_precision = MAX (min_input_precision,
5936 stmt_info->min_output_precision);
5938 if (min_input_precision < TYPE_PRECISION (type)
5939 && (!stmt_info->min_input_precision
5940 || stmt_info->min_input_precision > min_input_precision))
5941 stmt_info->min_input_precision = min_input_precision;
5944 /* Subroutine of vect_determine_min_output_precision. Return true if
5945 we can calculate a reduced number of output bits for STMT_INFO,
5946 whose result is LHS. */
5948 static bool
5949 vect_determine_min_output_precision_1 (vec_info *vinfo,
5950 stmt_vec_info stmt_info, tree lhs)
5952 /* Take the maximum precision required by users of the result. */
5953 unsigned int precision = 0;
5954 imm_use_iterator iter;
5955 use_operand_p use;
5956 FOR_EACH_IMM_USE_FAST (use, iter, lhs)
5958 gimple *use_stmt = USE_STMT (use);
5959 if (is_gimple_debug (use_stmt))
5960 continue;
5961 stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
5962 if (!use_stmt_info || !use_stmt_info->min_input_precision)
5963 return false;
5964 /* The input precision recorded for COND_EXPRs applies only to the
5965 "then" and "else" values. */
5966 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
5967 if (assign
5968 && gimple_assign_rhs_code (assign) == COND_EXPR
5969 && use->use != gimple_assign_rhs2_ptr (assign)
5970 && use->use != gimple_assign_rhs3_ptr (assign))
5971 return false;
5972 precision = MAX (precision, use_stmt_info->min_input_precision);
5975 if (dump_enabled_p ())
5976 dump_printf_loc (MSG_NOTE, vect_location,
5977 "only the low %d bits of %T are significant\n",
5978 precision, lhs);
5979 stmt_info->min_output_precision = precision;
5980 return true;
5983 /* Calculate min_output_precision for STMT_INFO. */
5985 static void
5986 vect_determine_min_output_precision (vec_info *vinfo, stmt_vec_info stmt_info)
5988 /* We're only interested in statements with a narrowable result. */
5989 tree lhs = gimple_get_lhs (stmt_info->stmt);
5990 if (!lhs
5991 || TREE_CODE (lhs) != SSA_NAME
5992 || !vect_narrowable_type_p (TREE_TYPE (lhs)))
5993 return;
5995 if (!vect_determine_min_output_precision_1 (vinfo, stmt_info, lhs))
5996 stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
5999 /* Use range information to decide whether STMT (described by STMT_INFO)
6000 could be done in a narrower type. This is effectively a forward
6001 propagation, since it uses context-independent information that applies
6002 to all users of an SSA name. */
6004 static void
6005 vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
6007 tree lhs = gimple_assign_lhs (stmt);
6008 if (!lhs || TREE_CODE (lhs) != SSA_NAME)
6009 return;
6011 tree type = TREE_TYPE (lhs);
6012 if (!vect_narrowable_type_p (type))
6013 return;
6015 /* First see whether we have any useful range information for the result. */
6016 unsigned int precision = TYPE_PRECISION (type);
6017 signop sign = TYPE_SIGN (type);
6018 wide_int min_value, max_value;
6019 if (!vect_get_range_info (lhs, &min_value, &max_value))
6020 return;
6022 tree_code code = gimple_assign_rhs_code (stmt);
6023 unsigned int nops = gimple_num_ops (stmt);
6025 if (!vect_truncatable_operation_p (code))
6026 /* Check that all relevant input operands are compatible, and update
6027 [MIN_VALUE, MAX_VALUE] to include their ranges. */
6028 for (unsigned int i = 1; i < nops; ++i)
6030 tree op = gimple_op (stmt, i);
6031 if (TREE_CODE (op) == INTEGER_CST)
6033 /* Don't require the integer to have RHS_TYPE (which it might
6034 not for things like shift amounts, etc.), but do require it
6035 to fit the type. */
6036 if (!int_fits_type_p (op, type))
6037 return;
6039 min_value = wi::min (min_value, wi::to_wide (op, precision), sign);
6040 max_value = wi::max (max_value, wi::to_wide (op, precision), sign);
6042 else if (TREE_CODE (op) == SSA_NAME)
6044 /* Ignore codes that don't take uniform arguments. */
6045 if (!types_compatible_p (TREE_TYPE (op), type))
6046 return;
6048 wide_int op_min_value, op_max_value;
6049 if (!vect_get_range_info (op, &op_min_value, &op_max_value))
6050 return;
6052 min_value = wi::min (min_value, op_min_value, sign);
6053 max_value = wi::max (max_value, op_max_value, sign);
6055 else
6056 return;
6059 /* Try to switch signed types for unsigned types if we can.
6060 This is better for two reasons. First, unsigned ops tend
6061 to be cheaper than signed ops. Second, it means that we can
6062 handle things like:
6064 signed char c;
6065 int res = (int) c & 0xff00; // range [0x0000, 0xff00]
6067 as
6069 signed char c;
6070 unsigned short res_1 = (unsigned short) c & 0xff00;
6071 int res = (int) res_1;
6073 where the intermediate result res_1 has unsigned rather than
6074 signed type. */
6075 if (sign == SIGNED && !wi::neg_p (min_value))
6076 sign = UNSIGNED;
6078 /* See what precision is required for MIN_VALUE and MAX_VALUE. */
6079 unsigned int precision1 = wi::min_precision (min_value, sign);
6080 unsigned int precision2 = wi::min_precision (max_value, sign);
6081 unsigned int value_precision = MAX (precision1, precision2);
6082 if (value_precision >= precision)
6083 return;
6085 if (dump_enabled_p ())
6086 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
6087 " without loss of precision: %G",
6088 sign == SIGNED ? "signed" : "unsigned",
6089 value_precision, (gimple *) stmt);
6091 vect_set_operation_type (stmt_info, type, value_precision, sign);
6092 vect_set_min_input_precision (stmt_info, type, value_precision);
6095 /* Use information about the users of STMT's result to decide whether
6096 STMT (described by STMT_INFO) could be done in a narrower type.
6097 This is effectively a backward propagation. */
6099 static void
6100 vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
6102 tree_code code = gimple_assign_rhs_code (stmt);
6103 unsigned int opno = (code == COND_EXPR ? 2 : 1);
6104 tree type = TREE_TYPE (gimple_op (stmt, opno));
6105 if (!vect_narrowable_type_p (type))
6106 return;
6108 unsigned int precision = TYPE_PRECISION (type);
6109 unsigned int operation_precision, min_input_precision;
6110 switch (code)
6112 CASE_CONVERT:
6113 /* Only the bits that contribute to the output matter. Don't change
6114 the precision of the operation itself. */
6115 operation_precision = precision;
6116 min_input_precision = stmt_info->min_output_precision;
6117 break;
6119 case LSHIFT_EXPR:
6120 case RSHIFT_EXPR:
6122 tree shift = gimple_assign_rhs2 (stmt);
6123 if (TREE_CODE (shift) != INTEGER_CST
6124 || !wi::ltu_p (wi::to_widest (shift), precision))
6125 return;
6126 unsigned int const_shift = TREE_INT_CST_LOW (shift);
6127 if (code == LSHIFT_EXPR)
6129 /* Avoid creating an undefined shift.
6131 ??? We could instead use min_output_precision as-is and
6132 optimize out-of-range shifts to zero. However, only
6133 degenerate testcases shift away all their useful input data,
6134 and it isn't natural to drop input operations in the middle
6135 of vectorization. This sort of thing should really be
6136 handled before vectorization. */
6137 operation_precision = MAX (stmt_info->min_output_precision,
6138 const_shift + 1);
6139 /* We need CONST_SHIFT fewer bits of the input. */
6140 min_input_precision = (MAX (operation_precision, const_shift)
6141 - const_shift);
6143 else
6145 /* We need CONST_SHIFT extra bits to do the operation. */
6146 operation_precision = (stmt_info->min_output_precision
6147 + const_shift);
6148 min_input_precision = operation_precision;
6150 break;
6153 default:
6154 if (vect_truncatable_operation_p (code))
6156 /* Input bit N has no effect on output bits N-1 and lower. */
6157 operation_precision = stmt_info->min_output_precision;
6158 min_input_precision = operation_precision;
6159 break;
6161 return;
6164 if (operation_precision < precision)
6166 if (dump_enabled_p ())
6167 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
6168 " without affecting users: %G",
6169 TYPE_UNSIGNED (type) ? "unsigned" : "signed",
6170 operation_precision, (gimple *) stmt);
6171 vect_set_operation_type (stmt_info, type, operation_precision,
6172 TYPE_SIGN (type));
6174 vect_set_min_input_precision (stmt_info, type, min_input_precision);
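/* A worked example for the shift handling above (values illustrative):
   if only the low 8 bits of the result are needed, then for x >> 3 the
   operation and its input both need 8 + 3 = 11 bits of precision, while
   for x << 3 the operation still needs at least 8 bits but only
   max (8, 3) - 3 = 5 input bits are significant.  */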
6177 /* Return true if the statement described by STMT_INFO sets a boolean
6178 SSA_NAME and if we know how to vectorize this kind of statement using
6179 vector mask types. */
6181 static bool
6182 possible_vector_mask_operation_p (stmt_vec_info stmt_info)
6184 tree lhs = gimple_get_lhs (stmt_info->stmt);
6185 if (!lhs
6186 || TREE_CODE (lhs) != SSA_NAME
6187 || !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
6188 return false;
6190 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
6192 tree_code rhs_code = gimple_assign_rhs_code (assign);
6193 switch (rhs_code)
6195 CASE_CONVERT:
6196 case SSA_NAME:
6197 case BIT_NOT_EXPR:
6198 case BIT_IOR_EXPR:
6199 case BIT_XOR_EXPR:
6200 case BIT_AND_EXPR:
6201 return true;
6203 default:
6204 return TREE_CODE_CLASS (rhs_code) == tcc_comparison;
6207 else if (is_a <gphi *> (stmt_info->stmt))
6208 return true;
6209 return false;
6212 /* If STMT_INFO sets a boolean SSA_NAME, see whether we should use
6213 a vector mask type instead of a normal vector type. Record the
6214 result in STMT_INFO->mask_precision. */
6216 static void
6217 vect_determine_mask_precision (vec_info *vinfo, stmt_vec_info stmt_info)
6219 if (!possible_vector_mask_operation_p (stmt_info))
6220 return;
6222 /* If at least one boolean input uses a vector mask type,
6223 pick the mask type with the narrowest elements.
6225 ??? This is the traditional behavior. It should always produce
6226 the smallest number of operations, but isn't necessarily the
6227 optimal choice. For example, if we have:
6229 a = b & c
6231 where:
6233 - the user of a wants it to have a mask type for 16-bit elements (M16)
6234 - b also uses M16
6235 - c uses a mask type for 8-bit elements (M8)
6237 then picking M8 gives:
6239 - 1 M16->M8 pack for b
6240 - 1 M8 AND for a
6241 - 2 M8->M16 unpacks for the user of a
6243 whereas picking M16 would have given:
6245 - 2 M8->M16 unpacks for c
6246 - 2 M16 ANDs for a
6248 The operation counts are equal, but M16 would have given
6249 a shorter dependency chain and allowed more ILP. */
6250 unsigned int precision = ~0U;
6251 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
6253 unsigned int nops = gimple_num_ops (assign);
6254 for (unsigned int i = 1; i < nops; ++i)
6256 tree rhs = gimple_op (assign, i);
6257 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs)))
6258 continue;
6260 stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
6261 if (!def_stmt_info)
6262 /* Don't let external or constant operands influence the choice.
6263 We can convert them to whichever vector type we pick. */
6264 continue;
6266 if (def_stmt_info->mask_precision)
6268 if (precision > def_stmt_info->mask_precision)
6269 precision = def_stmt_info->mask_precision;
6273 /* If the statement compares two values that shouldn't use vector masks,
6274 try comparing the values as normal scalars instead. */
6275 tree_code rhs_code = gimple_assign_rhs_code (assign);
6276 if (precision == ~0U
6277 && TREE_CODE_CLASS (rhs_code) == tcc_comparison)
6279 tree rhs1_type = TREE_TYPE (gimple_assign_rhs1 (assign));
6280 scalar_mode mode;
6281 tree vectype, mask_type;
6282 if (is_a <scalar_mode> (TYPE_MODE (rhs1_type), &mode)
6283 && (vectype = get_vectype_for_scalar_type (vinfo, rhs1_type))
6284 && (mask_type = get_mask_type_for_scalar_type (vinfo, rhs1_type))
6285 && expand_vec_cmp_expr_p (vectype, mask_type, rhs_code))
6286 precision = GET_MODE_BITSIZE (mode);
6289 else
6291 gphi *phi = as_a <gphi *> (stmt_info->stmt);
6292 for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
6294 tree rhs = gimple_phi_arg_def (phi, i);
6296 stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
6297 if (!def_stmt_info)
6298 /* Don't let external or constant operands influence the choice.
6299 We can convert them to whichever vector type we pick. */
6300 continue;
6302 if (def_stmt_info->mask_precision)
6304 if (precision > def_stmt_info->mask_precision)
6305 precision = def_stmt_info->mask_precision;
6310 if (dump_enabled_p ())
6312 if (precision == ~0U)
6313 dump_printf_loc (MSG_NOTE, vect_location,
6314 "using normal nonmask vectors for %G",
6315 stmt_info->stmt);
6316 else
6317 dump_printf_loc (MSG_NOTE, vect_location,
6318 "using boolean precision %d for %G",
6319 precision, stmt_info->stmt);
6322 stmt_info->mask_precision = precision;
6325 /* Handle vect_determine_precisions for STMT_INFO, given that we
6326 have already done so for the users of its result. */
6328 void
6329 vect_determine_stmt_precisions (vec_info *vinfo, stmt_vec_info stmt_info)
6331 vect_determine_min_output_precision (vinfo, stmt_info);
6332 if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
6334 vect_determine_precisions_from_range (stmt_info, stmt);
6335 vect_determine_precisions_from_users (stmt_info, stmt);
6339 /* Walk backwards through the vectorizable region to determine the
6340 values of these fields:
6342 - min_output_precision
6343 - min_input_precision
6344 - operation_precision
6345 - operation_sign. */
6347 void
6348 vect_determine_precisions (vec_info *vinfo)
6350 DUMP_VECT_SCOPE ("vect_determine_precisions");
6352 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
6354 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6355 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
6356 unsigned int nbbs = loop->num_nodes;
6358 for (unsigned int i = 0; i < nbbs; i++)
6360 basic_block bb = bbs[i];
6361 for (auto gsi = gsi_start_phis (bb);
6362 !gsi_end_p (gsi); gsi_next (&gsi))
6364 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
6365 if (stmt_info)
6366 vect_determine_mask_precision (vinfo, stmt_info);
6368 for (auto si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
6369 if (!is_gimple_debug (gsi_stmt (si)))
6370 vect_determine_mask_precision
6371 (vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
6373 for (unsigned int i = 0; i < nbbs; i++)
6375 basic_block bb = bbs[nbbs - i - 1];
6376 for (gimple_stmt_iterator si = gsi_last_bb (bb);
6377 !gsi_end_p (si); gsi_prev (&si))
6378 if (!is_gimple_debug (gsi_stmt (si)))
6379 vect_determine_stmt_precisions
6380 (vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
6381 for (auto gsi = gsi_start_phis (bb);
6382 !gsi_end_p (gsi); gsi_next (&gsi))
6384 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
6385 if (stmt_info)
6386 vect_determine_stmt_precisions (vinfo, stmt_info);
6390 else
6392 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
6393 for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
6395 basic_block bb = bb_vinfo->bbs[i];
6396 for (auto gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
6398 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
6399 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
6400 vect_determine_mask_precision (vinfo, stmt_info);
6402 for (auto gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
6404 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
6405 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
6406 vect_determine_mask_precision (vinfo, stmt_info);
6409 for (int i = bb_vinfo->bbs.length () - 1; i != -1; --i)
6411 for (gimple_stmt_iterator gsi = gsi_last_bb (bb_vinfo->bbs[i]);
6412 !gsi_end_p (gsi); gsi_prev (&gsi))
6414 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
6415 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
6416 vect_determine_stmt_precisions (vinfo, stmt_info);
6418 for (auto gsi = gsi_start_phis (bb_vinfo->bbs[i]);
6419 !gsi_end_p (gsi); gsi_next (&gsi))
6421 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
6422 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
6423 vect_determine_stmt_precisions (vinfo, stmt_info);
6429 typedef gimple *(*vect_recog_func_ptr) (vec_info *, stmt_vec_info, tree *);
6431 struct vect_recog_func
6433 vect_recog_func_ptr fn;
6434 const char *name;
6437 /* Note that ordering matters - the first pattern matching on a stmt is
6438 taken, which means usually the more complex one needs to precede the
6439 less complex ones (widen_sum only after dot_prod or sad for example). */
6440 static vect_recog_func vect_vect_recog_func_ptrs[] = {
6441 { vect_recog_bitfield_ref_pattern, "bitfield_ref" },
6442 { vect_recog_bit_insert_pattern, "bit_insert" },
6443 { vect_recog_over_widening_pattern, "over_widening" },
6444 /* Must come after over_widening, which narrows the shift as much as
6445 possible beforehand. */
6446 { vect_recog_average_pattern, "average" },
6447 { vect_recog_cond_expr_convert_pattern, "cond_expr_convert" },
6448 { vect_recog_mulhs_pattern, "mult_high" },
6449 { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
6450 { vect_recog_widen_mult_pattern, "widen_mult" },
6451 { vect_recog_dot_prod_pattern, "dot_prod" },
6452 { vect_recog_sad_pattern, "sad" },
6453 { vect_recog_widen_sum_pattern, "widen_sum" },
6454 { vect_recog_pow_pattern, "pow" },
6455 { vect_recog_popcount_clz_ctz_ffs_pattern, "popcount_clz_ctz_ffs" },
6456 { vect_recog_ctz_ffs_pattern, "ctz_ffs" },
6457 { vect_recog_widen_shift_pattern, "widen_shift" },
6458 { vect_recog_rotate_pattern, "rotate" },
6459 { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
6460 { vect_recog_divmod_pattern, "divmod" },
6461 { vect_recog_mult_pattern, "mult" },
6462 { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
6463 { vect_recog_bool_pattern, "bool" },
6464 /* This must come before mask conversion, and includes the parts
6465 of mask conversion that are needed for gather and scatter
6466 internal functions. */
6467 { vect_recog_gather_scatter_pattern, "gather_scatter" },
6468 { vect_recog_mask_conversion_pattern, "mask_conversion" },
6469 { vect_recog_widen_plus_pattern, "widen_plus" },
6470 { vect_recog_widen_minus_pattern, "widen_minus" },
6473 const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);
6475 /* Mark statements that are involved in a pattern. */
6477 void
6478 vect_mark_pattern_stmts (vec_info *vinfo,
6479 stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
6480 tree pattern_vectype)
6482 stmt_vec_info orig_stmt_info_saved = orig_stmt_info;
6483 gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
6485 gimple *orig_pattern_stmt = NULL;
6486 if (is_pattern_stmt_p (orig_stmt_info))
6488 /* We're replacing a statement in an existing pattern definition
6489 sequence. */
6490 orig_pattern_stmt = orig_stmt_info->stmt;
6491 if (dump_enabled_p ())
6492 dump_printf_loc (MSG_NOTE, vect_location,
6493 "replacing earlier pattern %G", orig_pattern_stmt);
6495 /* To keep the book-keeping simple, just swap the lhs of the
6496 old and new statements, so that the old one has a valid but
6497 unused lhs. */
6498 tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
6499 gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
6500 gimple_set_lhs (pattern_stmt, old_lhs);
6502 if (dump_enabled_p ())
6503 dump_printf_loc (MSG_NOTE, vect_location, "with %G", pattern_stmt);
6505 /* Switch to the statement that ORIG replaces. */
6506 orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);
6508 /* We shouldn't be replacing the main pattern statement. */
6509 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
6510 != orig_pattern_stmt);
6513 if (def_seq)
6514 for (gimple_stmt_iterator si = gsi_start (def_seq);
6515 !gsi_end_p (si); gsi_next (&si))
6517 if (dump_enabled_p ())
6518 dump_printf_loc (MSG_NOTE, vect_location,
6519 "extra pattern stmt: %G", gsi_stmt (si));
6520 stmt_vec_info pattern_stmt_info
6521 = vect_init_pattern_stmt (vinfo, gsi_stmt (si),
6522 orig_stmt_info, pattern_vectype);
6523 /* Stmts in the def sequence are not vectorizable cycle or
6524 induction defs; instead they should all be vect_internal_def
6525 feeding the main pattern stmt which retains this def type. */
6526 STMT_VINFO_DEF_TYPE (pattern_stmt_info) = vect_internal_def;
6529 if (orig_pattern_stmt)
6531 vect_init_pattern_stmt (vinfo, pattern_stmt,
6532 orig_stmt_info, pattern_vectype);
6534 /* Insert all the new pattern statements before the original one. */
6535 gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
6536 gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
6537 orig_def_seq);
6538 gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
6539 gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);
6541 /* Remove the pattern statement that this new pattern replaces. */
6542 gsi_remove (&gsi, false);
6544 else
6545 vect_set_pattern_stmt (vinfo,
6546 pattern_stmt, orig_stmt_info, pattern_vectype);
6548 /* Transfer reduction path info to the pattern. */
6549 if (STMT_VINFO_REDUC_IDX (orig_stmt_info_saved) != -1)
6551 gimple_match_op op;
6552 if (!gimple_extract_op (orig_stmt_info_saved->stmt, &op))
6553 gcc_unreachable ();
6554 tree lookfor = op.ops[STMT_VINFO_REDUC_IDX (orig_stmt_info)];
6555 /* Search the pattern def sequence and the main pattern stmt. Note
6556 we may have inserted all into a containing pattern def sequence
6557 so the following is a bit awkward. */
6558 gimple_stmt_iterator si;
6559 gimple *s;
6560 if (def_seq)
6562 si = gsi_start (def_seq);
6563 s = gsi_stmt (si);
6564 gsi_next (&si);
6566 else
6568 si = gsi_none ();
6569 s = pattern_stmt;
6573 bool found = false;
6574 if (gimple_extract_op (s, &op))
6575 for (unsigned i = 0; i < op.num_ops; ++i)
6576 if (op.ops[i] == lookfor)
6578 STMT_VINFO_REDUC_IDX (vinfo->lookup_stmt (s)) = i;
6579 lookfor = gimple_get_lhs (s);
6580 found = true;
6581 break;
6583 if (s == pattern_stmt)
6585 if (!found && dump_enabled_p ())
6586 dump_printf_loc (MSG_NOTE, vect_location,
6587 "failed to update reduction index.\n");
6588 break;
6590 if (gsi_end_p (si))
6591 s = pattern_stmt;
6592 else
6594 s = gsi_stmt (si);
6595 if (s == pattern_stmt)
6596 /* Found the end inside a bigger pattern def seq. */
6597 si = gsi_none ();
6598 else
6599 gsi_next (&si);
6601 } while (1);
6605 /* Function vect_pattern_recog_1
6607 Input:
6608 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
6609 computation pattern.
6610 STMT_INFO: A stmt from which the pattern search should start.
6612 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
6613 a sequence of statements that has the same functionality and can be
6614 used to replace STMT_INFO. It returns the last statement in the sequence
6615 and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
6616 PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
6617 statement, having first checked that the target supports the new operation
6618 in that type.
6620 This function also does some bookkeeping, as explained in the documentation
6621 for vect_pattern_recog. */
6623 static void
6624 vect_pattern_recog_1 (vec_info *vinfo,
6625 vect_recog_func *recog_func, stmt_vec_info stmt_info)
6627 gimple *pattern_stmt;
6628 loop_vec_info loop_vinfo;
6629 tree pattern_vectype;
6631 /* If this statement has already been replaced with pattern statements,
6632 leave the original statement alone, since the first match wins.
6633 Instead try to match against the definition statements that feed
6634 the main pattern statement. */
6635 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
6637 gimple_stmt_iterator gsi;
6638 for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
6639 !gsi_end_p (gsi); gsi_next (&gsi))
6640 vect_pattern_recog_1 (vinfo, recog_func,
6641 vinfo->lookup_stmt (gsi_stmt (gsi)));
6642 return;
6645 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
6646 pattern_stmt = recog_func->fn (vinfo, stmt_info, &pattern_vectype);
6647 if (!pattern_stmt)
6649 /* Clear any half-formed pattern definition sequence. */
6650 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
6651 return;
6654 loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6656 /* Found a vectorizable pattern. */
6657 if (dump_enabled_p ())
6658 dump_printf_loc (MSG_NOTE, vect_location,
6659 "%s pattern recognized: %G",
6660 recog_func->name, pattern_stmt);
6662 /* Mark the stmts that are involved in the pattern. */
6663 vect_mark_pattern_stmts (vinfo, stmt_info, pattern_stmt, pattern_vectype);
6665 /* Patterns cannot be vectorized using SLP, because they change the order of
6666 computation. */
6667 if (loop_vinfo)
6669 unsigned ix, ix2;
6670 stmt_vec_info *elem_ptr;
6671 VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
6672 elem_ptr, *elem_ptr == stmt_info);
6677 /* Function vect_pattern_recog
6679 Input:
6680 LOOP_VINFO - a loop_vec_info of a loop in which we want to look for
6681 computation idioms.
6683 Output - for each computation idiom that is detected we create a new stmt
6684 that provides the same functionality and that can be vectorized. We
6685 also record some information in the struct_stmt_info of the relevant
6686 stmts, as explained below:
6688 At the entry to this function we have the following stmts, with the
6689 following initial value in the STMT_VINFO fields:
6691 stmt in_pattern_p related_stmt vec_stmt
6692 S1: a_i = .... - - -
6693 S2: a_2 = ..use(a_i).. - - -
6694 S3: a_1 = ..use(a_2).. - - -
6695 S4: a_0 = ..use(a_1).. - - -
6696 S5: ... = ..use(a_0).. - - -
6698 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
6699 represented by a single stmt. We then:
6700 - create a new stmt S6 equivalent to the pattern (the stmt is not
6701 inserted into the code)
6702 - fill in the STMT_VINFO fields as follows:
6704 in_pattern_p related_stmt vec_stmt
6705 S1: a_i = .... - - -
6706 S2: a_2 = ..use(a_i).. - - -
6707 S3: a_1 = ..use(a_2).. - - -
6708 S4: a_0 = ..use(a_1).. true S6 -
6709 '---> S6: a_new = .... - S4 -
6710 S5: ... = ..use(a_0).. - - -
6712 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
6713 to each other through the RELATED_STMT field).
6715 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
6716 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
6717 remain irrelevant unless used by stmts other than S4.
6719 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
6720 (because they are marked as irrelevant). It will vectorize S6, and record
6721 a pointer to the new vector stmt VS6 from S6 (as usual).
6722 S4 will be skipped, and S5 will be vectorized as usual:
6724 in_pattern_p related_stmt vec_stmt
6725 S1: a_i = .... - - -
6726 S2: a_2 = ..use(a_i).. - - -
6727 S3: a_1 = ..use(a_2).. - - -
6728 > VS6: va_new = .... - - -
6729 S4: a_0 = ..use(a_1).. true S6 VS6
6730 '---> S6: a_new = .... - S4 VS6
6731 > VS5: ... = ..vuse(va_new).. - - -
6732 S5: ... = ..use(a_0).. - - -
6734 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
6735 elsewhere), and we'll end up with:
6737 VS6: va_new = ....
6738 VS5: ... = ..vuse(va_new)..
6740 In case of more than one pattern statements, e.g., widen-mult with
6741 intermediate type:
6743 S1 a_t = ;
6744 S2 a_T = (TYPE) a_t;
6745 '--> S3: a_it = (interm_type) a_t;
6746 S4 prod_T = a_T * CONST;
6747 '--> S5: prod_T' = a_it w* CONST;
6749 there may be other users of a_T outside the pattern. In that case S2 will
6750 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
6751 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
6752 be recorded in S3. */
6754 void
6755 vect_pattern_recog (vec_info *vinfo)
6757 class loop *loop;
6758 basic_block *bbs;
6759 unsigned int nbbs;
6760 gimple_stmt_iterator si;
6761 unsigned int i, j;
6763 vect_determine_precisions (vinfo);
6765 DUMP_VECT_SCOPE ("vect_pattern_recog");
6767 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
6769 loop = LOOP_VINFO_LOOP (loop_vinfo);
6770 bbs = LOOP_VINFO_BBS (loop_vinfo);
6771 nbbs = loop->num_nodes;
6773 /* Scan through the loop stmts, applying the pattern recognition
6774 functions starting at each stmt visited: */
6775 for (i = 0; i < nbbs; i++)
6777 basic_block bb = bbs[i];
6778 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
6780 if (is_gimple_debug (gsi_stmt (si)))
6781 continue;
6782 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
6783 /* Scan over all generic vect_recog_xxx_pattern functions. */
6784 for (j = 0; j < NUM_PATTERNS; j++)
6785 vect_pattern_recog_1 (vinfo, &vect_vect_recog_func_ptrs[j],
6786 stmt_info);
6790 else
6792 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
6793 for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
6794 for (gimple_stmt_iterator gsi = gsi_start_bb (bb_vinfo->bbs[i]);
6795 !gsi_end_p (gsi); gsi_next (&gsi))
6797 stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (gsi_stmt (gsi));
6798 if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info))
6799 continue;
6801 /* Scan over all generic vect_recog_xxx_pattern functions. */
6802 for (j = 0; j < NUM_PATTERNS; j++)
6803 vect_pattern_recog_1 (vinfo,
6804 &vect_vect_recog_func_ptrs[j], stmt_info);
6808 /* After this no more add_stmt calls are allowed. */
6809 vinfo->stmt_vec_info_ro = true;