gcc/tree-vect-patterns.cc
1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2022 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "ssa.h"
29 #include "expmed.h"
30 #include "optabs-tree.h"
31 #include "insn-config.h"
32 #include "recog.h" /* FIXME: for insn_data */
33 #include "fold-const.h"
34 #include "stor-layout.h"
35 #include "tree-eh.h"
36 #include "gimplify.h"
37 #include "gimple-iterator.h"
38 #include "cfgloop.h"
39 #include "tree-vectorizer.h"
40 #include "dumpfile.h"
41 #include "builtins.h"
42 #include "internal-fn.h"
43 #include "case-cfn-macros.h"
44 #include "fold-const-call.h"
45 #include "attribs.h"
46 #include "cgraph.h"
47 #include "omp-simd-clone.h"
48 #include "predict.h"
49 #include "tree-vector-builder.h"
50 #include "vec-perm-indices.h"
51 #include "gimple-range.h"
54 /* TODO: Note the vectorizer still builds COND_EXPRs with GENERIC compares
55 in the first operand. Disentangling this is future work; the
56 IL is properly transferred to VEC_COND_EXPRs with separate compares. */
59 /* Return true if we have a useful VR_RANGE range for VAR, storing it
60 in *MIN_VALUE and *MAX_VALUE if so. Note the range in the dump files. */
62 static bool
63 vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
65 value_range vr;
66 get_range_query (cfun)->range_of_expr (vr, var);
67 if (vr.undefined_p ())
68 vr.set_varying (TREE_TYPE (var));
69 *min_value = wi::to_wide (vr.min ());
70 *max_value = wi::to_wide (vr.max ());
71 value_range_kind vr_type = vr.kind ();
72 wide_int nonzero = get_nonzero_bits (var);
73 signop sgn = TYPE_SIGN (TREE_TYPE (var));
74 if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
75 nonzero, sgn) == VR_RANGE)
77 if (dump_enabled_p ())
79 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
80 dump_printf (MSG_NOTE, " has range [");
81 dump_hex (MSG_NOTE, *min_value);
82 dump_printf (MSG_NOTE, ", ");
83 dump_hex (MSG_NOTE, *max_value);
84 dump_printf (MSG_NOTE, "]\n");
86 return true;
88 else
90 if (dump_enabled_p ())
92 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
93 dump_printf (MSG_NOTE, " has no range info\n");
95 return false;
99 /* Report that we've found an instance of pattern PATTERN in
100 statement STMT. */
102 static void
103 vect_pattern_detected (const char *name, gimple *stmt)
105 if (dump_enabled_p ())
106 dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt);
109 /* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
110 return the pattern statement's stmt_vec_info. Set its vector type to
111 VECTYPE if it doesn't have one already. */
113 static stmt_vec_info
114 vect_init_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
115 stmt_vec_info orig_stmt_info, tree vectype)
117 stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
118 if (pattern_stmt_info == NULL)
119 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
120 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));
122 pattern_stmt_info->pattern_stmt_p = true;
123 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
124 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
125 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
126 if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
128 gcc_assert (!vectype
129 || (VECTOR_BOOLEAN_TYPE_P (vectype)
130 == vect_use_mask_type_p (orig_stmt_info)));
131 STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
132 pattern_stmt_info->mask_precision = orig_stmt_info->mask_precision;
134 return pattern_stmt_info;
137 /* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
138 Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
139 have one already. */
141 static void
142 vect_set_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
143 stmt_vec_info orig_stmt_info, tree vectype)
145 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
146 STMT_VINFO_RELATED_STMT (orig_stmt_info)
147 = vect_init_pattern_stmt (vinfo, pattern_stmt, orig_stmt_info, vectype);
150 /* Add NEW_STMT to STMT_INFO's pattern definition statements. If VECTYPE
151 is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
152 be different from the vector type of the final pattern statement.
153 If VECTYPE is a mask type, SCALAR_TYPE_FOR_MASK is the scalar type
154 from which it was derived. */
156 static inline void
157 append_pattern_def_seq (vec_info *vinfo,
158 stmt_vec_info stmt_info, gimple *new_stmt,
159 tree vectype = NULL_TREE,
160 tree scalar_type_for_mask = NULL_TREE)
162 gcc_assert (!scalar_type_for_mask
163 == (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype)));
164 if (vectype)
166 stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
167 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
168 if (scalar_type_for_mask)
169 new_stmt_info->mask_precision
170 = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (scalar_type_for_mask));
172 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
173 new_stmt);
176 /* The caller wants to perform new operations on vect_external variable
177 VAR, so that the result of the operations would also be vect_external.
178 Return the edge on which the operations can be performed, if one exists.
179 Return null if the operations should instead be treated as part of
180 the pattern that needs them. */
182 static edge
183 vect_get_external_def_edge (vec_info *vinfo, tree var)
185 edge e = NULL;
186 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
188 e = loop_preheader_edge (loop_vinfo->loop);
189 if (!SSA_NAME_IS_DEFAULT_DEF (var))
191 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
192 if (bb == NULL
193 || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
194 e = NULL;
197 return e;
200 /* Return true if the target supports a vector version of CODE,
201 where CODE is known to map to a direct optab with the given SUBTYPE.
202 ITYPE specifies the type of (some of) the scalar inputs and OTYPE
203 specifies the type of the scalar result.
205 If CODE allows the inputs and outputs to have different type
206 (such as for WIDEN_SUM_EXPR), it is the input mode rather
207 than the output mode that determines the appropriate target pattern.
208 Operand 0 of the target pattern then specifies the mode that the output
209 must have.
211 When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
212 Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
213 is nonnull. */
215 static bool
216 vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code,
217 tree itype, tree *vecotype_out,
218 tree *vecitype_out = NULL,
219 enum optab_subtype subtype = optab_default)
221 tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
222 if (!vecitype)
223 return false;
225 tree vecotype = get_vectype_for_scalar_type (vinfo, otype);
226 if (!vecotype)
227 return false;
229 optab optab = optab_for_tree_code (code, vecitype, subtype);
230 if (!optab)
231 return false;
233 insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
234 if (icode == CODE_FOR_nothing
235 || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
236 return false;
238 *vecotype_out = vecotype;
239 if (vecitype_out)
240 *vecitype_out = vecitype;
241 return true;
244 /* Round bit precision PRECISION up to a full element. */
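/* For example, vect_element_precision (5) is 8 and vect_element_precision (17)
   is 32: the precision is rounded up to the next power of two, but never
   below BITS_PER_UNIT.  */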
246 static unsigned int
247 vect_element_precision (unsigned int precision)
249 precision = 1 << ceil_log2 (precision);
250 return MAX (precision, BITS_PER_UNIT);
253 /* If OP is defined by a statement that's being considered for vectorization,
254 return information about that statement, otherwise return NULL. */
256 static stmt_vec_info
257 vect_get_internal_def (vec_info *vinfo, tree op)
259 stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
260 if (def_stmt_info
261 && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
262 return def_stmt_info;
263 return NULL;
266 /* Check whether NAME, an ssa-name used in STMT_VINFO,
267 is a result of a type promotion, such that:
268 DEF_STMT: NAME = NOP (name0)
269 If CHECK_SIGN is TRUE, check that either both types are signed or both are
270 unsigned. */
272 static bool
273 type_conversion_p (vec_info *vinfo, tree name, bool check_sign,
274 tree *orig_type, gimple **def_stmt, bool *promotion)
276 tree type = TREE_TYPE (name);
277 tree oprnd0;
278 enum vect_def_type dt;
280 stmt_vec_info def_stmt_info;
281 if (!vect_is_simple_use (name, vinfo, &dt, &def_stmt_info, def_stmt))
282 return false;
284 if (dt != vect_internal_def
285 && dt != vect_external_def && dt != vect_constant_def)
286 return false;
288 if (!*def_stmt)
289 return false;
291 if (!is_gimple_assign (*def_stmt))
292 return false;
294 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
295 return false;
297 oprnd0 = gimple_assign_rhs1 (*def_stmt);
299 *orig_type = TREE_TYPE (oprnd0);
300 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
301 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
302 return false;
304 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
305 *promotion = true;
306 else
307 *promotion = false;
309 if (!vect_is_simple_use (oprnd0, vinfo, &dt))
310 return false;
312 return true;
315 /* Holds information about an input operand after some sign changes
316 and type promotions have been peeled away. */
317 class vect_unpromoted_value {
318 public:
319 vect_unpromoted_value ();
321 void set_op (tree, vect_def_type, stmt_vec_info = NULL);
323 /* The value obtained after peeling away zero or more casts. */
324 tree op;
326 /* The type of OP. */
327 tree type;
329 /* The definition type of OP. */
330 vect_def_type dt;
332 /* If OP is the result of peeling at least one cast, and if the cast
333 of OP itself is a vectorizable statement, CASTER identifies that
334 statement, otherwise it is null. */
335 stmt_vec_info caster;
338 inline vect_unpromoted_value::vect_unpromoted_value ()
339 : op (NULL_TREE),
340 type (NULL_TREE),
341 dt (vect_uninitialized_def),
342 caster (NULL)
346 /* Set the operand to OP_IN, its definition type to DT_IN, and the
347 statement that casts it to CASTER_IN. */
349 inline void
350 vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
351 stmt_vec_info caster_in)
353 op = op_in;
354 type = TREE_TYPE (op);
355 dt = dt_in;
356 caster = caster_in;
359 /* If OP is a vectorizable SSA name, strip a sequence of integer conversions
360 to reach some vectorizable inner operand OP', continuing as long as it
361 is possible to convert OP' back to OP using a possible sign change
362 followed by a possible promotion P. Return this OP', or null if OP is
363 not a vectorizable SSA name. If there is a promotion P, describe its
364 input in UNPROM, otherwise describe OP' in UNPROM. If SINGLE_USE_P
365 is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
366 have more than one user.
368 A successful return means that it is possible to go from OP' to OP
369 via UNPROM. The cast from OP' to UNPROM is at most a sign change,
370 whereas the cast from UNPROM to OP might be a promotion, a sign
371 change, or a nop.
373 E.g. say we have:
375 signed short *ptr = ...;
376 signed short C = *ptr;
377 unsigned short B = (unsigned short) C; // sign change
378 signed int A = (signed int) B; // unsigned promotion
379 ...possible other uses of A...
380 unsigned int OP = (unsigned int) A; // sign change
382 In this case it's possible to go directly from C to OP using:
384 OP = (unsigned int) (unsigned short) C;
385 +------------+ +--------------+
386 promotion sign change
388 so OP' would be C. The input to the promotion is B, so UNPROM
389 would describe B. */
391 static tree
392 vect_look_through_possible_promotion (vec_info *vinfo, tree op,
393 vect_unpromoted_value *unprom,
394 bool *single_use_p = NULL)
396 tree res = NULL_TREE;
397 tree op_type = TREE_TYPE (op);
398 unsigned int orig_precision = TYPE_PRECISION (op_type);
399 unsigned int min_precision = orig_precision;
400 stmt_vec_info caster = NULL;
401 while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
403 /* See whether OP is simple enough to vectorize. */
404 stmt_vec_info def_stmt_info;
405 gimple *def_stmt;
406 vect_def_type dt;
407 if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
408 break;
410 /* If OP is the input of a demotion, skip over it to see whether
411 OP is itself the result of a promotion. If so, the combined
412 effect of the promotion and the demotion might fit the required
413 pattern, otherwise neither operation fits.
415 This copes with cases such as the result of an arithmetic
416 operation being truncated before being stored, and where that
417 arithmetic operation has been recognized as an over-widened one. */
418 if (TYPE_PRECISION (op_type) <= min_precision)
420 /* Use OP as the UNPROM described above if we haven't yet
421 found a promotion, or if using the new input preserves the
422 sign of the previous promotion. */
423 if (!res
424 || TYPE_PRECISION (unprom->type) == orig_precision
425 || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type))
427 unprom->set_op (op, dt, caster);
428 min_precision = TYPE_PRECISION (op_type);
430 /* Stop if we've already seen a promotion and if this
431 conversion does more than change the sign. */
432 else if (TYPE_PRECISION (op_type)
433 != TYPE_PRECISION (unprom->type))
434 break;
436 /* The sequence now extends to OP. */
437 res = op;
440 /* See whether OP is defined by a cast. Record it as CASTER if
441 the cast is potentially vectorizable. */
442 if (!def_stmt)
443 break;
444 caster = def_stmt_info;
446 /* Ignore pattern statements, since we don't link uses for them. */
447 if (caster
448 && single_use_p
449 && !STMT_VINFO_RELATED_STMT (caster)
450 && !has_single_use (res))
451 *single_use_p = false;
453 gassign *assign = dyn_cast <gassign *> (def_stmt);
454 if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
455 break;
457 /* Continue with the input to the cast. */
458 op = gimple_assign_rhs1 (def_stmt);
459 op_type = TREE_TYPE (op);
461 return res;
464 /* OP is an integer operand to an operation that returns TYPE, and we
465 want to treat the operation as a widening one. So far we can treat
466 it as widening from *COMMON_TYPE.
468 Return true if OP is suitable for such a widening operation,
469 either widening from *COMMON_TYPE or from some supertype of it.
470 Update *COMMON_TYPE to the supertype in the latter case.
472 SHIFT_P is true if OP is a shift amount. */
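/* For example (values chosen purely for illustration): if TYPE is a 32-bit
   integer, *COMMON_TYPE is unsigned char and OP is the constant 1000 (not a
   shift amount), then OP needs 10 bits, 10 * 2 is still no more than 32, and
   rounding 10 up to a full element gives 16, so *COMMON_TYPE becomes a
   16-bit unsigned type.  */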
474 static bool
475 vect_joust_widened_integer (tree type, bool shift_p, tree op,
476 tree *common_type)
478 /* Calculate the minimum precision required by OP, without changing
479 the sign of either operand. */
480 unsigned int precision;
481 if (shift_p)
483 if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
484 return false;
485 precision = TREE_INT_CST_LOW (op);
487 else
489 precision = wi::min_precision (wi::to_widest (op),
490 TYPE_SIGN (*common_type));
491 if (precision * 2 > TYPE_PRECISION (type))
492 return false;
495 /* If OP requires a wider type, switch to that type. The checks
496 above ensure that this is still narrower than the result. */
497 precision = vect_element_precision (precision);
498 if (TYPE_PRECISION (*common_type) < precision)
499 *common_type = build_nonstandard_integer_type
500 (precision, TYPE_UNSIGNED (*common_type));
501 return true;
504 /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
505 is narrower than type, storing the supertype in *COMMON_TYPE if so. */
507 static bool
508 vect_joust_widened_type (tree type, tree new_type, tree *common_type)
510 if (types_compatible_p (*common_type, new_type))
511 return true;
513 /* See if *COMMON_TYPE can hold all values of NEW_TYPE. */
514 if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
515 && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
516 return true;
518 /* See if NEW_TYPE can hold all values of *COMMON_TYPE. */
519 if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
520 && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
522 *common_type = new_type;
523 return true;
526 /* We have mismatched signs, with the signed type being
527 no wider than the unsigned type. In this case we need
528 a wider signed type. */
529 unsigned int precision = MAX (TYPE_PRECISION (*common_type),
530 TYPE_PRECISION (new_type));
531 precision *= 2;
533 if (precision * 2 > TYPE_PRECISION (type))
534 return false;
536 *common_type = build_nonstandard_integer_type (precision, false);
537 return true;
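/* For example (types chosen purely for illustration): jousting an unsigned
   char *COMMON_TYPE against a signed char NEW_TYPE finds that neither type
   can hold all values of the other, so *COMMON_TYPE becomes a 16-bit signed
   type; this only succeeds if TYPE is at least 32 bits wide, so that the new
   supertype is still narrow enough for a widening operation.  */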
540 /* Check whether STMT_INFO can be viewed as a tree of integer operations
541 in which each node either performs CODE or WIDENED_CODE, and where
542 each leaf operand is narrower than the result of STMT_INFO. MAX_NOPS
543 specifies the maximum number of leaf operands. SHIFT_P says whether
544 CODE and WIDENED_CODE are some sort of shift.
546 If STMT_INFO is such a tree, return the number of leaf operands
547 and describe them in UNPROM[0] onwards. Also set *COMMON_TYPE
548 to a type that (a) is narrower than the result of STMT_INFO and
549 (b) can hold all leaf operand values.
551 If SUBTYPE is nonnull, allow the signs of the operands to differ
552 while still requiring the same precision. SUBTYPE is updated to reflect
553 this.
555 Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
556 exists. */
558 static unsigned int
559 vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code,
560 tree_code widened_code, bool shift_p,
561 unsigned int max_nops,
562 vect_unpromoted_value *unprom, tree *common_type,
563 enum optab_subtype *subtype = NULL)
565 /* Check for an integer operation with the right code. */
566 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
567 if (!assign)
568 return 0;
570 tree_code rhs_code = gimple_assign_rhs_code (assign);
571 if (rhs_code != code && rhs_code != widened_code)
572 return 0;
574 tree type = TREE_TYPE (gimple_assign_lhs (assign));
575 if (!INTEGRAL_TYPE_P (type))
576 return 0;
578 /* Assume that both operands will be leaf operands. */
579 max_nops -= 2;
581 /* Check the operands. */
582 unsigned int next_op = 0;
583 for (unsigned int i = 0; i < 2; ++i)
585 vect_unpromoted_value *this_unprom = &unprom[next_op];
586 unsigned int nops = 1;
587 tree op = gimple_op (assign, i + 1);
588 if (i == 1 && TREE_CODE (op) == INTEGER_CST)
590 /* We already have a common type from earlier operands.
591 Update it to account for OP. */
592 this_unprom->set_op (op, vect_constant_def);
593 if (!vect_joust_widened_integer (type, shift_p, op, common_type))
594 return 0;
596 else
598 /* Only allow shifts by constants. */
599 if (shift_p && i == 1)
600 return 0;
602 if (!vect_look_through_possible_promotion (vinfo, op, this_unprom))
603 return 0;
605 if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
607 /* The operand isn't widened. If STMT_INFO has the code
608 for an unwidened operation, recursively check whether
609 this operand is a node of the tree. */
610 if (rhs_code != code
611 || max_nops == 0
612 || this_unprom->dt != vect_internal_def)
613 return 0;
615 /* Give back the leaf slot allocated above now that we're
616 not treating this as a leaf operand. */
617 max_nops += 1;
619 /* Recursively process the definition of the operand. */
620 stmt_vec_info def_stmt_info
621 = vinfo->lookup_def (this_unprom->op);
622 nops = vect_widened_op_tree (vinfo, def_stmt_info, code,
623 widened_code, shift_p, max_nops,
624 this_unprom, common_type,
625 subtype);
626 if (nops == 0)
627 return 0;
629 max_nops -= nops;
631 else
633 /* Make sure that the operand is narrower than the result. */
634 if (TYPE_PRECISION (this_unprom->type) * 2
635 > TYPE_PRECISION (type))
636 return 0;
638 /* Update COMMON_TYPE for the new operand. */
639 if (i == 0)
640 *common_type = this_unprom->type;
641 else if (!vect_joust_widened_type (type, this_unprom->type,
642 common_type))
644 if (subtype)
646 /* See if we can sign extend the smaller type. */
647 if (TYPE_PRECISION (this_unprom->type)
648 > TYPE_PRECISION (*common_type))
649 *common_type = this_unprom->type;
650 *subtype = optab_vector_mixed_sign;
652 else
653 return 0;
657 next_op += nops;
659 return next_op;
662 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
663 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
665 static tree
666 vect_recog_temp_ssa_var (tree type, gimple *stmt)
668 return make_temp_ssa_name (type, stmt, "patt");
671 /* STMT2_INFO describes a type conversion that could be split into STMT1
672 followed by a version of STMT2_INFO that takes NEW_RHS as its first
673 input. Try to do this using pattern statements, returning true on
674 success. */
676 static bool
677 vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
678 gimple *stmt1, tree vectype)
680 if (is_pattern_stmt_p (stmt2_info))
682 /* STMT2_INFO is part of a pattern. Get the statement to which
683 the pattern is attached. */
684 stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
685 vect_init_pattern_stmt (vinfo, stmt1, orig_stmt2_info, vectype);
687 if (dump_enabled_p ())
688 dump_printf_loc (MSG_NOTE, vect_location,
689 "Splitting pattern statement: %G", stmt2_info->stmt);
691 /* Since STMT2_INFO is a pattern statement, we can change it
692 in-situ without worrying about changing the code for the
693 containing block. */
694 gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);
696 if (dump_enabled_p ())
698 dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
699 dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
700 stmt2_info->stmt);
703 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
704 if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
705 /* STMT2_INFO is the actual pattern statement. Add STMT1
706 to the end of the definition sequence. */
707 gimple_seq_add_stmt_without_update (def_seq, stmt1);
708 else
710 /* STMT2_INFO belongs to the definition sequence. Insert STMT1
711 before it. */
712 gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
713 gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
715 return true;
717 else
719 /* STMT2_INFO doesn't yet have a pattern. Try to create a
720 two-statement pattern now. */
721 gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
722 tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
723 tree lhs_vectype = get_vectype_for_scalar_type (vinfo, lhs_type);
724 if (!lhs_vectype)
725 return false;
727 if (dump_enabled_p ())
728 dump_printf_loc (MSG_NOTE, vect_location,
729 "Splitting statement: %G", stmt2_info->stmt);
731 /* Add STMT1 as a singleton pattern definition sequence. */
732 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
733 vect_init_pattern_stmt (vinfo, stmt1, stmt2_info, vectype);
734 gimple_seq_add_stmt_without_update (def_seq, stmt1);
736 /* Build the second of the two pattern statements. */
737 tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
738 gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
739 vect_set_pattern_stmt (vinfo, new_stmt2, stmt2_info, lhs_vectype);
741 if (dump_enabled_p ())
743 dump_printf_loc (MSG_NOTE, vect_location,
744 "into pattern statements: %G", stmt1);
745 dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
746 (gimple *) new_stmt2);
749 return true;
753 /* Convert UNPROM to TYPE and return the result, adding new statements
754 to STMT_INFO's pattern definition statements if no better way is
755 available. VECTYPE is the vector form of TYPE.
757 If SUBTYPE then convert the type based on the subtype. */
759 static tree
760 vect_convert_input (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
761 vect_unpromoted_value *unprom, tree vectype,
762 enum optab_subtype subtype = optab_default)
764 /* Update the type if the signs differ. */
765 if (subtype == optab_vector_mixed_sign)
767 gcc_assert (!TYPE_UNSIGNED (type));
768 if (TYPE_UNSIGNED (TREE_TYPE (unprom->op)))
770 type = unsigned_type_for (type);
771 vectype = unsigned_type_for (vectype);
775 /* Check for a no-op conversion. */
776 if (types_compatible_p (type, TREE_TYPE (unprom->op)))
777 return unprom->op;
779 /* Allow the caller to create constant vect_unpromoted_values. */
780 if (TREE_CODE (unprom->op) == INTEGER_CST)
781 return wide_int_to_tree (type, wi::to_widest (unprom->op));
783 tree input = unprom->op;
784 if (unprom->caster)
786 tree lhs = gimple_get_lhs (unprom->caster->stmt);
787 tree lhs_type = TREE_TYPE (lhs);
789 /* If the result of the existing cast is the right width, use it
790 instead of the source of the cast. */
791 if (TYPE_PRECISION (lhs_type) == TYPE_PRECISION (type))
792 input = lhs;
793 /* If the precision we want is between the source and result
794 precisions of the existing cast, try splitting the cast into
795 two and tapping into a mid-way point. */
796 else if (TYPE_PRECISION (lhs_type) > TYPE_PRECISION (type)
797 && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type))
799 /* In order to preserve the semantics of the original cast,
800 give the mid-way point the same signedness as the input value.
802 It would be possible to use a signed type here instead if
803 TYPE is signed and UNPROM->TYPE is unsigned, but that would
804 make the sign of the midtype sensitive to the order in
805 which we process the statements, since the signedness of
806 TYPE is the signedness required by just one of possibly
807 many users. Also, unsigned promotions are usually as cheap
808 as or cheaper than signed ones, so it's better to keep an
809 unsigned promotion. */
810 tree midtype = build_nonstandard_integer_type
811 (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type));
812 tree vec_midtype = get_vectype_for_scalar_type (vinfo, midtype);
813 if (vec_midtype)
815 input = vect_recog_temp_ssa_var (midtype, NULL);
816 gassign *new_stmt = gimple_build_assign (input, NOP_EXPR,
817 unprom->op);
818 if (!vect_split_statement (vinfo, unprom->caster, input, new_stmt,
819 vec_midtype))
820 append_pattern_def_seq (vinfo, stmt_info,
821 new_stmt, vec_midtype);
825 /* See if we can reuse an existing result. */
826 if (types_compatible_p (type, TREE_TYPE (input)))
827 return input;
830 /* We need a new conversion statement. */
831 tree new_op = vect_recog_temp_ssa_var (type, NULL);
832 gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, input);
834 /* If OP is an external value, see if we can insert the new statement
835 on an incoming edge. */
836 if (input == unprom->op && unprom->dt == vect_external_def)
837 if (edge e = vect_get_external_def_edge (vinfo, input))
839 basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
840 gcc_assert (!new_bb);
841 return new_op;
844 /* As a (common) last resort, add the statement to the pattern itself. */
845 append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype);
846 return new_op;
849 /* Invoke vect_convert_input for N elements of UNPROM and store the
850 result in the corresponding elements of RESULT.
852 If SUBTYPE then convert the type based on the subtype. */
854 static void
855 vect_convert_inputs (vec_info *vinfo, stmt_vec_info stmt_info, unsigned int n,
856 tree *result, tree type, vect_unpromoted_value *unprom,
857 tree vectype, enum optab_subtype subtype = optab_default)
859 for (unsigned int i = 0; i < n; ++i)
861 unsigned int j;
862 for (j = 0; j < i; ++j)
863 if (unprom[j].op == unprom[i].op)
864 break;
866 if (j < i)
867 result[i] = result[j];
868 else
869 result[i] = vect_convert_input (vinfo, stmt_info,
870 type, &unprom[i], vectype, subtype);
874 /* The caller has created a (possibly empty) sequence of pattern definition
875 statements followed by a single statement PATTERN_STMT. Cast the result
876 of this final statement to TYPE. If a new statement is needed, add
877 PATTERN_STMT to the end of STMT_INFO's pattern definition statements
878 and return the new statement, otherwise return PATTERN_STMT as-is.
879 VECITYPE is the vector form of PATTERN_STMT's result type. */
881 static gimple *
882 vect_convert_output (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
883 gimple *pattern_stmt, tree vecitype)
885 tree lhs = gimple_get_lhs (pattern_stmt);
886 if (!types_compatible_p (type, TREE_TYPE (lhs)))
888 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vecitype);
889 tree cast_var = vect_recog_temp_ssa_var (type, NULL);
890 pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
892 return pattern_stmt;
895 /* Return true if STMT_VINFO describes a reduction for which reassociation
896 is allowed. If STMT_INFO is part of a group, assume that it's part of
897 a reduction chain and optimistically assume that all statements
898 except the last allow reassociation.
899 Also require it to have code CODE and to be a reduction
900 in the outermost loop. When returning true, store the operands in
901 *OP0_OUT and *OP1_OUT. */
903 static bool
904 vect_reassociating_reduction_p (vec_info *vinfo,
905 stmt_vec_info stmt_info, tree_code code,
906 tree *op0_out, tree *op1_out)
908 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
909 if (!loop_info)
910 return false;
912 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
913 if (!assign || gimple_assign_rhs_code (assign) != code)
914 return false;
916 /* We don't allow changing the order of the computation in the inner-loop
917 when doing outer-loop vectorization. */
918 class loop *loop = LOOP_VINFO_LOOP (loop_info);
919 if (loop && nested_in_vect_loop_p (loop, stmt_info))
920 return false;
922 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
924 if (needs_fold_left_reduction_p (TREE_TYPE (gimple_assign_lhs (assign)),
925 code))
926 return false;
928 else if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) == NULL)
929 return false;
931 *op0_out = gimple_assign_rhs1 (assign);
932 *op1_out = gimple_assign_rhs2 (assign);
933 if (commutative_tree_code (code) && STMT_VINFO_REDUC_IDX (stmt_info) == 0)
934 std::swap (*op0_out, *op1_out);
935 return true;
938 /* match.pd function to match
939 (cond (cmp@3 a b) (convert@1 c) (convert@2 d))
940 with conditions:
941 1) @1, @2, c, d, a, b are all integral type.
942 2) There's single_use for both @1 and @2.
943 3) a, c have same precision.
944 4) c and @1 have different precision.
945 5) c, d are the same type or they can differ in sign when convert is
946 truncation.
948 Record a, c, d and @3. */
950 extern bool gimple_cond_expr_convert_p (tree, tree*, tree (*)(tree));
952 /* Function vect_recog_cond_expr_convert
954 Try to find the following pattern:
956 TYPE_AB A,B;
957 TYPE_CD C,D;
958 TYPE_E E;
959 TYPE_E op_true = (TYPE_E) A;
960 TYPE_E op_false = (TYPE_E) B;
962 E = C cmp D ? op_true : op_false;
964 where
965 TYPE_PRECISION (TYPE_E) != TYPE_PRECISION (TYPE_CD);
966 TYPE_PRECISION (TYPE_AB) == TYPE_PRECISION (TYPE_CD);
967 single_use of op_true and op_false.
968 TYPE_AB could differ in sign when (TYPE_E) A is a truncation.
970 Input:
972 * STMT_VINFO: The stmt from which the pattern search begins.
973 here it starts with E = C cmp D ? op_true : op_false;
975 Output:
977 TYPE1 E' = C cmp D ? A : B;
978 TYPE3 E = (TYPE3) E';
980 There may be an extra nop_convert for A or B to handle different signedness.
982 * TYPE_OUT: The vector type of the output of this pattern.
984 * Return value: A new stmt that will be used to replace the sequence of
985 stmts that constitute the pattern. In this case it will be:
986 E = (TYPE3)E';
987 E' = C cmp D ? A : B; is recorded in pattern definition statements; */
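/* As an illustration only (array names, types and bound chosen arbitrarily),
   the kind of source loop this pattern targets is:

     long long a[N], b[N], c[N], d[N];
     int e[N];
     for (int i = 0; i < N; i++)
       e[i] = c[i] < d[i] ? (int) a[i] : (int) b[i];

   where the selection is done in the wide 64-bit type and only the chosen
   value is truncated to int.  */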
989 static gimple *
990 vect_recog_cond_expr_convert_pattern (vec_info *vinfo,
991 stmt_vec_info stmt_vinfo, tree *type_out)
993 gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
994 tree lhs, match[4], temp, type, new_lhs, op2;
995 gimple *cond_stmt;
996 gimple *pattern_stmt;
998 if (!last_stmt)
999 return NULL;
1001 lhs = gimple_assign_lhs (last_stmt);
1003 /* Find E = C cmp D ? (TYPE3) A : (TYPE3) B;
1004 TYPE_PRECISION (A) == TYPE_PRECISION (C). */
1005 if (!gimple_cond_expr_convert_p (lhs, &match[0], NULL))
1006 return NULL;
1008 vect_pattern_detected ("vect_recog_cond_expr_convert_pattern", last_stmt);
1010 op2 = match[2];
1011 type = TREE_TYPE (match[1]);
1012 if (TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (match[2])))
1014 op2 = vect_recog_temp_ssa_var (type, NULL);
1015 gimple* nop_stmt = gimple_build_assign (op2, NOP_EXPR, match[2]);
1016 append_pattern_def_seq (vinfo, stmt_vinfo, nop_stmt,
1017 get_vectype_for_scalar_type (vinfo, type));
1020 temp = vect_recog_temp_ssa_var (type, NULL);
1021 cond_stmt = gimple_build_assign (temp, build3 (COND_EXPR, type, match[3],
1022 match[1], op2));
1023 append_pattern_def_seq (vinfo, stmt_vinfo, cond_stmt,
1024 get_vectype_for_scalar_type (vinfo, type));
1025 new_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
1026 pattern_stmt = gimple_build_assign (new_lhs, NOP_EXPR, temp);
1027 *type_out = STMT_VINFO_VECTYPE (stmt_vinfo);
1029 if (dump_enabled_p ())
1030 dump_printf_loc (MSG_NOTE, vect_location,
1031 "created pattern stmt: %G", pattern_stmt);
1032 return pattern_stmt;
1035 /* Function vect_recog_dot_prod_pattern
1037 Try to find the following pattern:
1039 type1a x_t;
1040 type1b y_t;
1041 TYPE1 prod;
1042 TYPE2 sum = init;
1043 loop:
1044 sum_0 = phi <init, sum_1>
1045 S1 x_t = ...
1046 S2 y_t = ...
1047 S3 x_T = (TYPE1) x_t;
1048 S4 y_T = (TYPE1) y_t;
1049 S5 prod = x_T * y_T;
1050 [S6 prod = (TYPE2) prod; #optional]
1051 S7 sum_1 = prod + sum_0;
1053 where 'TYPE1' is exactly double the size of 'type1a' and 'type1b'; the sign
1054 of 'TYPE1' must match the sign of either 'type1a' or 'type1b', but the signs
1055 of 'type1a' and 'type1b' themselves can differ.
1057 Input:
1059 * STMT_VINFO: The stmt from which the pattern search begins. In the
1060 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
1061 will be detected.
1063 Output:
1065 * TYPE_OUT: The type of the output of this pattern.
1067 * Return value: A new stmt that will be used to replace the sequence of
1068 stmts that constitute the pattern. In this case it will be:
1069 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
1071 Note: The dot-prod idiom is a widening reduction pattern that is
1072 vectorized without preserving all the intermediate results. It
1073 produces only N/2 (widened) results (by summing up pairs of
1074 intermediate results) rather than all N results. Therefore, we
1075 cannot allow this pattern when we want to get all the results and in
1076 the correct order (as is the case when this computation is in an
1077 inner-loop nested in an outer-loop that is being vectorized). */
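/* As an illustration only (array names and bound chosen arbitrarily), a loop
   of the following shape fits the pattern above:

     short a[N], b[N];
     int sum = 0;
     for (int i = 0; i < N; i++)
       sum += a[i] * b[i];

   where each product is computed in int, exactly twice the width of the
   short inputs, and accumulated into the int reduction variable sum.  */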
1079 static gimple *
1080 vect_recog_dot_prod_pattern (vec_info *vinfo,
1081 stmt_vec_info stmt_vinfo, tree *type_out)
1083 tree oprnd0, oprnd1;
1084 gimple *last_stmt = stmt_vinfo->stmt;
1085 tree type, half_type;
1086 gimple *pattern_stmt;
1087 tree var;
1089 /* Look for the following pattern
1090 DX = (TYPE1) X;
1091 DY = (TYPE1) Y;
1092 DPROD = DX * DY;
1093 DDPROD = (TYPE2) DPROD;
1094 sum_1 = DDPROD + sum_0;
1095 In which
1096 - DX is double the size of X
1097 - DY is double the size of Y
1098 - DX, DY, DPROD all have the same type but the sign
1099 between X, Y and DPROD can differ.
1100 - sum is the same size as DPROD or bigger
1101 - sum has been recognized as a reduction variable.
1103 This is equivalent to:
1104 DPROD = X w* Y; #widen mult
1105 sum_1 = DPROD w+ sum_0; #widen summation
1107 DPROD = X w* Y; #widen mult
1108 sum_1 = DPROD + sum_0; #summation
1111 /* Starting from LAST_STMT, follow the defs of its uses in search
1112 of the above pattern. */
1114 if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1115 &oprnd0, &oprnd1))
1116 return NULL;
1118 type = TREE_TYPE (gimple_get_lhs (last_stmt));
1120 vect_unpromoted_value unprom_mult;
1121 oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);
1123 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1124 we know that oprnd1 is the reduction variable (defined by a loop-header
1125 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1126 Left to check that oprnd0 is defined by a (widen_)mult_expr */
1127 if (!oprnd0)
1128 return NULL;
1130 stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
1131 if (!mult_vinfo)
1132 return NULL;
1134 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
1135 inside the loop (in case we are analyzing an outer-loop). */
1136 vect_unpromoted_value unprom0[2];
1137 enum optab_subtype subtype = optab_vector;
1138 if (!vect_widened_op_tree (vinfo, mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
1139 false, 2, unprom0, &half_type, &subtype))
1140 return NULL;
1142 /* If there are two widening operations, make sure they agree on the sign
1143 of the extension. The result of an optab_vector_mixed_sign operation
1144 is signed; otherwise, the result has the same sign as the operands. */
1145 if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
1146 && (subtype == optab_vector_mixed_sign
1147 ? TYPE_UNSIGNED (unprom_mult.type)
1148 : TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type)))
1149 return NULL;
1151 vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);
1153 /* If the inputs have mixed signs, canonicalize on using the signed
1154 input type for analysis. This also helps when emulating mixed-sign
1155 operations using signed operations. */
1156 if (subtype == optab_vector_mixed_sign)
1157 half_type = signed_type_for (half_type);
1159 tree half_vectype;
1160 if (!vect_supportable_direct_optab_p (vinfo, type, DOT_PROD_EXPR, half_type,
1161 type_out, &half_vectype, subtype))
1163 /* We can emulate a mixed-sign dot-product using a sequence of
1164 signed dot-products; see vect_emulate_mixed_dot_prod for details. */
1165 if (subtype != optab_vector_mixed_sign
1166 || !vect_supportable_direct_optab_p (vinfo, signed_type_for (type),
1167 DOT_PROD_EXPR, half_type,
1168 type_out, &half_vectype,
1169 optab_vector))
1170 return NULL;
1172 *type_out = signed_or_unsigned_type_for (TYPE_UNSIGNED (type),
1173 *type_out);
1176 /* Get the inputs in the appropriate types. */
1177 tree mult_oprnd[2];
1178 vect_convert_inputs (vinfo, stmt_vinfo, 2, mult_oprnd, half_type,
1179 unprom0, half_vectype, subtype);
1181 var = vect_recog_temp_ssa_var (type, NULL);
1182 pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
1183 mult_oprnd[0], mult_oprnd[1], oprnd1);
1185 return pattern_stmt;
1189 /* Function vect_recog_sad_pattern
1191 Try to find the following Sum of Absolute Difference (SAD) pattern:
1193 type x_t, y_t;
1194 signed TYPE1 diff, abs_diff;
1195 TYPE2 sum = init;
1196 loop:
1197 sum_0 = phi <init, sum_1>
1198 S1 x_t = ...
1199 S2 y_t = ...
1200 S3 x_T = (TYPE1) x_t;
1201 S4 y_T = (TYPE1) y_t;
1202 S5 diff = x_T - y_T;
1203 S6 abs_diff = ABS_EXPR <diff>;
1204 [S7 abs_diff = (TYPE2) abs_diff; #optional]
1205 S8 sum_1 = abs_diff + sum_0;
1207 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
1208 same size as 'TYPE1' or bigger. This is a special case of a reduction
1209 computation.
1211 Input:
1213 * STMT_VINFO: The stmt from which the pattern search begins. In the
1214 example, when this function is called with S8, the pattern
1215 {S3,S4,S5,S6,S7,S8} will be detected.
1217 Output:
1219 * TYPE_OUT: The type of the output of this pattern.
1221 * Return value: A new stmt that will be used to replace the sequence of
1222 stmts that constitute the pattern. In this case it will be:
1223 SAD_EXPR <x_t, y_t, sum_0>
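/* As an illustration only (array names and bound chosen arbitrarily), a
   sum-of-absolute-differences loop of the following shape fits the pattern
   above:

     unsigned char x[N], y[N];
     int sum = 0;
     for (int i = 0; i < N; i++)
       sum += __builtin_abs (x[i] - y[i]);

   where the subtraction and the ABS_EXPR are carried out in int, at least
   double the width of the unsigned char inputs.  */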
1226 static gimple *
1227 vect_recog_sad_pattern (vec_info *vinfo,
1228 stmt_vec_info stmt_vinfo, tree *type_out)
1230 gimple *last_stmt = stmt_vinfo->stmt;
1231 tree half_type;
1233 /* Look for the following pattern
1234 DX = (TYPE1) X;
1235 DY = (TYPE1) Y;
1236 DDIFF = DX - DY;
1237 DAD = ABS_EXPR <DDIFF>;
1238 DAD = (TYPE2) DAD; #optional
1239 sum_1 = DAD + sum_0;
1240 In which
1241 - DX is at least double the size of X
1242 - DY is at least double the size of Y
1243 - DX, DY, DDIFF, DAD all have the same type
1244 - sum is the same size as DAD or bigger
1245 - sum has been recognized as a reduction variable.
1247 This is equivalent to:
1248 DDIFF = X w- Y; #widen sub
1249 DAD = ABS_EXPR <DDIFF>;
1250 sum_1 = DAD w+ sum_0; #widen summation
1252 DDIFF = X w- Y; #widen sub
1253 DAD = ABS_EXPR <DDIFF>;
1254 sum_1 = DAD + sum_0; #summation
1257 /* Starting from LAST_STMT, follow the defs of its uses in search
1258 of the above pattern. */
1260 tree plus_oprnd0, plus_oprnd1;
1261 if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1262 &plus_oprnd0, &plus_oprnd1))
1263 return NULL;
1265 tree sum_type = TREE_TYPE (gimple_get_lhs (last_stmt));
1267 /* Any non-truncating sequence of conversions is OK here, since
1268 with a successful match, the result of the ABS(U) is known to fit
1269 within the nonnegative range of the result type. (It cannot be the
1270 negative of the minimum signed value due to the range of the widening
1271 MINUS_EXPR.) */
1272 vect_unpromoted_value unprom_abs;
1273 plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
1274 &unprom_abs);
1276 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1277 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
1278 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
1279 Then check that plus_oprnd0 is defined by an abs_expr. */
1281 if (!plus_oprnd0)
1282 return NULL;
1284 stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
1285 if (!abs_stmt_vinfo)
1286 return NULL;
1288 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
1289 inside the loop (in case we are analyzing an outer-loop). */
1290 gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
1291 if (!abs_stmt
1292 || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
1293 && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
1294 return NULL;
1296 tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
1297 tree abs_type = TREE_TYPE (abs_oprnd);
1298 if (TYPE_UNSIGNED (abs_type))
1299 return NULL;
1301 /* Peel off conversions from the ABS input. This can involve sign
1302 changes (e.g. from an unsigned subtraction to a signed ABS input)
1303 or signed promotion, but it can't include unsigned promotion.
1304 (Note that ABS of an unsigned promotion should have been folded
1305 away before now anyway.) */
1306 vect_unpromoted_value unprom_diff;
1307 abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
1308 &unprom_diff);
1309 if (!abs_oprnd)
1310 return NULL;
1311 if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
1312 && TYPE_UNSIGNED (unprom_diff.type))
1313 return NULL;
1315 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
1316 stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
1317 if (!diff_stmt_vinfo)
1318 return NULL;
1320 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
1321 inside the loop (in case we are analyzing an outer-loop). */
1322 vect_unpromoted_value unprom[2];
1323 if (!vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR, WIDEN_MINUS_EXPR,
1324 false, 2, unprom, &half_type))
1325 return NULL;
1327 vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
1329 tree half_vectype;
1330 if (!vect_supportable_direct_optab_p (vinfo, sum_type, SAD_EXPR, half_type,
1331 type_out, &half_vectype))
1332 return NULL;
1334 /* Get the inputs to the SAD_EXPR in the appropriate types. */
1335 tree sad_oprnd[2];
1336 vect_convert_inputs (vinfo, stmt_vinfo, 2, sad_oprnd, half_type,
1337 unprom, half_vectype);
1339 tree var = vect_recog_temp_ssa_var (sum_type, NULL);
1340 gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
1341 sad_oprnd[1], plus_oprnd1);
1343 return pattern_stmt;
1346 /* Recognize an operation that performs ORIG_CODE on widened inputs,
1347 so that it can be treated as though it had the form:
1349 A_TYPE a;
1350 B_TYPE b;
1351 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1352 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1353 | RES_TYPE a_extend = (RES_TYPE) a_cast; // promotion from HALF_TYPE
1354 | RES_TYPE b_extend = (RES_TYPE) b_cast; // promotion from HALF_TYPE
1355 | RES_TYPE res = a_extend ORIG_CODE b_extend;
1357 Try to replace the pattern with:
1359 A_TYPE a;
1360 B_TYPE b;
1361 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1362 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1363 | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
1364 | RES_TYPE res = (EXT_TYPE) ext; // possible no-op
1366 where EXT_TYPE is wider than HALF_TYPE but has the same signedness.
1368 SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts. NAME is the
1369 name of the pattern being matched, for dump purposes. */
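/* As an illustration only (types chosen arbitrarily): with ORIG_CODE
   PLUS_EXPR and WIDE_CODE WIDEN_PLUS_EXPR, a sequence such as

     unsigned short a, b;
     unsigned int a_ext = (unsigned int) a;
     unsigned int b_ext = (unsigned int) b;
     unsigned int res = a_ext + b_ext;

   can become a single WIDEN_PLUS_EXPR on the unsigned short inputs, with
   the result produced directly in unsigned int.  */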
1371 static gimple *
1372 vect_recog_widen_op_pattern (vec_info *vinfo,
1373 stmt_vec_info last_stmt_info, tree *type_out,
1374 tree_code orig_code, tree_code wide_code,
1375 bool shift_p, const char *name)
1377 gimple *last_stmt = last_stmt_info->stmt;
1379 vect_unpromoted_value unprom[2];
1380 tree half_type;
1381 if (!vect_widened_op_tree (vinfo, last_stmt_info, orig_code, orig_code,
1382 shift_p, 2, unprom, &half_type))
1383 return NULL;
1385 /* Pattern detected. */
1386 vect_pattern_detected (name, last_stmt);
1388 tree type = TREE_TYPE (gimple_get_lhs (last_stmt));
1389 tree itype = type;
1390 if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
1391 || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
1392 itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
1393 TYPE_UNSIGNED (half_type));
1395 /* Check target support */
1396 tree vectype = get_vectype_for_scalar_type (vinfo, half_type);
1397 tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
1398 tree ctype = itype;
1399 tree vecctype = vecitype;
1400 if (orig_code == MINUS_EXPR
1401 && TYPE_UNSIGNED (itype)
1402 && TYPE_PRECISION (type) > TYPE_PRECISION (itype))
1404 /* Subtraction is special, even if half_type is unsigned and no matter
1405 whether type is signed or unsigned, if type is wider than itype,
1406 we need to sign-extend from the widening operation result to the
1407 result type.
1408 Consider half_type unsigned char, operand 1 0xfe, operand 2 0xff,
1409 itype unsigned short and type either int or unsigned int.
1410 Widened (unsigned short) 0xfe - (unsigned short) 0xff is
1411 (unsigned short) 0xffff, but for type int we want the result -1
1412 and for type unsigned int 0xffffffff rather than 0xffff. */
1413 ctype = build_nonstandard_integer_type (TYPE_PRECISION (itype), 0);
1414 vecctype = get_vectype_for_scalar_type (vinfo, ctype);
1417 enum tree_code dummy_code;
1418 int dummy_int;
1419 auto_vec<tree> dummy_vec;
1420 if (!vectype
1421 || !vecitype
1422 || !vecctype
1423 || !supportable_widening_operation (vinfo, wide_code, last_stmt_info,
1424 vecitype, vectype,
1425 &dummy_code, &dummy_code,
1426 &dummy_int, &dummy_vec))
1427 return NULL;
1429 *type_out = get_vectype_for_scalar_type (vinfo, type);
1430 if (!*type_out)
1431 return NULL;
1433 tree oprnd[2];
1434 vect_convert_inputs (vinfo, last_stmt_info,
1435 2, oprnd, half_type, unprom, vectype);
1437 tree var = vect_recog_temp_ssa_var (itype, NULL);
1438 gimple *pattern_stmt = gimple_build_assign (var, wide_code,
1439 oprnd[0], oprnd[1]);
1441 if (vecctype != vecitype)
1442 pattern_stmt = vect_convert_output (vinfo, last_stmt_info, ctype,
1443 pattern_stmt, vecitype);
1445 return vect_convert_output (vinfo, last_stmt_info,
1446 type, pattern_stmt, vecctype);
1449 /* Try to detect multiplication on widened inputs, converting MULT_EXPR
1450 to WIDEN_MULT_EXPR. See vect_recog_widen_op_pattern for details. */
1452 static gimple *
1453 vect_recog_widen_mult_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1454 tree *type_out)
1456 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1457 MULT_EXPR, WIDEN_MULT_EXPR, false,
1458 "vect_recog_widen_mult_pattern");
1461 /* Try to detect addition on widened inputs, converting PLUS_EXPR
1462 to WIDEN_PLUS_EXPR. See vect_recog_widen_op_pattern for details. */
1464 static gimple *
1465 vect_recog_widen_plus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1466 tree *type_out)
1468 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1469 PLUS_EXPR, WIDEN_PLUS_EXPR, false,
1470 "vect_recog_widen_plus_pattern");
1473 /* Try to detect subtraction on widened inputs, converting MINUS_EXPR
1474 to WIDEN_MINUS_EXPR. See vect_recog_widen_op_pattern for details. */
1475 static gimple *
1476 vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1477 tree *type_out)
1479 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1480 MINUS_EXPR, WIDEN_MINUS_EXPR, false,
1481 "vect_recog_widen_minus_pattern");
1484 /* Function vect_recog_popcount_pattern
1486 Try to find the following pattern:
1488 UTYPE1 A;
1489 TYPE1 B;
1490 UTYPE2 temp_in;
1491 TYPE3 temp_out;
1492 temp_in = (UTYPE2)A;
1494 temp_out = __builtin_popcount{,l,ll} (temp_in);
1495 B = (TYPE1) temp_out;
1497 TYPE2 may or may not be equal to TYPE3.
1498 e.g. TYPE2 is equal to TYPE3 for __builtin_popcount,
1499 while TYPE2 is not equal to TYPE3 for __builtin_popcountll.
1501 Input:
1503 * STMT_VINFO: The stmt from which the pattern search begins.
1504 here it starts with B = (TYPE1) temp_out;
1506 Output:
1508 * TYPE_OUT: The vector type of the output of this pattern.
1510 * Return value: A new stmt that will be used to replace the sequence of
1511 stmts that constitute the pattern. In this case it will be:
1512 B = .POPCOUNT (A);
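/* For illustration only (array names, types and bound chosen arbitrarily),
   a loop such as

     unsigned int a[N], b[N];
     for (int i = 0; i < N; i++)
       b[i] = __builtin_popcountll (a[i]);

   fits the pattern: a[i] is zero-extended to the unsigned long long argument
   type, and the int result is converted back to the same 32-bit precision as
   the original input, so it can become b[i] = .POPCOUNT (a[i]).  */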
1515 static gimple *
1516 vect_recog_popcount_pattern (vec_info *vinfo,
1517 stmt_vec_info stmt_vinfo, tree *type_out)
1519 gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
1520 gimple *popcount_stmt, *pattern_stmt;
1521 tree rhs_oprnd, rhs_origin, lhs_oprnd, lhs_type, vec_type, new_var;
1522 auto_vec<tree> vargs;
1524 /* Find B = (TYPE1) temp_out. */
1525 if (!last_stmt)
1526 return NULL;
1527 tree_code code = gimple_assign_rhs_code (last_stmt);
1528 if (!CONVERT_EXPR_CODE_P (code))
1529 return NULL;
1531 lhs_oprnd = gimple_assign_lhs (last_stmt);
1532 lhs_type = TREE_TYPE (lhs_oprnd);
1533 if (!INTEGRAL_TYPE_P (lhs_type))
1534 return NULL;
1536 rhs_oprnd = gimple_assign_rhs1 (last_stmt);
1537 if (TREE_CODE (rhs_oprnd) != SSA_NAME
1538 || !has_single_use (rhs_oprnd))
1539 return NULL;
1540 popcount_stmt = SSA_NAME_DEF_STMT (rhs_oprnd);
1542 /* Find temp_out = __builtin_popcount{,l,ll} (temp_in); */
1543 if (!is_gimple_call (popcount_stmt))
1544 return NULL;
1545 switch (gimple_call_combined_fn (popcount_stmt))
1547 CASE_CFN_POPCOUNT:
1548 break;
1549 default:
1550 return NULL;
1553 if (gimple_call_num_args (popcount_stmt) != 1)
1554 return NULL;
1556 rhs_oprnd = gimple_call_arg (popcount_stmt, 0);
1557 vect_unpromoted_value unprom_diff;
1558 rhs_origin = vect_look_through_possible_promotion (vinfo, rhs_oprnd,
1559 &unprom_diff);
1561 if (!rhs_origin)
1562 return NULL;
1564 /* Input and output of .POPCOUNT should be same-precision integers.
1565 Also A should be unsigned or have the same precision as temp_in,
1566 otherwise there would be a sign_extend from A to temp_in. */
1567 if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (lhs_type)
1568 || (!TYPE_UNSIGNED (unprom_diff.type)
1569 && (TYPE_PRECISION (unprom_diff.type)
1570 != TYPE_PRECISION (TREE_TYPE (rhs_oprnd)))))
1571 return NULL;
1572 vargs.safe_push (unprom_diff.op);
1574 vect_pattern_detected ("vec_regcog_popcount_pattern", popcount_stmt);
1575 vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);
1576 /* Do it only if the backend has popcount<vector_mode>2 pattern. */
1577 if (!vec_type
1578 || !direct_internal_fn_supported_p (IFN_POPCOUNT, vec_type,
1579 OPTIMIZE_FOR_SPEED))
1580 return NULL;
1582 /* Create B = .POPCOUNT (A). */
1583 new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
1584 pattern_stmt = gimple_build_call_internal_vec (IFN_POPCOUNT, vargs);
1585 gimple_call_set_lhs (pattern_stmt, new_var);
1586 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1587 *type_out = vec_type;
1589 if (dump_enabled_p ())
1590 dump_printf_loc (MSG_NOTE, vect_location,
1591 "created pattern stmt: %G", pattern_stmt);
1592 return pattern_stmt;
1595 /* Function vect_recog_pow_pattern
1597 Try to find the following pattern:
1599 x = POW (y, N);
1601 with POW being one of pow, powf, powi, powif and N being
1602 either 2 or 0.5.
1604 Input:
1606 * STMT_VINFO: The stmt from which the pattern search begins.
1608 Output:
1610 * TYPE_OUT: The type of the output of this pattern.
1612 * Return value: A new stmt that will be used to replace the sequence of
1613 stmts that constitute the pattern. In this case it will be:
1614 x = x * x
1616 x = sqrt (x)
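/* For illustration only (array names chosen arbitrarily), a loop such as

     for (int i = 0; i < N; i++)
       y[i] = pow (x[i], 2.0);

   is rewritten to use y[i] = x[i] * x[i], while an exponent of 0.5 becomes a
   call to the SQRT internal function when the target supports it.  */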
1619 static gimple *
1620 vect_recog_pow_pattern (vec_info *vinfo,
1621 stmt_vec_info stmt_vinfo, tree *type_out)
1623 gimple *last_stmt = stmt_vinfo->stmt;
1624 tree base, exp;
1625 gimple *stmt;
1626 tree var;
1628 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
1629 return NULL;
1631 switch (gimple_call_combined_fn (last_stmt))
1633 CASE_CFN_POW:
1634 CASE_CFN_POWI:
1635 break;
1637 default:
1638 return NULL;
1641 base = gimple_call_arg (last_stmt, 0);
1642 exp = gimple_call_arg (last_stmt, 1);
1643 if (TREE_CODE (exp) != REAL_CST
1644 && TREE_CODE (exp) != INTEGER_CST)
1646 if (flag_unsafe_math_optimizations
1647 && TREE_CODE (base) == REAL_CST
1648 && gimple_call_builtin_p (last_stmt, BUILT_IN_NORMAL))
1650 combined_fn log_cfn;
1651 built_in_function exp_bfn;
1652 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
1654 case BUILT_IN_POW:
1655 log_cfn = CFN_BUILT_IN_LOG;
1656 exp_bfn = BUILT_IN_EXP;
1657 break;
1658 case BUILT_IN_POWF:
1659 log_cfn = CFN_BUILT_IN_LOGF;
1660 exp_bfn = BUILT_IN_EXPF;
1661 break;
1662 case BUILT_IN_POWL:
1663 log_cfn = CFN_BUILT_IN_LOGL;
1664 exp_bfn = BUILT_IN_EXPL;
1665 break;
1666 default:
1667 return NULL;
1669 tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
1670 tree exp_decl = builtin_decl_implicit (exp_bfn);
1671 /* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd
1672 does that, but if C is a power of 2, we want to use
1673 exp2 (log2 (C) * x) in the non-vectorized version, but for
1674 vectorization we don't have vectorized exp2. */
1675 if (logc
1676 && TREE_CODE (logc) == REAL_CST
1677 && exp_decl
1678 && lookup_attribute ("omp declare simd",
1679 DECL_ATTRIBUTES (exp_decl)))
1681 cgraph_node *node = cgraph_node::get_create (exp_decl);
1682 if (node->simd_clones == NULL)
1684 if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
1685 || node->definition)
1686 return NULL;
1687 expand_simd_clones (node);
1688 if (node->simd_clones == NULL)
1689 return NULL;
1691 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
1692 if (!*type_out)
1693 return NULL;
1694 tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1695 gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
1696 append_pattern_def_seq (vinfo, stmt_vinfo, g);
1697 tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1698 g = gimple_build_call (exp_decl, 1, def);
1699 gimple_call_set_lhs (g, res);
1700 return g;
1704 return NULL;
1707 /* We now have a pow or powi builtin function call with a constant
1708 exponent. */
1710 /* Catch squaring. */
1711 if ((tree_fits_shwi_p (exp)
1712 && tree_to_shwi (exp) == 2)
1713 || (TREE_CODE (exp) == REAL_CST
1714 && real_equal (&TREE_REAL_CST (exp), &dconst2)))
1716 if (!vect_supportable_direct_optab_p (vinfo, TREE_TYPE (base), MULT_EXPR,
1717 TREE_TYPE (base), type_out))
1718 return NULL;
1720 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1721 stmt = gimple_build_assign (var, MULT_EXPR, base, base);
1722 return stmt;
1725 /* Catch square root. */
1726 if (TREE_CODE (exp) == REAL_CST
1727 && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
1729 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
1730 if (*type_out
1731 && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
1732 OPTIMIZE_FOR_SPEED))
1734 gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
1735 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
1736 gimple_call_set_lhs (stmt, var);
1737 gimple_call_set_nothrow (stmt, true);
1738 return stmt;
1742 return NULL;
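/* As a concrete illustration of the replacements above (types chosen
   arbitrarily, not taken from the sources):

     float x, y;
     x = powf (y, 2.0f);    // becomes  x = y * y;
     x = powf (y, 0.5f);    // becomes  x = sqrtf (y), via IFN_SQRT

   and, when -funsafe-math-optimizations is on and a SIMD-clonable expf
   is available,

     x = powf (2.5f, y);    // becomes  x = expf (c * y), c = logf (2.5f)

   with c folded at compile time.  */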
1746 /* Function vect_recog_widen_sum_pattern
1748 Try to find the following pattern:
1750 type x_t;
1751 TYPE x_T, sum = init;
1752 loop:
1753 sum_0 = phi <init, sum_1>
1754 S1 x_t = *p;
1755 S2 x_T = (TYPE) x_t;
1756 S3 sum_1 = x_T + sum_0;
1758 where type 'TYPE' is at least double the size of type 'type', i.e. we're
1759 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1760 a special case of a reduction computation.
1762 Input:
1764 * STMT_VINFO: The stmt from which the pattern search begins. In the example,
1765 when this function is called with S3, the pattern {S2,S3} will be detected.
1767 Output:
1769 * TYPE_OUT: The type of the output of this pattern.
1771 * Return value: A new stmt that will be used to replace the sequence of
1772 stmts that constitute the pattern. In this case it will be:
1773 WIDEN_SUM <x_t, sum_0>
1775 Note: The widening-sum idiom is a widening reduction pattern that is
1776 vectorized without preserving all the intermediate results. It
1777 produces only N/2 (widened) results (by summing up pairs of
1778 intermediate results) rather than all N results. Therefore, we
1779 cannot allow this pattern when we want to get all the results and in
1780 the correct order (as is the case when this computation is in an
1781 inner-loop nested in an outer-loop that is being vectorized). */
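/* For example (purely illustrative), the reduction loop

     unsigned char a[256];
     unsigned int sum = 0;
     for (int i = 0; i < 256; i++)
       sum += a[i];                  // S1: load, S2: widen, S3: add

   matches this pattern: each element is widened from 8 to 32 bits only
   to be accumulated, which WIDEN_SUM_EXPR can do directly from the
   narrow elements.  */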
1783 static gimple *
1784 vect_recog_widen_sum_pattern (vec_info *vinfo,
1785 stmt_vec_info stmt_vinfo, tree *type_out)
1787 gimple *last_stmt = stmt_vinfo->stmt;
1788 tree oprnd0, oprnd1;
1789 tree type;
1790 gimple *pattern_stmt;
1791 tree var;
1793 /* Look for the following pattern
1794 DX = (TYPE) X;
1795 sum_1 = DX + sum_0;
1796 In which DX is at least double the size of X, and sum_1 has been
1797 recognized as a reduction variable.
1800 /* Starting from LAST_STMT, follow the defs of its uses in search
1801 of the above pattern. */
1803 if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1804 &oprnd0, &oprnd1))
1805 return NULL;
1807 type = TREE_TYPE (gimple_get_lhs (last_stmt));
1809 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1810 we know that oprnd1 is the reduction variable (defined by a loop-header
1811 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1812 Left to check that oprnd0 is defined by a cast from type 'type' to type
1813 'TYPE'. */
1815 vect_unpromoted_value unprom0;
1816 if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
1817 || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
1818 return NULL;
1820 vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
1822 if (!vect_supportable_direct_optab_p (vinfo, type, WIDEN_SUM_EXPR,
1823 unprom0.type, type_out))
1824 return NULL;
1826 var = vect_recog_temp_ssa_var (type, NULL);
1827 pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);
1829 return pattern_stmt;
1832 /* Recognize cases in which an operation is performed in one type WTYPE
1833 but could be done more efficiently in a narrower type NTYPE. For example,
1834 if we have:
1836 ATYPE a; // narrower than NTYPE
1837 BTYPE b; // narrower than NTYPE
1838 WTYPE aw = (WTYPE) a;
1839 WTYPE bw = (WTYPE) b;
1840 WTYPE res = aw + bw; // only uses of aw and bw
1842 then it would be more efficient to do:
1844 NTYPE an = (NTYPE) a;
1845 NTYPE bn = (NTYPE) b;
1846 NTYPE resn = an + bn;
1847 WTYPE res = (WTYPE) resn;
1849 Other situations include things like:
1851 ATYPE a; // NTYPE or narrower
1852 WTYPE aw = (WTYPE) a;
1853 WTYPE res = aw + b;
1855 when only "(NTYPE) res" is significant. In that case it's more efficient
1856 to truncate "b" and do the operation on NTYPE instead:
1858 NTYPE an = (NTYPE) a;
1859 NTYPE bn = (NTYPE) b; // truncation
1860 NTYPE resn = an + bn;
1861 WTYPE res = (WTYPE) resn;
1863 All users of "res" should then use "resn" instead, making the final
1864 statement dead (not marked as relevant). The final statement is still
1865 needed to maintain the type correctness of the IR.
1867 vect_determine_precisions has already determined the minimum
1868 precision of the operation and the minimum precision required
1869 by users of the result. */
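/* A concrete instance (illustrative only):

     unsigned char *a, *b, *c;
     c[i] = (a[i] + b[i]) >> 2;

   C's integer promotions perform the addition and shift in 32-bit int,
   but only 8 bits of the result are stored, so the computation can be
   narrowed to 16 bits (the NTYPE above) without changing the result.  */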
1871 static gimple *
1872 vect_recog_over_widening_pattern (vec_info *vinfo,
1873 stmt_vec_info last_stmt_info, tree *type_out)
1875 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1876 if (!last_stmt)
1877 return NULL;
1879 /* See whether we have found that this operation can be done on a
1880 narrower type without changing its semantics. */
1881 unsigned int new_precision = last_stmt_info->operation_precision;
1882 if (!new_precision)
1883 return NULL;
1885 tree lhs = gimple_assign_lhs (last_stmt);
1886 tree type = TREE_TYPE (lhs);
1887 tree_code code = gimple_assign_rhs_code (last_stmt);
1889 /* Punt for reductions where we don't handle the type conversions. */
1890 if (STMT_VINFO_DEF_TYPE (last_stmt_info) == vect_reduction_def)
1891 return NULL;
1893 /* Keep the first operand of a COND_EXPR as-is: only the other two
1894 operands are interesting. */
1895 unsigned int first_op = (code == COND_EXPR ? 2 : 1);
1897 /* Check the operands. */
1898 unsigned int nops = gimple_num_ops (last_stmt) - first_op;
1899 auto_vec <vect_unpromoted_value, 3> unprom (nops);
1900 unprom.quick_grow (nops);
1901 unsigned int min_precision = 0;
1902 bool single_use_p = false;
1903 for (unsigned int i = 0; i < nops; ++i)
1905 tree op = gimple_op (last_stmt, first_op + i);
1906 if (TREE_CODE (op) == INTEGER_CST)
1907 unprom[i].set_op (op, vect_constant_def);
1908 else if (TREE_CODE (op) == SSA_NAME)
1910 bool op_single_use_p = true;
1911 if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
1912 &op_single_use_p))
1913 return NULL;
1914 /* If:
1916 (1) N bits of the result are needed;
1917 (2) all inputs are widened from M<N bits; and
1918 (3) one operand OP is a single-use SSA name
1920 we can shift the M->N widening from OP to the output
1921 without changing the number or type of extensions involved.
1922 This then reduces the number of copies of STMT_INFO.
1924 If instead of (3) more than one operand is a single-use SSA name,
1925 shifting the extension to the output is even more of a win.
1927 If instead:
1929 (1) N bits of the result are needed;
1930 (2) one operand OP2 is widened from M2<N bits;
1931 (3) another operand OP1 is widened from M1<M2 bits; and
1932 (4) both OP1 and OP2 are single-use
1934 the choice is between:
1936 (a) truncating OP2 to M1, doing the operation on M1,
1937 and then widening the result to N
1939 (b) widening OP1 to M2, doing the operation on M2, and then
1940 widening the result to N
1942 Both shift the M2->N widening of the inputs to the output.
1943 (a) additionally shifts the M1->M2 widening to the output;
1944 it requires fewer copies of STMT_INFO but requires an extra
1945 M2->M1 truncation.
1947 Which is better will depend on the complexity and cost of
1948 STMT_INFO, which is hard to predict at this stage. However,
1949 a clear tie-breaker in favor of (b) is the fact that the
1950 truncation in (a) increases the length of the operation chain.
1952 If instead of (4) only one of OP1 or OP2 is single-use,
1953 (b) is still a win over doing the operation in N bits:
1954 it still shifts the M2->N widening on the single-use operand
1955 to the output and reduces the number of STMT_INFO copies.
1957 If neither operand is single-use then operating on fewer than
1958 N bits might lead to more extensions overall. Whether it does
1959 or not depends on global information about the vectorization
1960 region, and whether that's a good trade-off would again
1961 depend on the complexity and cost of the statements involved,
1962 as well as things like register pressure that are not normally
1963 modelled at this stage. We therefore ignore these cases
1964 and just optimize the clear single-use wins above.
1966 Thus we take the maximum precision of the unpromoted operands
1967 and record whether any operand is single-use. */
1968 if (unprom[i].dt == vect_internal_def)
1970 min_precision = MAX (min_precision,
1971 TYPE_PRECISION (unprom[i].type));
1972 single_use_p |= op_single_use_p;
1975 else
1976 return NULL;
1979 /* Although the operation could be done in operation_precision, we have
1980 to balance that against introducing extra truncations or extensions.
1981 Calculate the minimum precision that can be handled efficiently.
1983 The loop above determined that the operation could be handled
1984 efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
1985 extension from the inputs to the output without introducing more
1986 instructions, and would reduce the number of instructions required
1987 for STMT_INFO itself.
1989 vect_determine_precisions has also determined that the result only
1990 needs min_output_precision bits. Truncating by a factor of N times
1991 requires a tree of N - 1 instructions, so if TYPE is N times wider
1992 than min_output_precision, doing the operation in TYPE and truncating
1993 the result requires N + (N - 1) = 2N - 1 instructions per output vector.
1994 In contrast:
1996 - truncating the input to a unary operation and doing the operation
1997 in the new type requires at most N - 1 + 1 = N instructions per
1998 output vector
2000 - doing the same for a binary operation requires at most
2001 (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
2003 Both unary and binary operations require fewer instructions than
2004 this if the operands were extended from a suitable truncated form.
2005 Thus there is usually nothing to lose by doing operations in
2006 min_output_precision bits, but there can be something to gain. */
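/* For example, with a 32-bit TYPE and an 8-bit min_output_precision,
   N = 4: operating on TYPE and truncating costs 4 + 3 = 7 statements
   per output vector, whereas truncating the inputs first costs at most
   3 + 1 = 4 statements for a unary operation and 6 + 1 = 7 for a
   binary one, and fewer still if the inputs were extensions from a
   narrower type.  */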
2007 if (!single_use_p)
2008 min_precision = last_stmt_info->min_output_precision;
2009 else
2010 min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
2012 /* Apply the minimum efficient precision we just calculated. */
2013 if (new_precision < min_precision)
2014 new_precision = min_precision;
2015 new_precision = vect_element_precision (new_precision);
2016 if (new_precision >= TYPE_PRECISION (type))
2017 return NULL;
2019 vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);
2021 *type_out = get_vectype_for_scalar_type (vinfo, type);
2022 if (!*type_out)
2023 return NULL;
2025 /* We've found a viable pattern. Get the new type of the operation. */
2026 bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
2027 tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
2029 /* If we're truncating an operation, we need to make sure that we
2030 don't introduce new undefined overflow. The codes tested here are
2031 a subset of those accepted by vect_truncatable_operation_p. */
2032 tree op_type = new_type;
2033 if (TYPE_OVERFLOW_UNDEFINED (new_type)
2034 && (code == PLUS_EXPR || code == MINUS_EXPR || code == MULT_EXPR))
2035 op_type = build_nonstandard_integer_type (new_precision, true);
2037 /* We specifically don't check here whether the target supports the
2038 new operation, since it might be something that a later pattern
2039 wants to rewrite anyway. If targets have a minimum element size
2040 for some optabs, we should pattern-match smaller ops to larger ops
2041 where beneficial. */
2042 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
2043 tree op_vectype = get_vectype_for_scalar_type (vinfo, op_type);
2044 if (!new_vectype || !op_vectype)
2045 return NULL;
2047 if (dump_enabled_p ())
2048 dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
2049 type, new_type);
2051 /* Calculate the rhs operands for an operation on OP_TYPE. */
2052 tree ops[3] = {};
2053 for (unsigned int i = 1; i < first_op; ++i)
2054 ops[i - 1] = gimple_op (last_stmt, i);
2055 vect_convert_inputs (vinfo, last_stmt_info, nops, &ops[first_op - 1],
2056 op_type, &unprom[0], op_vectype);
2058 /* Use the operation to produce a result of type OP_TYPE. */
2059 tree new_var = vect_recog_temp_ssa_var (op_type, NULL);
2060 gimple *pattern_stmt = gimple_build_assign (new_var, code,
2061 ops[0], ops[1], ops[2]);
2062 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
2064 if (dump_enabled_p ())
2065 dump_printf_loc (MSG_NOTE, vect_location,
2066 "created pattern stmt: %G", pattern_stmt);
2068 /* Convert back to the original signedness, if OP_TYPE is different
2069 from NEW_TYPE. */
2070 if (op_type != new_type)
2071 pattern_stmt = vect_convert_output (vinfo, last_stmt_info, new_type,
2072 pattern_stmt, op_vectype);
2074 /* Promote the result to the original type. */
2075 pattern_stmt = vect_convert_output (vinfo, last_stmt_info, type,
2076 pattern_stmt, new_vectype);
2078 return pattern_stmt;
2081 /* Recognize the following patterns:
2083 ATYPE a; // narrower than TYPE
2084 BTYPE b; // narrower than TYPE
2086 1) Multiply high with scaling
2087 TYPE res = ((TYPE) a * (TYPE) b) >> c;
2088 Here, c is bitsize (TYPE) / 2 - 1.
2090 2) ... or also with rounding
2091 TYPE res = ((((TYPE) a * (TYPE) b) >> d) + 1) >> 1;
2092 Here, d is bitsize (TYPE) / 2 - 2.
2094 3) Normal multiply high
2095 TYPE res = ((TYPE) a * (TYPE) b) >> e;
2096 Here, e is bitsize (TYPE) / 2.
2098 where only the bottom half of res is used. */
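/* A concrete instance of pattern 1) (illustrative only):

     short a, b, res;
     res = (short) (((int) a * (int) b) >> 15);

   Here TYPE is the 32-bit int, c = 32 / 2 - 1 = 15, and only the low
   16 bits of the shift are kept, so the statement maps to IFN_MULHS on
   vectors of 16-bit elements.  */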
2100 static gimple *
2101 vect_recog_mulhs_pattern (vec_info *vinfo,
2102 stmt_vec_info last_stmt_info, tree *type_out)
2104 /* Check for a right shift. */
2105 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
2106 if (!last_stmt
2107 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR)
2108 return NULL;
2110 /* Check that the shift result is wider than the users of the
2111 result need (i.e. that narrowing would be a natural choice). */
2112 tree lhs_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
2113 unsigned int target_precision
2114 = vect_element_precision (last_stmt_info->min_output_precision);
2115 if (!INTEGRAL_TYPE_P (lhs_type)
2116 || target_precision >= TYPE_PRECISION (lhs_type))
2117 return NULL;
2119 /* Look through any change in sign on the outer shift input. */
2120 vect_unpromoted_value unprom_rshift_input;
2121 tree rshift_input = vect_look_through_possible_promotion
2122 (vinfo, gimple_assign_rhs1 (last_stmt), &unprom_rshift_input);
2123 if (!rshift_input
2124 || TYPE_PRECISION (TREE_TYPE (rshift_input))
2125 != TYPE_PRECISION (lhs_type))
2126 return NULL;
2128 /* Get the definition of the shift input. */
2129 stmt_vec_info rshift_input_stmt_info
2130 = vect_get_internal_def (vinfo, rshift_input);
2131 if (!rshift_input_stmt_info)
2132 return NULL;
2133 gassign *rshift_input_stmt
2134 = dyn_cast <gassign *> (rshift_input_stmt_info->stmt);
2135 if (!rshift_input_stmt)
2136 return NULL;
2138 stmt_vec_info mulh_stmt_info;
2139 tree scale_term;
2140 bool rounding_p = false;
2142 /* Check for the presence of the rounding term. */
2143 if (gimple_assign_rhs_code (rshift_input_stmt) == PLUS_EXPR)
2145 /* Check that the outer shift was by 1. */
2146 if (!integer_onep (gimple_assign_rhs2 (last_stmt)))
2147 return NULL;
2149 /* Check that the second operand of the PLUS_EXPR is 1. */
2150 if (!integer_onep (gimple_assign_rhs2 (rshift_input_stmt)))
2151 return NULL;
2153 /* Look through any change in sign on the addition input. */
2154 vect_unpromoted_value unprom_plus_input;
2155 tree plus_input = vect_look_through_possible_promotion
2156 (vinfo, gimple_assign_rhs1 (rshift_input_stmt), &unprom_plus_input);
2157 if (!plus_input
2158 || TYPE_PRECISION (TREE_TYPE (plus_input))
2159 != TYPE_PRECISION (TREE_TYPE (rshift_input)))
2160 return NULL;
2162 /* Get the definition of the multiply-high-scale part. */
2163 stmt_vec_info plus_input_stmt_info
2164 = vect_get_internal_def (vinfo, plus_input);
2165 if (!plus_input_stmt_info)
2166 return NULL;
2167 gassign *plus_input_stmt
2168 = dyn_cast <gassign *> (plus_input_stmt_info->stmt);
2169 if (!plus_input_stmt
2170 || gimple_assign_rhs_code (plus_input_stmt) != RSHIFT_EXPR)
2171 return NULL;
2173 /* Look through any change in sign on the scaling input. */
2174 vect_unpromoted_value unprom_scale_input;
2175 tree scale_input = vect_look_through_possible_promotion
2176 (vinfo, gimple_assign_rhs1 (plus_input_stmt), &unprom_scale_input);
2177 if (!scale_input
2178 || TYPE_PRECISION (TREE_TYPE (scale_input))
2179 != TYPE_PRECISION (TREE_TYPE (plus_input)))
2180 return NULL;
2182 /* Get the definition of the multiply-high part. */
2183 mulh_stmt_info = vect_get_internal_def (vinfo, scale_input);
2184 if (!mulh_stmt_info)
2185 return NULL;
2187 /* Get the scaling term. */
2188 scale_term = gimple_assign_rhs2 (plus_input_stmt);
2189 rounding_p = true;
2191 else
2193 mulh_stmt_info = rshift_input_stmt_info;
2194 scale_term = gimple_assign_rhs2 (last_stmt);
2197 /* Check that the scaling factor is constant. */
2198 if (TREE_CODE (scale_term) != INTEGER_CST)
2199 return NULL;
2201 /* Check whether the scaling input term can be seen as two widened
2202 inputs multiplied together. */
2203 vect_unpromoted_value unprom_mult[2];
2204 tree new_type;
2205 unsigned int nops
2206 = vect_widened_op_tree (vinfo, mulh_stmt_info, MULT_EXPR, WIDEN_MULT_EXPR,
2207 false, 2, unprom_mult, &new_type);
2208 if (nops != 2)
2209 return NULL;
2211 /* Adjust output precision. */
2212 if (TYPE_PRECISION (new_type) < target_precision)
2213 new_type = build_nonstandard_integer_type
2214 (target_precision, TYPE_UNSIGNED (new_type));
2216 unsigned mult_precision = TYPE_PRECISION (new_type);
2217 internal_fn ifn;
2218 /* Check that the scaling factor is as expected. Instead of
2219 target_precision, we should use the precision that we actually
2220 use for the internal function. */
2221 if (rounding_p)
2223 /* Check pattern 2). */
2224 if (wi::to_widest (scale_term) + mult_precision + 2
2225 != TYPE_PRECISION (lhs_type))
2226 return NULL;
2228 ifn = IFN_MULHRS;
2230 else
2232 /* Check for pattern 1). */
2233 if (wi::to_widest (scale_term) + mult_precision + 1
2234 == TYPE_PRECISION (lhs_type))
2235 ifn = IFN_MULHS;
2236 /* Check for pattern 3). */
2237 else if (wi::to_widest (scale_term) + mult_precision
2238 == TYPE_PRECISION (lhs_type))
2239 ifn = IFN_MULH;
2240 else
2241 return NULL;
2244 vect_pattern_detected ("vect_recog_mulhs_pattern", last_stmt);
2246 /* Check for target support. */
2247 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
2248 if (!new_vectype
2249 || !direct_internal_fn_supported_p
2250 (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
2251 return NULL;
2253 /* The IR requires a valid vector type for the cast result, even though
2254 it's likely to be discarded. */
2255 *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
2256 if (!*type_out)
2257 return NULL;
2259 /* Generate the IFN_MULH, IFN_MULHS or IFN_MULHRS call. */
2260 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
2261 tree new_ops[2];
2262 vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
2263 unprom_mult, new_vectype);
2264 gcall *mulhrs_stmt
2265 = gimple_build_call_internal (ifn, 2, new_ops[0], new_ops[1]);
2266 gimple_call_set_lhs (mulhrs_stmt, new_var);
2267 gimple_set_location (mulhrs_stmt, gimple_location (last_stmt));
2269 if (dump_enabled_p ())
2270 dump_printf_loc (MSG_NOTE, vect_location,
2271 "created pattern stmt: %G", (gimple *) mulhrs_stmt);
2273 return vect_convert_output (vinfo, last_stmt_info, lhs_type,
2274 mulhrs_stmt, new_vectype);
2277 /* Recognize the patterns:
2279 ATYPE a; // narrower than TYPE
2280 BTYPE b; // narrower than TYPE
2281 (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
2282 or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
2284 where only the bottom half of avg is used. Try to transform them into:
2286 (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
2287 or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
2289 followed by:
2291 TYPE avg = (TYPE) avg';
2293 where NTYPE is no wider than half of TYPE. Since only the bottom half
2294 of avg is used, all or part of the cast of avg' should become redundant.
2296 If there is no target support available, generate code to distribute rshift
2297 over plus and add a carry. */
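/* A concrete instance of form (2) (illustrative only):

     unsigned char a, b, avg;
     avg = (unsigned char) (((unsigned int) a + b + 1) >> 1);

   Only the low 8 bits of the shift result are used, so this becomes
   .AVG_CEIL on vectors of 8-bit elements, or the shift-and-carry
   fallback described above when the target has no such instruction.  */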
2299 static gimple *
2300 vect_recog_average_pattern (vec_info *vinfo,
2301 stmt_vec_info last_stmt_info, tree *type_out)
2303 /* Check for a shift right by one bit. */
2304 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
2305 if (!last_stmt
2306 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
2307 || !integer_onep (gimple_assign_rhs2 (last_stmt)))
2308 return NULL;
2310 /* Check that the shift result is wider than the users of the
2311 result need (i.e. that narrowing would be a natural choice). */
2312 tree lhs = gimple_assign_lhs (last_stmt);
2313 tree type = TREE_TYPE (lhs);
2314 unsigned int target_precision
2315 = vect_element_precision (last_stmt_info->min_output_precision);
2316 if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
2317 return NULL;
2319 /* Look through any change in sign on the shift input. */
2320 tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
2321 vect_unpromoted_value unprom_plus;
2322 rshift_rhs = vect_look_through_possible_promotion (vinfo, rshift_rhs,
2323 &unprom_plus);
2324 if (!rshift_rhs
2325 || TYPE_PRECISION (TREE_TYPE (rshift_rhs)) != TYPE_PRECISION (type))
2326 return NULL;
2328 /* Get the definition of the shift input. */
2329 stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
2330 if (!plus_stmt_info)
2331 return NULL;
2333 /* Check whether the shift input can be seen as a tree of additions on
2334 2 or 3 widened inputs.
2336 Note that the pattern should be a win even if the result of one or
2337 more additions is reused elsewhere: if the pattern matches, we'd be
2338 replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s. */
2339 internal_fn ifn = IFN_AVG_FLOOR;
2340 vect_unpromoted_value unprom[3];
2341 tree new_type;
2342 unsigned int nops = vect_widened_op_tree (vinfo, plus_stmt_info, PLUS_EXPR,
2343 WIDEN_PLUS_EXPR, false, 3,
2344 unprom, &new_type);
2345 if (nops == 0)
2346 return NULL;
2347 if (nops == 3)
2349 /* Check that one operand is 1. */
2350 unsigned int i;
2351 for (i = 0; i < 3; ++i)
2352 if (integer_onep (unprom[i].op))
2353 break;
2354 if (i == 3)
2355 return NULL;
2356 /* Throw away the 1 operand and keep the other two. */
2357 if (i < 2)
2358 unprom[i] = unprom[2];
2359 ifn = IFN_AVG_CEIL;
2362 vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
2364 /* We know that:
2366 (a) the operation can be viewed as:
2368 TYPE widened0 = (TYPE) UNPROM[0];
2369 TYPE widened1 = (TYPE) UNPROM[1];
2370 TYPE tmp1 = widened0 + widened1 {+ 1};
2371 TYPE tmp2 = tmp1 >> 1; // LAST_STMT_INFO
2373 (b) the first two statements are equivalent to:
2375 TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
2376 TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
2378 (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
2379 where sensible;
2381 (d) all the operations can be performed correctly at twice the width of
2382 NEW_TYPE, due to the nature of the average operation; and
2384 (e) users of the result of the right shift need only TARGET_PRECISION
2385 bits, where TARGET_PRECISION is no more than half of TYPE's
2386 precision.
2388 Under these circumstances, the only situation in which NEW_TYPE
2389 could be narrower than TARGET_PRECISION is if widened0, widened1
2390 and an addition result are all used more than once. Thus we can
2391 treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
2392 as "free", whereas widening the result of the average instruction
2393 from NEW_TYPE to TARGET_PRECISION would be a new operation. It's
2394 therefore better not to go narrower than TARGET_PRECISION. */
2395 if (TYPE_PRECISION (new_type) < target_precision)
2396 new_type = build_nonstandard_integer_type (target_precision,
2397 TYPE_UNSIGNED (new_type));
2399 /* Check for target support. */
2400 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
2401 if (!new_vectype)
2402 return NULL;
2404 bool fallback_p = false;
2406 if (direct_internal_fn_supported_p (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
2408 else if (TYPE_UNSIGNED (new_type)
2409 && optab_for_tree_code (RSHIFT_EXPR, new_vectype, optab_scalar)
2410 && optab_for_tree_code (PLUS_EXPR, new_vectype, optab_default)
2411 && optab_for_tree_code (BIT_IOR_EXPR, new_vectype, optab_default)
2412 && optab_for_tree_code (BIT_AND_EXPR, new_vectype, optab_default))
2413 fallback_p = true;
2414 else
2415 return NULL;
2417 /* The IR requires a valid vector type for the cast result, even though
2418 it's likely to be discarded. */
2419 *type_out = get_vectype_for_scalar_type (vinfo, type);
2420 if (!*type_out)
2421 return NULL;
2423 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
2424 tree new_ops[2];
2425 vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
2426 unprom, new_vectype);
2428 if (fallback_p)
2430 /* As a fallback, generate code for following sequence:
2432 shifted_op0 = new_ops[0] >> 1;
2433 shifted_op1 = new_ops[1] >> 1;
2434 sum_of_shifted = shifted_op0 + shifted_op1;
2435 unmasked_carry = new_ops[0] and/or new_ops[1];
2436 carry = unmasked_carry & 1;
2437 new_var = sum_of_shifted + carry;
2440 tree one_cst = build_one_cst (new_type);
2441 gassign *g;
2443 tree shifted_op0 = vect_recog_temp_ssa_var (new_type, NULL);
2444 g = gimple_build_assign (shifted_op0, RSHIFT_EXPR, new_ops[0], one_cst);
2445 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2447 tree shifted_op1 = vect_recog_temp_ssa_var (new_type, NULL);
2448 g = gimple_build_assign (shifted_op1, RSHIFT_EXPR, new_ops[1], one_cst);
2449 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2451 tree sum_of_shifted = vect_recog_temp_ssa_var (new_type, NULL);
2452 g = gimple_build_assign (sum_of_shifted, PLUS_EXPR,
2453 shifted_op0, shifted_op1);
2454 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2456 tree unmasked_carry = vect_recog_temp_ssa_var (new_type, NULL);
2457 tree_code c = (ifn == IFN_AVG_CEIL) ? BIT_IOR_EXPR : BIT_AND_EXPR;
2458 g = gimple_build_assign (unmasked_carry, c, new_ops[0], new_ops[1]);
2459 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2461 tree carry = vect_recog_temp_ssa_var (new_type, NULL);
2462 g = gimple_build_assign (carry, BIT_AND_EXPR, unmasked_carry, one_cst);
2463 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2465 g = gimple_build_assign (new_var, PLUS_EXPR, sum_of_shifted, carry);
2466 return vect_convert_output (vinfo, last_stmt_info, type, g, new_vectype);
2469 /* Generate the IFN_AVG* call. */
2470 gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
2471 new_ops[1]);
2472 gimple_call_set_lhs (average_stmt, new_var);
2473 gimple_set_location (average_stmt, gimple_location (last_stmt));
2475 if (dump_enabled_p ())
2476 dump_printf_loc (MSG_NOTE, vect_location,
2477 "created pattern stmt: %G", (gimple *) average_stmt);
2479 return vect_convert_output (vinfo, last_stmt_info,
2480 type, average_stmt, new_vectype);
2483 /* Recognize cases in which the input to a cast is wider than its
2484 output, and the input is fed by a widening operation. Fold this
2485 by removing the unnecessary intermediate widening. E.g.:
2487 unsigned char a;
2488 unsigned int b = (unsigned int) a;
2489 unsigned short c = (unsigned short) b;
   =>
2493 unsigned short c = (unsigned short) a;
2495 Although this is rare in input IR, it is an expected side-effect
2496 of the over-widening pattern above.
2498 This is beneficial also for integer-to-float conversions, if the
2499 widened integer has more bits than the float, and if the unwidened
2500 input doesn't. */
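/* For the integer-to-float case (illustrative only):

     unsigned char a;
     unsigned long long b = a;
     float c = (float) b;

   can become "c = (float) a": the 64-bit intermediate is wider than
   the 32-bit float result while the original 8-bit input is not, so
   the conversion can be fed directly from the narrow value.  */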
2502 static gimple *
2503 vect_recog_cast_forwprop_pattern (vec_info *vinfo,
2504 stmt_vec_info last_stmt_info, tree *type_out)
2506 /* Check for a cast, including an integer-to-float conversion. */
2507 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
2508 if (!last_stmt)
2509 return NULL;
2510 tree_code code = gimple_assign_rhs_code (last_stmt);
2511 if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
2512 return NULL;
2514 /* Make sure that the rhs is a scalar with a natural bitsize. */
2515 tree lhs = gimple_assign_lhs (last_stmt);
2516 if (!lhs)
2517 return NULL;
2518 tree lhs_type = TREE_TYPE (lhs);
2519 scalar_mode lhs_mode;
2520 if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
2521 || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
2522 return NULL;
2524 /* Check for a narrowing operation (from a vector point of view). */
2525 tree rhs = gimple_assign_rhs1 (last_stmt);
2526 tree rhs_type = TREE_TYPE (rhs);
2527 if (!INTEGRAL_TYPE_P (rhs_type)
2528 || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
2529 || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
2530 return NULL;
2532 /* Try to find an unpromoted input. */
2533 vect_unpromoted_value unprom;
2534 if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
2535 || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
2536 return NULL;
2538 /* If the bits above RHS_TYPE matter, make sure that they're the
2539 same when extending from UNPROM as they are when extending from RHS. */
2540 if (!INTEGRAL_TYPE_P (lhs_type)
2541 && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
2542 return NULL;
2544 /* We can get the same result by casting UNPROM directly, to avoid
2545 the unnecessary widening and narrowing. */
2546 vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
2548 *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
2549 if (!*type_out)
2550 return NULL;
2552 tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2553 gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
2554 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
2556 return pattern_stmt;
2559 /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
2560 to WIDEN_LSHIFT_EXPR. See vect_recog_widen_op_pattern for details. */
2562 static gimple *
2563 vect_recog_widen_shift_pattern (vec_info *vinfo,
2564 stmt_vec_info last_stmt_info, tree *type_out)
2566 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
2567 LSHIFT_EXPR, WIDEN_LSHIFT_EXPR, true,
2568 "vect_recog_widen_shift_pattern");
2571 /* Detect a rotate pattern that wouldn't be otherwise vectorized:
2573 type a_t, b_t, c_t;
2575 S0 a_t = b_t r<< c_t;
2577 Input/Output:
2579 * STMT_VINFO: The stmt from which the pattern search begins,
2580 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
2581 with a sequence:
2583 S1 d_t = -c_t;
2584 S2 e_t = d_t & (B - 1);
2585 S3 f_t = b_t << c_t;
2586 S4 g_t = b_t >> e_t;
2587 S0 a_t = f_t | g_t;
2589 where B is element bitsize of type.
2591 Output:
2593 * TYPE_OUT: The type of the output of this pattern.
2595 * Return value: A new stmt that will be used to replace the rotate
2596 S0 stmt. */
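/* A scalar equivalent of S1-S4/S0 for a 32-bit rotate-left
   (illustrative only; the statements are actually built on an
   unsigned copy of the type):

     unsigned int a, b, c;
     unsigned int e = (-c) & 31;        // S1, S2
     a = (b << c) | (b >> e);           // S3, S4, S0

   which is correct for any rotate amount in [0, 31] and never shifts
   by the full bit width.  */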
2598 static gimple *
2599 vect_recog_rotate_pattern (vec_info *vinfo,
2600 stmt_vec_info stmt_vinfo, tree *type_out)
2602 gimple *last_stmt = stmt_vinfo->stmt;
2603 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
2604 gimple *pattern_stmt, *def_stmt;
2605 enum tree_code rhs_code;
2606 enum vect_def_type dt;
2607 optab optab1, optab2;
2608 edge ext_def = NULL;
2609 bool bswap16_p = false;
2611 if (is_gimple_assign (last_stmt))
2613 rhs_code = gimple_assign_rhs_code (last_stmt);
2614 switch (rhs_code)
2616 case LROTATE_EXPR:
2617 case RROTATE_EXPR:
2618 break;
2619 default:
2620 return NULL;
2623 lhs = gimple_assign_lhs (last_stmt);
2624 oprnd0 = gimple_assign_rhs1 (last_stmt);
2625 type = TREE_TYPE (oprnd0);
2626 oprnd1 = gimple_assign_rhs2 (last_stmt);
2628 else if (gimple_call_builtin_p (last_stmt, BUILT_IN_BSWAP16))
2630 /* __builtin_bswap16 (x) is another form of x r>> 8.
2631 The vectorizer has bswap support, but only if the argument isn't
2632 promoted. */
2633 lhs = gimple_call_lhs (last_stmt);
2634 oprnd0 = gimple_call_arg (last_stmt, 0);
2635 type = TREE_TYPE (oprnd0);
2636 if (!lhs
2637 || TYPE_PRECISION (TREE_TYPE (lhs)) != 16
2638 || TYPE_PRECISION (type) <= 16
2639 || TREE_CODE (oprnd0) != SSA_NAME
2640 || BITS_PER_UNIT != 8)
2641 return NULL;
2643 stmt_vec_info def_stmt_info;
2644 if (!vect_is_simple_use (oprnd0, vinfo, &dt, &def_stmt_info, &def_stmt))
2645 return NULL;
2647 if (dt != vect_internal_def)
2648 return NULL;
2650 if (gimple_assign_cast_p (def_stmt))
2652 def = gimple_assign_rhs1 (def_stmt);
2653 if (INTEGRAL_TYPE_P (TREE_TYPE (def))
2654 && TYPE_PRECISION (TREE_TYPE (def)) == 16)
2655 oprnd0 = def;
2658 type = TREE_TYPE (lhs);
2659 vectype = get_vectype_for_scalar_type (vinfo, type);
2660 if (vectype == NULL_TREE)
2661 return NULL;
2663 if (tree char_vectype = get_same_sized_vectype (char_type_node, vectype))
2665 /* The encoding uses one stepped pattern for each byte in the
2666 16-bit word. */
2667 vec_perm_builder elts (TYPE_VECTOR_SUBPARTS (char_vectype), 2, 3);
2668 for (unsigned i = 0; i < 3; ++i)
2669 for (unsigned j = 0; j < 2; ++j)
2670 elts.quick_push ((i + 1) * 2 - j - 1);
2672 vec_perm_indices indices (elts, 1,
2673 TYPE_VECTOR_SUBPARTS (char_vectype));
2674 machine_mode vmode = TYPE_MODE (char_vectype);
2675 if (can_vec_perm_const_p (vmode, vmode, indices))
2677 /* vectorizable_bswap can handle the __builtin_bswap16 if we
2678 undo the argument promotion. */
2679 if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2681 def = vect_recog_temp_ssa_var (type, NULL);
2682 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2683 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2684 oprnd0 = def;
2687 /* Pattern detected. */
2688 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2690 *type_out = vectype;
2692 /* Pattern supported. Create a stmt to be used to replace the
2693 pattern, with the unpromoted argument. */
2694 var = vect_recog_temp_ssa_var (type, NULL);
2695 pattern_stmt = gimple_build_call (gimple_call_fndecl (last_stmt),
2696 1, oprnd0);
2697 gimple_call_set_lhs (pattern_stmt, var);
2698 gimple_call_set_fntype (as_a <gcall *> (pattern_stmt),
2699 gimple_call_fntype (last_stmt));
2700 return pattern_stmt;
2704 oprnd1 = build_int_cst (integer_type_node, 8);
2705 rhs_code = LROTATE_EXPR;
2706 bswap16_p = true;
2708 else
2709 return NULL;
2711 if (TREE_CODE (oprnd0) != SSA_NAME
2712 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
2713 || !INTEGRAL_TYPE_P (type))
2714 return NULL;
2716 stmt_vec_info def_stmt_info;
2717 if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt))
2718 return NULL;
2720 if (dt != vect_internal_def
2721 && dt != vect_constant_def
2722 && dt != vect_external_def)
2723 return NULL;
2725 vectype = get_vectype_for_scalar_type (vinfo, type);
2726 if (vectype == NULL_TREE)
2727 return NULL;
2729 /* If vector/vector or vector/scalar rotate is supported by the target,
2730 don't do anything here. */
2731 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
2732 if (optab1
2733 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2735 use_rotate:
2736 if (bswap16_p)
2738 if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2740 def = vect_recog_temp_ssa_var (type, NULL);
2741 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2742 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2743 oprnd0 = def;
2746 /* Pattern detected. */
2747 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2749 *type_out = vectype;
2751 /* Pattern supported. Create a stmt to be used to replace the
2752 pattern. */
2753 var = vect_recog_temp_ssa_var (type, NULL);
2754 pattern_stmt = gimple_build_assign (var, LROTATE_EXPR, oprnd0,
2755 oprnd1);
2756 return pattern_stmt;
2758 return NULL;
2761 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
2763 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
2764 if (optab2
2765 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2766 goto use_rotate;
2769 tree utype = unsigned_type_for (type);
2770 tree uvectype = get_vectype_for_scalar_type (vinfo, utype);
2771 if (!uvectype)
2772 return NULL;
2774 /* If vector/vector or vector/scalar shifts aren't supported by the target,
2775 don't do anything here either. */
2776 optab1 = optab_for_tree_code (LSHIFT_EXPR, uvectype, optab_vector);
2777 optab2 = optab_for_tree_code (RSHIFT_EXPR, uvectype, optab_vector);
2778 if (!optab1
2779 || optab_handler (optab1, TYPE_MODE (uvectype)) == CODE_FOR_nothing
2780 || !optab2
2781 || optab_handler (optab2, TYPE_MODE (uvectype)) == CODE_FOR_nothing)
2783 if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
2784 return NULL;
2785 optab1 = optab_for_tree_code (LSHIFT_EXPR, uvectype, optab_scalar);
2786 optab2 = optab_for_tree_code (RSHIFT_EXPR, uvectype, optab_scalar);
2787 if (!optab1
2788 || optab_handler (optab1, TYPE_MODE (uvectype)) == CODE_FOR_nothing
2789 || !optab2
2790 || optab_handler (optab2, TYPE_MODE (uvectype)) == CODE_FOR_nothing)
2791 return NULL;
2794 *type_out = vectype;
2796 if (!useless_type_conversion_p (utype, TREE_TYPE (oprnd0)))
2798 def = vect_recog_temp_ssa_var (utype, NULL);
2799 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2800 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2801 oprnd0 = def;
2804 if (dt == vect_external_def && TREE_CODE (oprnd1) == SSA_NAME)
2805 ext_def = vect_get_external_def_edge (vinfo, oprnd1);
2807 def = NULL_TREE;
2808 scalar_int_mode mode = SCALAR_INT_TYPE_MODE (utype);
2809 if (dt != vect_internal_def || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
2810 def = oprnd1;
2811 else if (def_stmt && gimple_assign_cast_p (def_stmt))
2813 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2814 if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
2815 && TYPE_PRECISION (TREE_TYPE (rhs1))
2816 == TYPE_PRECISION (type))
2817 def = rhs1;
2820 if (def == NULL_TREE)
2822 def = vect_recog_temp_ssa_var (utype, NULL);
2823 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2824 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2826 stype = TREE_TYPE (def);
2828 if (TREE_CODE (def) == INTEGER_CST)
2830 if (!tree_fits_uhwi_p (def)
2831 || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
2832 || integer_zerop (def))
2833 return NULL;
2834 def2 = build_int_cst (stype,
2835 GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
2837 else
2839 tree vecstype = get_vectype_for_scalar_type (vinfo, stype);
2841 if (vecstype == NULL_TREE)
2842 return NULL;
2843 def2 = vect_recog_temp_ssa_var (stype, NULL);
2844 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
2845 if (ext_def)
2847 basic_block new_bb
2848 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2849 gcc_assert (!new_bb);
2851 else
2852 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
2854 def2 = vect_recog_temp_ssa_var (stype, NULL);
2855 tree mask = build_int_cst (stype, GET_MODE_PRECISION (mode) - 1);
2856 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
2857 gimple_assign_lhs (def_stmt), mask);
2858 if (ext_def)
2860 basic_block new_bb
2861 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2862 gcc_assert (!new_bb);
2864 else
2865 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
2868 var1 = vect_recog_temp_ssa_var (utype, NULL);
2869 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
2870 ? LSHIFT_EXPR : RSHIFT_EXPR,
2871 oprnd0, def);
2872 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2874 var2 = vect_recog_temp_ssa_var (utype, NULL);
2875 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
2876 ? RSHIFT_EXPR : LSHIFT_EXPR,
2877 oprnd0, def2);
2878 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2880 /* Pattern detected. */
2881 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2883 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2884 var = vect_recog_temp_ssa_var (utype, NULL);
2885 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
2887 if (!useless_type_conversion_p (type, utype))
2889 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
2890 tree result = vect_recog_temp_ssa_var (type, NULL);
2891 pattern_stmt = gimple_build_assign (result, NOP_EXPR, var);
2893 return pattern_stmt;
2896 /* Detect a vector by vector shift pattern that wouldn't be otherwise
2897 vectorized:
2899 type a_t;
2900 TYPE b_T, res_T;
2902 S1 a_t = ;
2903 S2 b_T = ;
2904 S3 res_T = b_T op a_t;
2906 where type 'TYPE' is a type with different size than 'type',
2907 and op is <<, >> or rotate.
2909 Also detect cases:
2911 type a_t;
2912 TYPE b_T, c_T, res_T;
2914 S0 c_T = ;
2915 S1 a_t = (type) c_T;
2916 S2 b_T = ;
2917 S3 res_T = b_T op a_t;
2919 Input/Output:
2921 * STMT_VINFO: The stmt from which the pattern search begins,
2922 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2923 with a shift/rotate which has same type on both operands, in the
2924 second case just b_T op c_T, in the first case with added cast
2925 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2927 Output:
2929 * TYPE_OUT: The type of the output of this pattern.
2931 * Return value: A new stmt that will be used to replace the shift/rotate
2932 S3 stmt. */
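/* A concrete instance of the first form (illustrative only):

     unsigned char n;
     unsigned int b, res;
     res = b >> n;

   The shift amount has a narrower type than the shifted value, so the
   pattern emits "n' = (unsigned int) n" in the pattern def sequence
   and shifts by n', giving both vector operands the same element
   type.  */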
2934 static gimple *
2935 vect_recog_vector_vector_shift_pattern (vec_info *vinfo,
2936 stmt_vec_info stmt_vinfo,
2937 tree *type_out)
2939 gimple *last_stmt = stmt_vinfo->stmt;
2940 tree oprnd0, oprnd1, lhs, var;
2941 gimple *pattern_stmt;
2942 enum tree_code rhs_code;
2944 if (!is_gimple_assign (last_stmt))
2945 return NULL;
2947 rhs_code = gimple_assign_rhs_code (last_stmt);
2948 switch (rhs_code)
2950 case LSHIFT_EXPR:
2951 case RSHIFT_EXPR:
2952 case LROTATE_EXPR:
2953 case RROTATE_EXPR:
2954 break;
2955 default:
2956 return NULL;
2959 lhs = gimple_assign_lhs (last_stmt);
2960 oprnd0 = gimple_assign_rhs1 (last_stmt);
2961 oprnd1 = gimple_assign_rhs2 (last_stmt);
2962 if (TREE_CODE (oprnd0) != SSA_NAME
2963 || TREE_CODE (oprnd1) != SSA_NAME
2964 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2965 || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
2966 || TYPE_PRECISION (TREE_TYPE (lhs))
2967 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2968 return NULL;
2970 stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
2971 if (!def_vinfo)
2972 return NULL;
2974 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (oprnd0));
2975 if (*type_out == NULL_TREE)
2976 return NULL;
2978 tree def = NULL_TREE;
2979 gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
2980 if (def_stmt && gimple_assign_cast_p (def_stmt))
2982 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2983 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2984 && TYPE_PRECISION (TREE_TYPE (rhs1))
2985 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2987 if (TYPE_PRECISION (TREE_TYPE (oprnd1))
2988 >= TYPE_PRECISION (TREE_TYPE (rhs1)))
2989 def = rhs1;
2990 else
2992 tree mask
2993 = build_low_bits_mask (TREE_TYPE (rhs1),
2994 TYPE_PRECISION (TREE_TYPE (oprnd1)));
2995 def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
2996 def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
2997 tree vecstype = get_vectype_for_scalar_type (vinfo,
2998 TREE_TYPE (rhs1));
2999 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
3004 if (def == NULL_TREE)
3006 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
3007 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
3008 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3011 /* Pattern detected. */
3012 vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
3014 /* Pattern supported. Create a stmt to be used to replace the pattern. */
3015 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
3016 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
3018 return pattern_stmt;
3021 /* Return true iff the target has a vector optab implementing the operation
3022 CODE on type VECTYPE. */
3024 static bool
3025 target_has_vecop_for_code (tree_code code, tree vectype)
3027 optab voptab = optab_for_tree_code (code, vectype, optab_vector);
3028 return voptab
3029 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
3032 /* Verify that the target has optabs of VECTYPE to perform all the steps
3033 needed by the multiplication-by-immediate synthesis algorithm described by
3034 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
3035 present. Return true iff the target supports all the steps. */
3037 static bool
3038 target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
3039 tree vectype, bool synth_shift_p)
3041 if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
3042 return false;
3044 bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
3045 bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
3047 if (var == negate_variant
3048 && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
3049 return false;
3051 /* If we must synthesize shifts with additions make sure that vector
3052 addition is available. */
3053 if ((var == add_variant || synth_shift_p) && !supports_vplus)
3054 return false;
3056 for (int i = 1; i < alg->ops; i++)
3058 switch (alg->op[i])
3060 case alg_shift:
3061 break;
3062 case alg_add_t_m2:
3063 case alg_add_t2_m:
3064 case alg_add_factor:
3065 if (!supports_vplus)
3066 return false;
3067 break;
3068 case alg_sub_t_m2:
3069 case alg_sub_t2_m:
3070 case alg_sub_factor:
3071 if (!supports_vminus)
3072 return false;
3073 break;
3074 case alg_unknown:
3075 case alg_m:
3076 case alg_zero:
3077 case alg_impossible:
3078 return false;
3079 default:
3080 gcc_unreachable ();
3084 return true;
3087 /* Synthesize a left shift of OP by AMNT bits using a series of additions and
3088 putting the final result in DEST. Append all statements but the last into
3089 VINFO. Return the last statement. */
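/* For example (illustrative only), a shift by 3 expands to

     t1 = op + op;        // op << 1
     t2 = t1 + t1;        // op << 2
     dest = t2 + t2;      // op << 3

   i.e. AMNT self-additions, of which only the last is returned rather
   than appended.  */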
3091 static gimple *
3092 synth_lshift_by_additions (vec_info *vinfo,
3093 tree dest, tree op, HOST_WIDE_INT amnt,
3094 stmt_vec_info stmt_info)
3096 HOST_WIDE_INT i;
3097 tree itype = TREE_TYPE (op);
3098 tree prev_res = op;
3099 gcc_assert (amnt >= 0);
3100 for (i = 0; i < amnt; i++)
3102 tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
3103 : dest;
3104 gimple *stmt
3105 = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
3106 prev_res = tmp_var;
3107 if (i < amnt - 1)
3108 append_pattern_def_seq (vinfo, stmt_info, stmt);
3109 else
3110 return stmt;
3112 gcc_unreachable ();
3113 return NULL;
3116 /* Helper for vect_synth_mult_by_constant. Apply a binary operation
3117 CODE to operands OP1 and OP2, creating a new temporary SSA var in
3118 the process if necessary. Append the resulting assignment statements
3119 to the sequence in STMT_VINFO. Return the SSA variable that holds the
3120 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
3121 left shifts using additions. */
3123 static tree
3124 apply_binop_and_append_stmt (vec_info *vinfo,
3125 tree_code code, tree op1, tree op2,
3126 stmt_vec_info stmt_vinfo, bool synth_shift_p)
3128 if (integer_zerop (op2)
3129 && (code == LSHIFT_EXPR
3130 || code == PLUS_EXPR))
3132 gcc_assert (TREE_CODE (op1) == SSA_NAME);
3133 return op1;
3136 gimple *stmt;
3137 tree itype = TREE_TYPE (op1);
3138 tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
3140 if (code == LSHIFT_EXPR
3141 && synth_shift_p)
3143 stmt = synth_lshift_by_additions (vinfo, tmp_var, op1,
3144 TREE_INT_CST_LOW (op2), stmt_vinfo);
3145 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3146 return tmp_var;
3149 stmt = gimple_build_assign (tmp_var, code, op1, op2);
3150 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3151 return tmp_var;
3154 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
3155 and simple arithmetic operations to be vectorized. Record the statements
3156 produced in STMT_VINFO and return the last statement in the sequence or
3157 NULL if it's not possible to synthesize such a multiplication.
3158 This function mirrors the behavior of expand_mult_const in expmed.cc but
3159 works on tree-ssa form. */
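/* For example (illustrative only), a multiplication by 9 on a signed
   type can be synthesized as

     int op, res;
     unsigned int t0 = (unsigned int) op;   // avoid fresh signed overflow
     unsigned int t1 = t0 << 3;             // or three self-additions, see above
     unsigned int t2 = t1 + t0;             // op * 9
     res = (int) t2;

   with the exact sequence chosen by choose_mult_variant.  */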
3161 static gimple *
3162 vect_synth_mult_by_constant (vec_info *vinfo, tree op, tree val,
3163 stmt_vec_info stmt_vinfo)
3165 tree itype = TREE_TYPE (op);
3166 machine_mode mode = TYPE_MODE (itype);
3167 struct algorithm alg;
3168 mult_variant variant;
3169 if (!tree_fits_shwi_p (val))
3170 return NULL;
3172 /* Multiplication synthesis by shifts, adds and subs can introduce
3173 signed overflow where the original operation didn't. Perform the
3174 operations on an unsigned type and cast back to avoid this.
3175 In the future we may want to relax this for synthesis algorithms
3176 that we can prove do not cause unexpected overflow. */
3177 bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
3179 tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
3180 tree vectype = get_vectype_for_scalar_type (vinfo, multtype);
3181 if (!vectype)
3182 return NULL;
3184 /* Targets that don't support vector shifts but support vector additions
3185 can synthesize shifts that way. */
3186 bool synth_shift_p = !vect_supportable_shift (vinfo, LSHIFT_EXPR, multtype);
3188 HOST_WIDE_INT hwval = tree_to_shwi (val);
3189 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
3190 The vectorizer's benefit analysis will decide whether it's beneficial
3191 to do this. */
3192 bool possible = choose_mult_variant (VECTOR_MODE_P (TYPE_MODE (vectype))
3193 ? TYPE_MODE (vectype) : mode,
3194 hwval, &alg, &variant, MAX_COST);
3195 if (!possible)
3196 return NULL;
3198 if (!target_supports_mult_synth_alg (&alg, variant, vectype, synth_shift_p))
3199 return NULL;
3201 tree accumulator;
3203 /* Clear out the sequence of statements so we can populate it below. */
3204 gimple *stmt = NULL;
3206 if (cast_to_unsigned_p)
3208 tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
3209 stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
3210 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3211 op = tmp_op;
3214 if (alg.op[0] == alg_zero)
3215 accumulator = build_int_cst (multtype, 0);
3216 else
3217 accumulator = op;
3219 bool needs_fixup = (variant == negate_variant)
3220 || (variant == add_variant);
3222 for (int i = 1; i < alg.ops; i++)
3224 tree shft_log = build_int_cst (multtype, alg.log[i]);
3225 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
3226 tree tmp_var = NULL_TREE;
3228 switch (alg.op[i])
3230 case alg_shift:
3231 if (synth_shift_p)
3232 stmt
3233 = synth_lshift_by_additions (vinfo, accum_tmp, accumulator,
3234 alg.log[i], stmt_vinfo);
3235 else
3236 stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
3237 shft_log);
3238 break;
3239 case alg_add_t_m2:
3240 tmp_var
3241 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op, shft_log,
3242 stmt_vinfo, synth_shift_p);
3243 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
3244 tmp_var);
3245 break;
3246 case alg_sub_t_m2:
3247 tmp_var = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op,
3248 shft_log, stmt_vinfo,
3249 synth_shift_p);
3250 /* In some algorithms the first step involves zeroing the
3251 accumulator. If subtracting from such an accumulator
3252 just emit the negation directly. */
3253 if (integer_zerop (accumulator))
3254 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
3255 else
3256 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
3257 tmp_var);
3258 break;
3259 case alg_add_t2_m:
3260 tmp_var
3261 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
3262 shft_log, stmt_vinfo, synth_shift_p);
3263 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
3264 break;
3265 case alg_sub_t2_m:
3266 tmp_var
3267 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
3268 shft_log, stmt_vinfo, synth_shift_p);
3269 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
3270 break;
3271 case alg_add_factor:
3272 tmp_var
3273 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
3274 shft_log, stmt_vinfo, synth_shift_p);
3275 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
3276 tmp_var);
3277 break;
3278 case alg_sub_factor:
3279 tmp_var
3280 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
3281 shft_log, stmt_vinfo, synth_shift_p);
3282 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
3283 accumulator);
3284 break;
3285 default:
3286 gcc_unreachable ();
3288 /* We don't want to append the last stmt in the sequence to stmt_vinfo
3289 but rather return it directly. */
3291 if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
3292 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3293 accumulator = accum_tmp;
3295 if (variant == negate_variant)
3297 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
3298 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
3299 accumulator = accum_tmp;
3300 if (cast_to_unsigned_p)
3301 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3303 else if (variant == add_variant)
3305 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
3306 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
3307 accumulator = accum_tmp;
3308 if (cast_to_unsigned_p)
3309 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3311 /* Move back to the original signed type if needed. */
3312 if (cast_to_unsigned_p)
3314 tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
3315 stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
3318 return stmt;
3321 /* Detect multiplication by constant and convert it into a sequence of
3322 shifts and additions, subtractions, negations. We reuse the
3323 choose_mult_variant algorithms from expmed.cc
3325 Input/Output:
3327 STMT_VINFO: The stmt from which the pattern search begins,
3328 i.e. the mult stmt.
3330 Output:
3332 * TYPE_OUT: The type of the output of this pattern.
3334 * Return value: A new stmt that will be used to replace
3335 the multiplication. */
3337 static gimple *
3338 vect_recog_mult_pattern (vec_info *vinfo,
3339 stmt_vec_info stmt_vinfo, tree *type_out)
3341 gimple *last_stmt = stmt_vinfo->stmt;
3342 tree oprnd0, oprnd1, vectype, itype;
3343 gimple *pattern_stmt;
3345 if (!is_gimple_assign (last_stmt))
3346 return NULL;
3348 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
3349 return NULL;
3351 oprnd0 = gimple_assign_rhs1 (last_stmt);
3352 oprnd1 = gimple_assign_rhs2 (last_stmt);
3353 itype = TREE_TYPE (oprnd0);
3355 if (TREE_CODE (oprnd0) != SSA_NAME
3356 || TREE_CODE (oprnd1) != INTEGER_CST
3357 || !INTEGRAL_TYPE_P (itype)
3358 || !type_has_mode_precision_p (itype))
3359 return NULL;
3361 vectype = get_vectype_for_scalar_type (vinfo, itype);
3362 if (vectype == NULL_TREE)
3363 return NULL;
3365 /* If the target can handle vectorized multiplication natively,
3366 don't attempt to optimize this. */
3367 optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
3368 if (mul_optab != unknown_optab)
3370 machine_mode vec_mode = TYPE_MODE (vectype);
3371 int icode = (int) optab_handler (mul_optab, vec_mode);
3372 if (icode != CODE_FOR_nothing)
3373 return NULL;
3376 pattern_stmt = vect_synth_mult_by_constant (vinfo,
3377 oprnd0, oprnd1, stmt_vinfo);
3378 if (!pattern_stmt)
3379 return NULL;
3381 /* Pattern detected. */
3382 vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
3384 *type_out = vectype;
3386 return pattern_stmt;
3389 /* Detect a signed division by a constant that wouldn't be
3390 otherwise vectorized:
3392 type a_t, b_t;
3394 S1 a_t = b_t / N;
3396 where type 'type' is an integral type and N is a constant.
3398 Similarly handle modulo by a constant:
3400 S4 a_t = b_t % N;
3402 Input/Output:
3404 * STMT_VINFO: The stmt from which the pattern search begins,
3405 i.e. the division stmt. S1 is replaced, if N is a power
3406 of two constant and the type is signed, by:
3407 S3 y_t = b_t < 0 ? N - 1 : 0;
3408 S2 x_t = b_t + y_t;
3409 S1' a_t = x_t >> log2 (N);
3411 S4 is replaced, if N is a power of two constant and
3412 the type is signed, by (where *_T temporaries have unsigned type):
3413 S9 y_T = b_t < 0 ? -1U : 0U;
3414 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
3415 S7 z_t = (type) z_T;
3416 S6 w_t = b_t + z_t;
3417 S5 x_t = w_t & (N - 1);
3418 S4' a_t = x_t - z_t;
3420 Output:
3422 * TYPE_OUT: The type of the output of this pattern.
3424 * Return value: A new stmt that will be used to replace the division
3425 S1 or modulo S4 stmt. */
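/* A concrete instance of the division case (illustrative only), when
   the target does not provide IFN_DIV_POW2 directly:

     int a, b;
     a = b / 8;

   becomes

     int y = b < 0 ? 7 : 0;        // S3
     int x = b + y;                // S2
     a = x >> 3;                   // S1'

   which rounds towards zero for negative dividends, as C requires,
   where a plain arithmetic shift would round towards minus infinity.  */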
3427 static gimple *
3428 vect_recog_divmod_pattern (vec_info *vinfo,
3429 stmt_vec_info stmt_vinfo, tree *type_out)
3431 gimple *last_stmt = stmt_vinfo->stmt;
3432 tree oprnd0, oprnd1, vectype, itype, cond;
3433 gimple *pattern_stmt, *def_stmt;
3434 enum tree_code rhs_code;
3435 optab optab;
3436 tree q;
3437 int dummy_int, prec;
3439 if (!is_gimple_assign (last_stmt))
3440 return NULL;
3442 rhs_code = gimple_assign_rhs_code (last_stmt);
3443 switch (rhs_code)
3445 case TRUNC_DIV_EXPR:
3446 case EXACT_DIV_EXPR:
3447 case TRUNC_MOD_EXPR:
3448 break;
3449 default:
3450 return NULL;
3453 oprnd0 = gimple_assign_rhs1 (last_stmt);
3454 oprnd1 = gimple_assign_rhs2 (last_stmt);
3455 itype = TREE_TYPE (oprnd0);
3456 if (TREE_CODE (oprnd0) != SSA_NAME
3457 || TREE_CODE (oprnd1) != INTEGER_CST
3458 || TREE_CODE (itype) != INTEGER_TYPE
3459 || !type_has_mode_precision_p (itype))
3460 return NULL;
3462 scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
3463 vectype = get_vectype_for_scalar_type (vinfo, itype);
3464 if (vectype == NULL_TREE)
3465 return NULL;
3467 if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
3469 /* If the target can handle vectorized division or modulo natively,
3470 don't attempt to optimize this, since native division is likely
3471 to give smaller code. */
3472 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
3473 if (optab != unknown_optab)
3475 machine_mode vec_mode = TYPE_MODE (vectype);
3476 int icode = (int) optab_handler (optab, vec_mode);
3477 if (icode != CODE_FOR_nothing)
3478 return NULL;
3482 prec = TYPE_PRECISION (itype);
3483 if (integer_pow2p (oprnd1))
3485 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
3486 return NULL;
3488 /* Pattern detected. */
3489 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
3491 *type_out = vectype;
3493 /* Check if the target supports this internal function. */
3494 internal_fn ifn = IFN_DIV_POW2;
3495 if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
3497 tree shift = build_int_cst (itype, tree_log2 (oprnd1));
3499 tree var_div = vect_recog_temp_ssa_var (itype, NULL);
3500 gimple *div_stmt = gimple_build_call_internal (ifn, 2, oprnd0, shift);
3501 gimple_call_set_lhs (div_stmt, var_div);
3503 if (rhs_code == TRUNC_MOD_EXPR)
3505 append_pattern_def_seq (vinfo, stmt_vinfo, div_stmt);
3506 def_stmt
3507 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3508 LSHIFT_EXPR, var_div, shift);
3509 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3510 pattern_stmt
3511 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3512 MINUS_EXPR, oprnd0,
3513 gimple_assign_lhs (def_stmt));
3515 else
3516 pattern_stmt = div_stmt;
3517 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
3519 return pattern_stmt;
3522 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
3523 build_int_cst (itype, 0));
3524 if (rhs_code == TRUNC_DIV_EXPR
3525 || rhs_code == EXACT_DIV_EXPR)
3527 tree var = vect_recog_temp_ssa_var (itype, NULL);
3528 tree shift;
3529 def_stmt
3530 = gimple_build_assign (var, COND_EXPR, cond,
3531 fold_build2 (MINUS_EXPR, itype, oprnd1,
3532 build_int_cst (itype, 1)),
3533 build_int_cst (itype, 0));
3534 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3535 var = vect_recog_temp_ssa_var (itype, NULL);
3536 def_stmt
3537 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
3538 gimple_assign_lhs (def_stmt));
3539 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3541 shift = build_int_cst (itype, tree_log2 (oprnd1));
3542 pattern_stmt
3543 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3544 RSHIFT_EXPR, var, shift);
3546 else
3548 tree signmask;
3549 if (compare_tree_int (oprnd1, 2) == 0)
3551 signmask = vect_recog_temp_ssa_var (itype, NULL);
3552 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
3553 build_int_cst (itype, 1),
3554 build_int_cst (itype, 0));
3555 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3557 else
3559 tree utype
3560 = build_nonstandard_integer_type (prec, 1);
3561 tree vecutype = get_vectype_for_scalar_type (vinfo, utype);
3562 tree shift
3563 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
3564 - tree_log2 (oprnd1));
3565 tree var = vect_recog_temp_ssa_var (utype, NULL);
3567 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
3568 build_int_cst (utype, -1),
3569 build_int_cst (utype, 0));
3570 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
3571 var = vect_recog_temp_ssa_var (utype, NULL);
3572 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
3573 gimple_assign_lhs (def_stmt),
3574 shift);
3575 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
3576 signmask = vect_recog_temp_ssa_var (itype, NULL);
3577 def_stmt
3578 = gimple_build_assign (signmask, NOP_EXPR, var);
3579 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3581 def_stmt
3582 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3583 PLUS_EXPR, oprnd0, signmask);
3584 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3585 def_stmt
3586 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3587 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
3588 fold_build2 (MINUS_EXPR, itype, oprnd1,
3589 build_int_cst (itype, 1)));
3590 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3592 pattern_stmt
3593 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3594 MINUS_EXPR, gimple_assign_lhs (def_stmt),
3595 signmask);
3598 return pattern_stmt;
3601 if (prec > HOST_BITS_PER_WIDE_INT
3602 || integer_zerop (oprnd1))
3603 return NULL;
3605 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
3606 return NULL;
3608 if (TYPE_UNSIGNED (itype))
3610 unsigned HOST_WIDE_INT mh, ml;
3611 int pre_shift, post_shift;
3612 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
3613 & GET_MODE_MASK (itype_mode));
3614 tree t1, t2, t3, t4;
3616 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
3617 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
3618 return NULL;
3620 /* Find a suitable multiplier and right shift count
3621 instead of multiplying by D. */
3622 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
3624 /* If the suggested multiplier is more than PREC bits, we can do better
3625 for even divisors, using an initial right shift. */
3626 if (mh != 0 && (d & 1) == 0)
3628 pre_shift = ctz_or_zero (d);
3629 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
3630 &ml, &post_shift, &dummy_int);
3631 gcc_assert (!mh);
3633 else
3634 pre_shift = 0;
3636 if (mh != 0)
3638 if (post_shift - 1 >= prec)
3639 return NULL;
3641 /* t1 = oprnd0 h* ml;
3642 t2 = oprnd0 - t1;
3643 t3 = t2 >> 1;
3644 t4 = t1 + t3;
3645 q = t4 >> (post_shift - 1); */
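/* For illustration, a 32-bit unsigned division by 7 would give
   ml = 0x24924925 and post_shift = 3 here, so the quotient becomes
   q = (((oprnd0 - t1) >> 1) + t1) >> 2 with t1 = oprnd0 h* ml. */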
3646 t1 = vect_recog_temp_ssa_var (itype, NULL);
3647 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
3648 build_int_cst (itype, ml));
3649 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3651 t2 = vect_recog_temp_ssa_var (itype, NULL);
3652 def_stmt
3653 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
3654 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3656 t3 = vect_recog_temp_ssa_var (itype, NULL);
3657 def_stmt
3658 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
3659 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3661 t4 = vect_recog_temp_ssa_var (itype, NULL);
3662 def_stmt
3663 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
3665 if (post_shift != 1)
3667 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3669 q = vect_recog_temp_ssa_var (itype, NULL);
3670 pattern_stmt
3671 = gimple_build_assign (q, RSHIFT_EXPR, t4,
3672 build_int_cst (itype, post_shift - 1));
3674 else
3676 q = t4;
3677 pattern_stmt = def_stmt;
3680 else
3682 if (pre_shift >= prec || post_shift >= prec)
3683 return NULL;
3685 /* t1 = oprnd0 >> pre_shift;
3686 t2 = t1 h* ml;
3687 q = t2 >> post_shift; */
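/* For illustration, a 32-bit unsigned division by 3 would give
   ml = 0xAAAAAAAB, pre_shift = 0 and post_shift = 1 here, i.e.
   q = (oprnd0 h* 0xAAAAAAAB) >> 1. */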
3688 if (pre_shift)
3690 t1 = vect_recog_temp_ssa_var (itype, NULL);
3691 def_stmt
3692 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
3693 build_int_cst (NULL, pre_shift));
3694 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3696 else
3697 t1 = oprnd0;
3699 t2 = vect_recog_temp_ssa_var (itype, NULL);
3700 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
3701 build_int_cst (itype, ml));
3703 if (post_shift)
3705 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3707 q = vect_recog_temp_ssa_var (itype, NULL);
3708 def_stmt
3709 = gimple_build_assign (q, RSHIFT_EXPR, t2,
3710 build_int_cst (itype, post_shift));
3712 else
3713 q = t2;
3715 pattern_stmt = def_stmt;
3718 else
3720 unsigned HOST_WIDE_INT ml;
3721 int post_shift;
3722 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
3723 unsigned HOST_WIDE_INT abs_d;
3724 bool add = false;
3725 tree t1, t2, t3, t4;
3727 /* Give up for -1. */
3728 if (d == -1)
3729 return NULL;
3731 /* Since d might be INT_MIN, we have to cast to
3732 unsigned HOST_WIDE_INT before negating to avoid
3733 undefined signed overflow. */
3734 abs_d = (d >= 0
3735 ? (unsigned HOST_WIDE_INT) d
3736 : - (unsigned HOST_WIDE_INT) d);
3738 /* n rem d = n rem -d */
3739 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
3741 d = abs_d;
3742 oprnd1 = build_int_cst (itype, abs_d);
3744 if (HOST_BITS_PER_WIDE_INT >= prec
3745 && abs_d == HOST_WIDE_INT_1U << (prec - 1))
3746 /* This case is not handled correctly below. */
3747 return NULL;
3749 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
3750 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
3752 add = true;
3753 ml |= HOST_WIDE_INT_M1U << (prec - 1);
3755 if (post_shift >= prec)
3756 return NULL;
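/* For illustration, a 32-bit signed division by 5 would give
   ml = 0x66666667, post_shift = 1 and add == false here, so the
   sequence below computes q = ((oprnd0 h* ml) >> 1) - (oprnd0 >> 31). */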
3758 /* t1 = oprnd0 h* ml; */
3759 t1 = vect_recog_temp_ssa_var (itype, NULL);
3760 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
3761 build_int_cst (itype, ml));
3763 if (add)
3765 /* t2 = t1 + oprnd0; */
3766 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3767 t2 = vect_recog_temp_ssa_var (itype, NULL);
3768 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
3770 else
3771 t2 = t1;
3773 if (post_shift)
3775 /* t3 = t2 >> post_shift; */
3776 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3777 t3 = vect_recog_temp_ssa_var (itype, NULL);
3778 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
3779 build_int_cst (itype, post_shift));
3781 else
3782 t3 = t2;
3784 int msb = 1;
3785 value_range r;
3786 get_range_query (cfun)->range_of_expr (r, oprnd0);
3787 if (r.kind () == VR_RANGE)
3789 if (!wi::neg_p (r.lower_bound (), TYPE_SIGN (itype)))
3790 msb = 0;
3791 else if (wi::neg_p (r.upper_bound (), TYPE_SIGN (itype)))
3792 msb = -1;
3795 if (msb == 0 && d >= 0)
3797 /* q = t3; */
3798 q = t3;
3799 pattern_stmt = def_stmt;
3801 else
3803 /* t4 = oprnd0 >> (prec - 1);
3804 or if we know from VRP that oprnd0 >= 0
3805 t4 = 0;
3806 or if we know from VRP that oprnd0 < 0
3807 t4 = -1; */
3808 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3809 t4 = vect_recog_temp_ssa_var (itype, NULL);
3810 if (msb != 1)
3811 def_stmt = gimple_build_assign (t4, INTEGER_CST,
3812 build_int_cst (itype, msb));
3813 else
3814 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
3815 build_int_cst (itype, prec - 1));
3816 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3818 /* q = t3 - t4; or q = t4 - t3; */
3819 q = vect_recog_temp_ssa_var (itype, NULL);
3820 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
3821 d < 0 ? t3 : t4);
3825 if (rhs_code == TRUNC_MOD_EXPR)
3827 tree r, t1;
3829 /* We divided. Now finish by:
3830 t1 = q * oprnd1;
3831 r = oprnd0 - t1; */
3832 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
3834 t1 = vect_recog_temp_ssa_var (itype, NULL);
3835 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
3836 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3838 r = vect_recog_temp_ssa_var (itype, NULL);
3839 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
3842 /* Pattern detected. */
3843 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
3845 *type_out = vectype;
3846 return pattern_stmt;
3849 /* Function vect_recog_mixed_size_cond_pattern
3851 Try to find the following pattern:
3853 type x_t, y_t;
3854 TYPE a_T, b_T, c_T;
3855 loop:
3856 S1 a_T = x_t CMP y_t ? b_T : c_T;
3858 where type 'TYPE' is an integral type which has a different size
3859 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
3860 than 'type', the constants need to fit into an integer type
3861 with the same width as 'type') or results of conversion from 'type'.
3863 Input:
3865 * STMT_VINFO: The stmt from which the pattern search begins.
3867 Output:
3869 * TYPE_OUT: The type of the output of this pattern.
3871 * Return value: A new stmt that will be used to replace the pattern.
3872 Additionally a def_stmt is added.
3874 a_it = x_t CMP y_t ? b_it : c_it;
3875 a_T = (TYPE) a_it; */
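/* For instance, with 8-bit x_t, y_t and 32-bit TYPE

   S1  a_T = x_t < y_t ? 21 : 42;

   would become

   a_it = x_t < y_t ? 21 : 42;
   a_T = (TYPE) a_it;

   with a_it using an 8-bit intermediate type that matches the
   comparison width. */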
3877 static gimple *
3878 vect_recog_mixed_size_cond_pattern (vec_info *vinfo,
3879 stmt_vec_info stmt_vinfo, tree *type_out)
3881 gimple *last_stmt = stmt_vinfo->stmt;
3882 tree cond_expr, then_clause, else_clause;
3883 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
3884 gimple *pattern_stmt, *def_stmt;
3885 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
3886 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
3887 bool promotion;
3888 tree comp_scalar_type;
3890 if (!is_gimple_assign (last_stmt)
3891 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
3892 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
3893 return NULL;
3895 cond_expr = gimple_assign_rhs1 (last_stmt);
3896 then_clause = gimple_assign_rhs2 (last_stmt);
3897 else_clause = gimple_assign_rhs3 (last_stmt);
3899 if (!COMPARISON_CLASS_P (cond_expr))
3900 return NULL;
3902 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
3903 comp_vectype = get_vectype_for_scalar_type (vinfo, comp_scalar_type);
3904 if (comp_vectype == NULL_TREE)
3905 return NULL;
3907 type = TREE_TYPE (gimple_assign_lhs (last_stmt));
3908 if (types_compatible_p (type, comp_scalar_type)
3909 || ((TREE_CODE (then_clause) != INTEGER_CST
3910 || TREE_CODE (else_clause) != INTEGER_CST)
3911 && !INTEGRAL_TYPE_P (comp_scalar_type))
3912 || !INTEGRAL_TYPE_P (type))
3913 return NULL;
3915 if ((TREE_CODE (then_clause) != INTEGER_CST
3916 && !type_conversion_p (vinfo, then_clause, false,
3917 &orig_type0, &def_stmt0, &promotion))
3918 || (TREE_CODE (else_clause) != INTEGER_CST
3919 && !type_conversion_p (vinfo, else_clause, false,
3920 &orig_type1, &def_stmt1, &promotion)))
3921 return NULL;
3923 if (orig_type0 && orig_type1
3924 && !types_compatible_p (orig_type0, orig_type1))
3925 return NULL;
3927 if (orig_type0)
3929 if (!types_compatible_p (orig_type0, comp_scalar_type))
3930 return NULL;
3931 then_clause = gimple_assign_rhs1 (def_stmt0);
3932 itype = orig_type0;
3935 if (orig_type1)
3937 if (!types_compatible_p (orig_type1, comp_scalar_type))
3938 return NULL;
3939 else_clause = gimple_assign_rhs1 (def_stmt1);
3940 itype = orig_type1;
3944 HOST_WIDE_INT cmp_mode_size
3945 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
3947 scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
3948 if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
3949 return NULL;
3951 vectype = get_vectype_for_scalar_type (vinfo, type);
3952 if (vectype == NULL_TREE)
3953 return NULL;
3955 if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
3956 return NULL;
3958 if (itype == NULL_TREE)
3959 itype = build_nonstandard_integer_type (cmp_mode_size,
3960 TYPE_UNSIGNED (type));
3962 if (itype == NULL_TREE
3963 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
3964 return NULL;
3966 vecitype = get_vectype_for_scalar_type (vinfo, itype);
3967 if (vecitype == NULL_TREE)
3968 return NULL;
3970 if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
3971 return NULL;
3973 if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
3975 if ((TREE_CODE (then_clause) == INTEGER_CST
3976 && !int_fits_type_p (then_clause, itype))
3977 || (TREE_CODE (else_clause) == INTEGER_CST
3978 && !int_fits_type_p (else_clause, itype)))
3979 return NULL;
3982 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3983 COND_EXPR, unshare_expr (cond_expr),
3984 fold_convert (itype, then_clause),
3985 fold_convert (itype, else_clause));
3986 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3987 NOP_EXPR, gimple_assign_lhs (def_stmt));
3989 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecitype);
3990 *type_out = vectype;
3992 vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt);
3994 return pattern_stmt;
3998 /* Helper function of vect_recog_bool_pattern. Called recursively, return
3999 true if bool VAR can and should be optimized that way. Assume it shouldn't
4000 in case it's a result of a comparison which can be directly vectorized into
4001 a vector comparison. Fills in STMTS with all stmts visited during the
4002 walk. */
4004 static bool
4005 check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
4007 tree rhs1;
4008 enum tree_code rhs_code;
4010 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
4011 if (!def_stmt_info)
4012 return false;
4014 gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
4015 if (!def_stmt)
4016 return false;
4018 if (stmts.contains (def_stmt))
4019 return true;
4021 rhs1 = gimple_assign_rhs1 (def_stmt);
4022 rhs_code = gimple_assign_rhs_code (def_stmt);
4023 switch (rhs_code)
4025 case SSA_NAME:
4026 if (! check_bool_pattern (rhs1, vinfo, stmts))
4027 return false;
4028 break;
4030 CASE_CONVERT:
4031 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
4032 return false;
4033 if (! check_bool_pattern (rhs1, vinfo, stmts))
4034 return false;
4035 break;
4037 case BIT_NOT_EXPR:
4038 if (! check_bool_pattern (rhs1, vinfo, stmts))
4039 return false;
4040 break;
4042 case BIT_AND_EXPR:
4043 case BIT_IOR_EXPR:
4044 case BIT_XOR_EXPR:
4045 if (! check_bool_pattern (rhs1, vinfo, stmts)
4046 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
4047 return false;
4048 break;
4050 default:
4051 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
4053 tree vecitype, comp_vectype;
4055 /* If the comparison can throw, then is_gimple_condexpr will be
4056 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
4057 if (stmt_could_throw_p (cfun, def_stmt))
4058 return false;
4060 comp_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1));
4061 if (comp_vectype == NULL_TREE)
4062 return false;
4064 tree mask_type = get_mask_type_for_scalar_type (vinfo,
4065 TREE_TYPE (rhs1));
4066 if (mask_type
4067 && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
4068 return false;
4070 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
4072 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
4073 tree itype
4074 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
4075 vecitype = get_vectype_for_scalar_type (vinfo, itype);
4076 if (vecitype == NULL_TREE)
4077 return false;
4079 else
4080 vecitype = comp_vectype;
4081 if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
4082 return false;
4084 else
4085 return false;
4086 break;
4089 bool res = stmts.add (def_stmt);
4090 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
4091 gcc_assert (!res);
4093 return true;
4097 /* Helper function of adjust_bool_pattern. Add a cast to TYPE of the
4098 result of a previous stmt (SSA_NAME_DEF_STMT of VAR), appending the
4099 cast stmt to STMT_INFO's pattern def sequence. */
4101 static tree
4102 adjust_bool_pattern_cast (vec_info *vinfo,
4103 tree type, tree var, stmt_vec_info stmt_info)
4105 gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
4106 NOP_EXPR, var);
4107 append_pattern_def_seq (vinfo, stmt_info, cast_stmt,
4108 get_vectype_for_scalar_type (vinfo, type));
4109 return gimple_assign_lhs (cast_stmt);
4112 /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
4113 VAR is an SSA_NAME that should be transformed from bool to a wider integer
4114 type, OUT_TYPE is the desired final integer type of the whole pattern.
4115 STMT_INFO is the info of the pattern root and is where pattern stmts should
4116 be associated with. DEFS is a map of pattern defs. */
4118 static void
4119 adjust_bool_pattern (vec_info *vinfo, tree var, tree out_type,
4120 stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
4122 gimple *stmt = SSA_NAME_DEF_STMT (var);
4123 enum tree_code rhs_code, def_rhs_code;
4124 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
4125 location_t loc;
4126 gimple *pattern_stmt, *def_stmt;
4127 tree trueval = NULL_TREE;
4129 rhs1 = gimple_assign_rhs1 (stmt);
4130 rhs2 = gimple_assign_rhs2 (stmt);
4131 rhs_code = gimple_assign_rhs_code (stmt);
4132 loc = gimple_location (stmt);
4133 switch (rhs_code)
4135 case SSA_NAME:
4136 CASE_CONVERT:
4137 irhs1 = *defs.get (rhs1);
4138 itype = TREE_TYPE (irhs1);
4139 pattern_stmt
4140 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4141 SSA_NAME, irhs1);
4142 break;
4144 case BIT_NOT_EXPR:
4145 irhs1 = *defs.get (rhs1);
4146 itype = TREE_TYPE (irhs1);
4147 pattern_stmt
4148 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4149 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
4150 break;
4152 case BIT_AND_EXPR:
4153 /* Try to optimize x = y & (a < b ? 1 : 0); into
4154 x = (a < b ? y : 0);
4156 E.g. for:
4157 bool a_b, b_b, c_b;
4158 TYPE d_T;
4160 S1 a_b = x1 CMP1 y1;
4161 S2 b_b = x2 CMP2 y2;
4162 S3 c_b = a_b & b_b;
4163 S4 d_T = (TYPE) c_b;
4165 we would normally emit:
4167 S1' a_T = x1 CMP1 y1 ? 1 : 0;
4168 S2' b_T = x2 CMP2 y2 ? 1 : 0;
4169 S3' c_T = a_T & b_T;
4170 S4' d_T = c_T;
4172 but we can save one stmt by using the
4173 result of one of the COND_EXPRs in the other COND_EXPR and leave
4174 BIT_AND_EXPR stmt out:
4176 S1' a_T = x1 CMP1 y1 ? 1 : 0;
4177 S3' c_T = x2 CMP2 y2 ? a_T : 0;
4178 S4' f_T = c_T;
4180 At least when VEC_COND_EXPR is implemented using masks,
4181 cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases it
4182 computes the comparison mask and ANDs it, in one case with an
4183 all-ones vector, in the other case with a vector register.
4184 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var is
4185 often more expensive. */
4186 def_stmt = SSA_NAME_DEF_STMT (rhs2);
4187 def_rhs_code = gimple_assign_rhs_code (def_stmt);
4188 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
4190 irhs1 = *defs.get (rhs1);
4191 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
4192 if (TYPE_PRECISION (TREE_TYPE (irhs1))
4193 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
4195 rhs_code = def_rhs_code;
4196 rhs1 = def_rhs1;
4197 rhs2 = gimple_assign_rhs2 (def_stmt);
4198 trueval = irhs1;
4199 goto do_compare;
4201 else
4202 irhs2 = *defs.get (rhs2);
4203 goto and_ior_xor;
4205 def_stmt = SSA_NAME_DEF_STMT (rhs1);
4206 def_rhs_code = gimple_assign_rhs_code (def_stmt);
4207 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
4209 irhs2 = *defs.get (rhs2);
4210 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
4211 if (TYPE_PRECISION (TREE_TYPE (irhs2))
4212 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
4214 rhs_code = def_rhs_code;
4215 rhs1 = def_rhs1;
4216 rhs2 = gimple_assign_rhs2 (def_stmt);
4217 trueval = irhs2;
4218 goto do_compare;
4220 else
4221 irhs1 = *defs.get (rhs1);
4222 goto and_ior_xor;
4224 /* FALLTHRU */
4225 case BIT_IOR_EXPR:
4226 case BIT_XOR_EXPR:
4227 irhs1 = *defs.get (rhs1);
4228 irhs2 = *defs.get (rhs2);
4229 and_ior_xor:
4230 if (TYPE_PRECISION (TREE_TYPE (irhs1))
4231 != TYPE_PRECISION (TREE_TYPE (irhs2)))
4233 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
4234 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
4235 int out_prec = TYPE_PRECISION (out_type);
4236 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
4237 irhs2 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs1), irhs2,
4238 stmt_info);
4239 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
4240 irhs1 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs2), irhs1,
4241 stmt_info);
4242 else
4244 irhs1 = adjust_bool_pattern_cast (vinfo,
4245 out_type, irhs1, stmt_info);
4246 irhs2 = adjust_bool_pattern_cast (vinfo,
4247 out_type, irhs2, stmt_info);
4250 itype = TREE_TYPE (irhs1);
4251 pattern_stmt
4252 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4253 rhs_code, irhs1, irhs2);
4254 break;
4256 default:
4257 do_compare:
4258 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
4259 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
4260 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
4261 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
4262 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
4264 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
4265 itype
4266 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
4268 else
4269 itype = TREE_TYPE (rhs1);
4270 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
4271 if (trueval == NULL_TREE)
4272 trueval = build_int_cst (itype, 1);
4273 else
4274 gcc_checking_assert (useless_type_conversion_p (itype,
4275 TREE_TYPE (trueval)));
4276 pattern_stmt
4277 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4278 COND_EXPR, cond_expr, trueval,
4279 build_int_cst (itype, 0));
4280 break;
4283 gimple_set_location (pattern_stmt, loc);
4284 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt,
4285 get_vectype_for_scalar_type (vinfo, itype));
4286 defs.put (var, gimple_assign_lhs (pattern_stmt));
4289 /* Comparison function to qsort a vector of gimple stmts by UID. */
4291 static int
4292 sort_after_uid (const void *p1, const void *p2)
4294 const gimple *stmt1 = *(const gimple * const *)p1;
4295 const gimple *stmt2 = *(const gimple * const *)p2;
4296 return gimple_uid (stmt1) - gimple_uid (stmt2);
4299 /* Create pattern stmts for all stmts participating in the bool pattern
4300 specified by BOOL_STMT_SET and its root STMT_INFO with the desired type
4301 OUT_TYPE. Return the def of the pattern root. */
4303 static tree
4304 adjust_bool_stmts (vec_info *vinfo, hash_set <gimple *> &bool_stmt_set,
4305 tree out_type, stmt_vec_info stmt_info)
4307 /* Gather original stmts in the bool pattern in their order of appearance
4308 in the IL. */
4309 auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
4310 for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
4311 i != bool_stmt_set.end (); ++i)
4312 bool_stmts.quick_push (*i);
4313 bool_stmts.qsort (sort_after_uid);
4315 /* Now process them in that order, producing pattern stmts. */
4316 hash_map <tree, tree> defs;
4317 for (unsigned i = 0; i < bool_stmts.length (); ++i)
4318 adjust_bool_pattern (vinfo, gimple_assign_lhs (bool_stmts[i]),
4319 out_type, stmt_info, defs);
4321 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
4322 gimple *pattern_stmt
4323 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
4324 return gimple_assign_lhs (pattern_stmt);
4327 /* Return the proper type for converting bool VAR into
4328 an integer value or NULL_TREE if no such type exists.
4329 The type is chosen so that the converted value has the
4330 same number of elements as VAR's vector type. */
4332 static tree
4333 integer_type_for_mask (tree var, vec_info *vinfo)
4335 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
4336 return NULL_TREE;
4338 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
4339 if (!def_stmt_info || !vect_use_mask_type_p (def_stmt_info))
4340 return NULL_TREE;
4342 return build_nonstandard_integer_type (def_stmt_info->mask_precision, 1);
4345 /* Function vect_recog_bool_pattern
4347 Try to find pattern like following:
4349 bool a_b, b_b, c_b, d_b, e_b;
4350 TYPE f_T;
4351 loop:
4352 S1 a_b = x1 CMP1 y1;
4353 S2 b_b = x2 CMP2 y2;
4354 S3 c_b = a_b & b_b;
4355 S4 d_b = x3 CMP3 y3;
4356 S5 e_b = c_b | d_b;
4357 S6 f_T = (TYPE) e_b;
4359 where type 'TYPE' is an integral type. Or a similar pattern
4360 ending in
4362 S6 f_Y = e_b ? r_Y : s_Y;
4364 as results from if-conversion of a complex condition.
4366 Input:
4368 * STMT_VINFO: The stmt at the end from which the pattern
4369 search begins, i.e. cast of a bool to
4370 an integer type.
4372 Output:
4374 * TYPE_OUT: The type of the output of this pattern.
4376 * Return value: A new stmt that will be used to replace the pattern.
4378 Assuming the size of TYPE is the same as the size of all comparisons
4379 (otherwise some casts would be added where needed), for the above
4380 sequence we create the related pattern stmts:
4381 S1' a_T = x1 CMP1 y1 ? 1 : 0;
4382 S3' c_T = x2 CMP2 y2 ? a_T : 0;
4383 S4' d_T = x3 CMP3 y3 ? 1 : 0;
4384 S5' e_T = c_T | d_T;
4385 S6' f_T = e_T;
4387 Instead of the above S3' we could emit:
4388 S2' b_T = x2 CMP2 y2 ? 1 : 0;
4389 S3' c_T = a_T | b_T;
4390 but the above is more efficient. */
4392 static gimple *
4393 vect_recog_bool_pattern (vec_info *vinfo,
4394 stmt_vec_info stmt_vinfo, tree *type_out)
4396 gimple *last_stmt = stmt_vinfo->stmt;
4397 enum tree_code rhs_code;
4398 tree var, lhs, rhs, vectype;
4399 gimple *pattern_stmt;
4401 if (!is_gimple_assign (last_stmt))
4402 return NULL;
4404 var = gimple_assign_rhs1 (last_stmt);
4405 lhs = gimple_assign_lhs (last_stmt);
4406 rhs_code = gimple_assign_rhs_code (last_stmt);
4408 if (rhs_code == VIEW_CONVERT_EXPR)
4409 var = TREE_OPERAND (var, 0);
4411 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
4412 return NULL;
4414 hash_set<gimple *> bool_stmts;
4416 if (CONVERT_EXPR_CODE_P (rhs_code)
4417 || rhs_code == VIEW_CONVERT_EXPR)
4419 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4420 || VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4421 return NULL;
4422 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4424 if (check_bool_pattern (var, vinfo, bool_stmts))
4426 rhs = adjust_bool_stmts (vinfo, bool_stmts,
4427 TREE_TYPE (lhs), stmt_vinfo);
4428 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4429 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
4430 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
4431 else
4432 pattern_stmt
4433 = gimple_build_assign (lhs, NOP_EXPR, rhs);
4435 else
4437 tree type = integer_type_for_mask (var, vinfo);
4438 tree cst0, cst1, tmp;
4440 if (!type)
4441 return NULL;
4443 /* We may directly use the cond with a narrowed type, to avoid
4444 multiple cond exprs with subsequent result packing, and
4445 perform a single cond with a packed mask instead. In case
4446 of widening it is better to do the cond first and then extract
4447 the results. */
4448 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
4449 type = TREE_TYPE (lhs);
4451 cst0 = build_int_cst (type, 0);
4452 cst1 = build_int_cst (type, 1);
4453 tmp = vect_recog_temp_ssa_var (type, NULL);
4454 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
4456 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
4458 tree new_vectype = get_vectype_for_scalar_type (vinfo, type);
4459 append_pattern_def_seq (vinfo, stmt_vinfo,
4460 pattern_stmt, new_vectype);
4462 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4463 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
4467 *type_out = vectype;
4468 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4470 return pattern_stmt;
4472 else if (rhs_code == COND_EXPR
4473 && TREE_CODE (var) == SSA_NAME)
4475 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4476 if (vectype == NULL_TREE)
4477 return NULL;
4479 /* Build a scalar type for the boolean result that when
4480 vectorized matches the vector type of the result in
4481 size and number of elements. */
4482 unsigned prec
4483 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
4484 TYPE_VECTOR_SUBPARTS (vectype));
4486 tree type
4487 = build_nonstandard_integer_type (prec,
4488 TYPE_UNSIGNED (TREE_TYPE (var)));
4489 if (get_vectype_for_scalar_type (vinfo, type) == NULL_TREE)
4490 return NULL;
4492 if (check_bool_pattern (var, vinfo, bool_stmts))
4493 var = adjust_bool_stmts (vinfo, bool_stmts, type, stmt_vinfo);
4494 else if (integer_type_for_mask (var, vinfo))
4495 return NULL;
4497 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4498 pattern_stmt
4499 = gimple_build_assign (lhs, COND_EXPR,
4500 build2 (NE_EXPR, boolean_type_node,
4501 var, build_int_cst (TREE_TYPE (var), 0)),
4502 gimple_assign_rhs2 (last_stmt),
4503 gimple_assign_rhs3 (last_stmt));
4504 *type_out = vectype;
4505 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4507 return pattern_stmt;
4509 else if (rhs_code == SSA_NAME
4510 && STMT_VINFO_DATA_REF (stmt_vinfo))
4512 stmt_vec_info pattern_stmt_info;
4513 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4514 if (!vectype || !VECTOR_MODE_P (TYPE_MODE (vectype)))
4515 return NULL;
4517 if (check_bool_pattern (var, vinfo, bool_stmts))
4518 rhs = adjust_bool_stmts (vinfo, bool_stmts,
4519 TREE_TYPE (vectype), stmt_vinfo);
4520 else
4522 tree type = integer_type_for_mask (var, vinfo);
4523 tree cst0, cst1, new_vectype;
4525 if (!type)
4526 return NULL;
4528 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
4529 type = TREE_TYPE (vectype);
4531 cst0 = build_int_cst (type, 0);
4532 cst1 = build_int_cst (type, 1);
4533 new_vectype = get_vectype_for_scalar_type (vinfo, type);
4535 rhs = vect_recog_temp_ssa_var (type, NULL);
4536 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
4537 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, new_vectype);
4540 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
4541 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
4543 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4544 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
4545 append_pattern_def_seq (vinfo, stmt_vinfo, cast_stmt);
4546 rhs = rhs2;
4548 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
4549 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
4550 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
4551 *type_out = vectype;
4552 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4554 return pattern_stmt;
4556 else
4557 return NULL;
4561 /* A helper for vect_recog_mask_conversion_pattern. Build
4562 a conversion of MASK to a type suitable for masking VECTYPE.
4563 The built statement gets the required vectype and is appended to
4564 the pattern sequence of STMT_VINFO.
4566 Return the converted mask. */
4568 static tree
4569 build_mask_conversion (vec_info *vinfo,
4570 tree mask, tree vectype, stmt_vec_info stmt_vinfo)
4572 gimple *stmt;
4573 tree masktype, tmp;
4575 masktype = truth_type_for (vectype);
4576 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
4577 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
4578 append_pattern_def_seq (vinfo, stmt_vinfo,
4579 stmt, masktype, TREE_TYPE (vectype));
4581 return tmp;
4585 /* Function vect_recog_mask_conversion_pattern
4587 Try to find statements which require boolean type
4588 conversion. Additional conversion statements are
4589 added to handle such cases. For example:
4591 bool m_1, m_2, m_3;
4592 int i_4, i_5;
4593 double d_6, d_7;
4594 char c_1, c_2, c_3;
4596 S1 m_1 = i_4 > i_5;
4597 S2 m_2 = d_6 < d_7;
4598 S3 m_3 = m_1 & m_2;
4599 S4 c_1 = m_3 ? c_2 : c_3;
4601 Will be transformed into:
4603 S1 m_1 = i_4 > i_5;
4604 S2 m_2 = d_6 < d_7;
4605 S3'' m_2' = (_Bool[bitsize=32])m_2
4606 S3' m_3' = m_1 & m_2';
4607 S4'' m_3'' = (_Bool[bitsize=8])m_3'
4608 S4' c_1' = m_3'' ? c_2 : c_3; */
4610 static gimple *
4611 vect_recog_mask_conversion_pattern (vec_info *vinfo,
4612 stmt_vec_info stmt_vinfo, tree *type_out)
4614 gimple *last_stmt = stmt_vinfo->stmt;
4615 enum tree_code rhs_code;
4616 tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
4617 tree vectype1, vectype2;
4618 stmt_vec_info pattern_stmt_info;
4619 tree rhs1_op0 = NULL_TREE, rhs1_op1 = NULL_TREE;
4620 tree rhs1_op0_type = NULL_TREE, rhs1_op1_type = NULL_TREE;
4622 /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion. */
4623 if (is_gimple_call (last_stmt)
4624 && gimple_call_internal_p (last_stmt))
4626 gcall *pattern_stmt;
4628 internal_fn ifn = gimple_call_internal_fn (last_stmt);
4629 int mask_argno = internal_fn_mask_index (ifn);
4630 if (mask_argno < 0)
4631 return NULL;
4633 bool store_p = internal_store_fn_p (ifn);
4634 if (store_p)
4636 int rhs_index = internal_fn_stored_value_index (ifn);
4637 tree rhs = gimple_call_arg (last_stmt, rhs_index);
4638 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs));
4640 else
4642 lhs = gimple_call_lhs (last_stmt);
4643 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4646 tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
4647 tree mask_arg_type = integer_type_for_mask (mask_arg, vinfo);
4648 if (!mask_arg_type)
4649 return NULL;
4650 vectype2 = get_mask_type_for_scalar_type (vinfo, mask_arg_type);
4652 if (!vectype1 || !vectype2
4653 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4654 TYPE_VECTOR_SUBPARTS (vectype2)))
4655 return NULL;
4657 tmp = build_mask_conversion (vinfo, mask_arg, vectype1, stmt_vinfo);
4659 auto_vec<tree, 8> args;
4660 unsigned int nargs = gimple_call_num_args (last_stmt);
4661 args.safe_grow (nargs, true);
4662 for (unsigned int i = 0; i < nargs; ++i)
4663 args[i] = ((int) i == mask_argno
4664 ? tmp
4665 : gimple_call_arg (last_stmt, i));
4666 pattern_stmt = gimple_build_call_internal_vec (ifn, args);
4668 if (!store_p)
4670 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4671 gimple_call_set_lhs (pattern_stmt, lhs);
4673 gimple_call_set_nothrow (pattern_stmt, true);
4675 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
4676 if (STMT_VINFO_DATA_REF (stmt_vinfo))
4677 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
4679 *type_out = vectype1;
4680 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4682 return pattern_stmt;
4685 if (!is_gimple_assign (last_stmt))
4686 return NULL;
4688 gimple *pattern_stmt;
4689 lhs = gimple_assign_lhs (last_stmt);
4690 rhs1 = gimple_assign_rhs1 (last_stmt);
4691 rhs_code = gimple_assign_rhs_code (last_stmt);
4693 /* Check for cond expression requiring mask conversion. */
4694 if (rhs_code == COND_EXPR)
4696 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4698 if (TREE_CODE (rhs1) == SSA_NAME)
4700 rhs1_type = integer_type_for_mask (rhs1, vinfo);
4701 if (!rhs1_type)
4702 return NULL;
4704 else if (COMPARISON_CLASS_P (rhs1))
4706 /* Check whether we're comparing scalar booleans and (if so)
4707 whether a better mask type exists than the mask associated
4708 with boolean-sized elements. This avoids unnecessary packs
4709 and unpacks if the booleans are set from comparisons of
4710 wider types. E.g. in:
4712 int x1, x2, x3, x4, y1, y2;
4714 bool b1 = (x1 == x2);
4715 bool b2 = (x3 == x4);
4716 ... = b1 == b2 ? y1 : y2;
4718 it is better for b1 and b2 to use the mask type associated
4719 with int elements rather than bool (byte) elements. */
4720 rhs1_op0 = TREE_OPERAND (rhs1, 0);
4721 rhs1_op1 = TREE_OPERAND (rhs1, 1);
4722 if (!rhs1_op0 || !rhs1_op1)
4723 return NULL;
4724 rhs1_op0_type = integer_type_for_mask (rhs1_op0, vinfo);
4725 rhs1_op1_type = integer_type_for_mask (rhs1_op1, vinfo);
4727 if (!rhs1_op0_type)
4728 rhs1_type = TREE_TYPE (rhs1_op0);
4729 else if (!rhs1_op1_type)
4730 rhs1_type = TREE_TYPE (rhs1_op1);
4731 else if (TYPE_PRECISION (rhs1_op0_type)
4732 != TYPE_PRECISION (rhs1_op1_type))
4734 int tmp0 = (int) TYPE_PRECISION (rhs1_op0_type)
4735 - (int) TYPE_PRECISION (TREE_TYPE (lhs));
4736 int tmp1 = (int) TYPE_PRECISION (rhs1_op1_type)
4737 - (int) TYPE_PRECISION (TREE_TYPE (lhs));
4738 if ((tmp0 > 0 && tmp1 > 0) || (tmp0 < 0 && tmp1 < 0))
4740 if (abs (tmp0) > abs (tmp1))
4741 rhs1_type = rhs1_op1_type;
4742 else
4743 rhs1_type = rhs1_op0_type;
4745 else
4746 rhs1_type = build_nonstandard_integer_type
4747 (TYPE_PRECISION (TREE_TYPE (lhs)), 1);
4749 else
4750 rhs1_type = rhs1_op0_type;
4752 else
4753 return NULL;
4755 vectype2 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
4757 if (!vectype1 || !vectype2)
4758 return NULL;
4760 /* Continue if a conversion is needed. Also continue if we have
4761 a comparison whose vector type would normally be different from
4762 VECTYPE2 when considered in isolation. In that case we'll
4763 replace the comparison with an SSA name (so that we can record
4764 its vector type) and behave as though the comparison was an SSA
4765 name from the outset. */
4766 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4767 TYPE_VECTOR_SUBPARTS (vectype2))
4768 && !rhs1_op0_type
4769 && !rhs1_op1_type)
4770 return NULL;
4772 /* If rhs1 is invariant and we can promote it, leave the COND_EXPR
4773 in place; we can handle it in vectorizable_condition. This avoids
4774 unnecessary promotion stmts and an increased vectorization factor. */
4775 if (COMPARISON_CLASS_P (rhs1)
4776 && INTEGRAL_TYPE_P (rhs1_type)
4777 && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
4778 TYPE_VECTOR_SUBPARTS (vectype2)))
4780 enum vect_def_type dt;
4781 if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
4782 && dt == vect_external_def
4783 && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
4784 && (dt == vect_external_def
4785 || dt == vect_constant_def))
4787 tree wide_scalar_type = build_nonstandard_integer_type
4788 (vector_element_bits (vectype1), TYPE_UNSIGNED (rhs1_type));
4789 tree vectype3 = get_vectype_for_scalar_type (vinfo,
4790 wide_scalar_type);
4791 if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
4792 return NULL;
4796 /* If rhs1 is a comparison we need to move it into a
4797 separate statement. */
4798 if (TREE_CODE (rhs1) != SSA_NAME)
4800 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
4801 if (rhs1_op0_type
4802 && TYPE_PRECISION (rhs1_op0_type) != TYPE_PRECISION (rhs1_type))
4803 rhs1_op0 = build_mask_conversion (vinfo, rhs1_op0,
4804 vectype2, stmt_vinfo);
4805 if (rhs1_op1_type
4806 && TYPE_PRECISION (rhs1_op1_type) != TYPE_PRECISION (rhs1_type))
4807 rhs1_op1 = build_mask_conversion (vinfo, rhs1_op1,
4808 vectype2, stmt_vinfo);
4809 pattern_stmt = gimple_build_assign (tmp, TREE_CODE (rhs1),
4810 rhs1_op0, rhs1_op1);
4811 rhs1 = tmp;
4812 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vectype2,
4813 rhs1_type);
4816 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
4817 TYPE_VECTOR_SUBPARTS (vectype2)))
4818 tmp = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
4819 else
4820 tmp = rhs1;
4822 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4823 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
4824 gimple_assign_rhs2 (last_stmt),
4825 gimple_assign_rhs3 (last_stmt));
4827 *type_out = vectype1;
4828 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4830 return pattern_stmt;
4833 /* Now check for binary boolean operations requiring conversion for
4834 one of the operands. */
4835 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4836 return NULL;
4838 if (rhs_code != BIT_IOR_EXPR
4839 && rhs_code != BIT_XOR_EXPR
4840 && rhs_code != BIT_AND_EXPR
4841 && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
4842 return NULL;
4844 rhs2 = gimple_assign_rhs2 (last_stmt);
4846 rhs1_type = integer_type_for_mask (rhs1, vinfo);
4847 rhs2_type = integer_type_for_mask (rhs2, vinfo);
4849 if (!rhs1_type || !rhs2_type
4850 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
4851 return NULL;
4853 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
4855 vectype1 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
4856 if (!vectype1)
4857 return NULL;
4858 rhs2 = build_mask_conversion (vinfo, rhs2, vectype1, stmt_vinfo);
4860 else
4862 vectype1 = get_mask_type_for_scalar_type (vinfo, rhs2_type);
4863 if (!vectype1)
4864 return NULL;
4865 rhs1 = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
4868 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4869 pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
4871 *type_out = vectype1;
4872 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4874 return pattern_stmt;
4877 /* STMT_INFO is a load or store. If the load or store is conditional, return
4878 the boolean condition under which it occurs, otherwise return null. */
4880 static tree
4881 vect_get_load_store_mask (stmt_vec_info stmt_info)
4883 if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt))
4885 gcc_assert (gimple_assign_single_p (def_assign));
4886 return NULL_TREE;
4889 if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt))
4891 internal_fn ifn = gimple_call_internal_fn (def_call);
4892 int mask_index = internal_fn_mask_index (ifn);
4893 return gimple_call_arg (def_call, mask_index);
4896 gcc_unreachable ();
4899 /* Return MASK if MASK is suitable for masking an operation on vectors
4900 of type VECTYPE, otherwise convert it into such a form and return
4901 the result. Associate any conversion statements with STMT_INFO's
4902 pattern. */
4904 static tree
4905 vect_convert_mask_for_vectype (tree mask, tree vectype,
4906 stmt_vec_info stmt_info, vec_info *vinfo)
4908 tree mask_type = integer_type_for_mask (mask, vinfo);
4909 if (mask_type)
4911 tree mask_vectype = get_mask_type_for_scalar_type (vinfo, mask_type);
4912 if (mask_vectype
4913 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
4914 TYPE_VECTOR_SUBPARTS (mask_vectype)))
4915 mask = build_mask_conversion (vinfo, mask, vectype, stmt_info);
4917 return mask;
4920 /* Return the equivalent of:
4922 fold_convert (TYPE, VALUE)
4924 with the expectation that the operation will be vectorized.
4925 If new statements are needed, add them as pattern statements
4926 to STMT_INFO. */
4928 static tree
4929 vect_add_conversion_to_pattern (vec_info *vinfo,
4930 tree type, tree value, stmt_vec_info stmt_info)
4932 if (useless_type_conversion_p (type, TREE_TYPE (value)))
4933 return value;
4935 tree new_value = vect_recog_temp_ssa_var (type, NULL);
4936 gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
4937 append_pattern_def_seq (vinfo, stmt_info, conversion,
4938 get_vectype_for_scalar_type (vinfo, type));
4939 return new_value;
4942 /* Try to convert STMT_INFO into a call to a gather load or scatter store
4943 internal function. Return the final statement on success and set
4944 *TYPE_OUT to the vector type being loaded or stored.
4946 This function only handles gathers and scatters that were recognized
4947 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
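/* For example, a gather load lhs = a[idx[i]] performed under mask m
   would be replaced by a call along the lines of

   lhs = .MASK_GATHER_LOAD (base, offset, scale, zero, mask);

   with the exact internal function taken from gs_info.ifn. */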
4949 static gimple *
4950 vect_recog_gather_scatter_pattern (vec_info *vinfo,
4951 stmt_vec_info stmt_info, tree *type_out)
4953 /* Currently we only support this for loop vectorization. */
4954 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4955 if (!loop_vinfo)
4956 return NULL;
4958 /* Make sure that we're looking at a gather load or scatter store. */
4959 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4960 if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
4961 return NULL;
4963 /* Get the boolean that controls whether the load or store happens.
4964 This is null if the operation is unconditional. */
4965 tree mask = vect_get_load_store_mask (stmt_info);
4967 /* Make sure that the target supports an appropriate internal
4968 function for the gather/scatter operation. */
4969 gather_scatter_info gs_info;
4970 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info)
4971 || gs_info.ifn == IFN_LAST)
4972 return NULL;
4974 /* Convert the mask to the right form. */
4975 tree gs_vectype = get_vectype_for_scalar_type (loop_vinfo,
4976 gs_info.element_type);
4977 if (mask)
4978 mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
4979 loop_vinfo);
4980 else if (gs_info.ifn == IFN_MASK_SCATTER_STORE
4981 || gs_info.ifn == IFN_MASK_GATHER_LOAD)
4982 mask = build_int_cst (TREE_TYPE (truth_type_for (gs_vectype)), -1);
4984 /* Get the invariant base and non-invariant offset, converting the
4985 latter to the same width as the vector elements. */
4986 tree base = gs_info.base;
4987 tree offset_type = TREE_TYPE (gs_info.offset_vectype);
4988 tree offset = vect_add_conversion_to_pattern (vinfo, offset_type,
4989 gs_info.offset, stmt_info);
4991 /* Build the new pattern statement. */
4992 tree scale = size_int (gs_info.scale);
4993 gcall *pattern_stmt;
4994 if (DR_IS_READ (dr))
4996 tree zero = build_zero_cst (gs_info.element_type);
4997 if (mask != NULL)
4998 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base,
4999 offset, scale, zero, mask);
5000 else
5001 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
5002 offset, scale, zero);
5003 tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
5004 gimple_call_set_lhs (pattern_stmt, load_lhs);
5006 else
5008 tree rhs = vect_get_store_rhs (stmt_info);
5009 if (mask != NULL)
5010 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5,
5011 base, offset, scale, rhs,
5012 mask);
5013 else
5014 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4,
5015 base, offset, scale, rhs);
5017 gimple_call_set_nothrow (pattern_stmt, true);
5019 /* Copy across relevant vectorization info and associate DR with the
5020 new pattern statement instead of the original statement. */
5021 stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
5022 loop_vinfo->move_dr (pattern_stmt_info, stmt_info);
5024 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5025 *type_out = vectype;
5026 vect_pattern_detected ("gather/scatter pattern", stmt_info->stmt);
5028 return pattern_stmt;
5031 /* Return true if TYPE is a non-boolean integer type. These are the types
5032 that we want to consider for narrowing. */
5034 static bool
5035 vect_narrowable_type_p (tree type)
5037 return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
5040 /* Return true if the operation given by CODE can be truncated to N bits
5041 when only N bits of the output are needed. This is only true if bit N+1
5042 of the inputs has no effect on the low N bits of the result. */
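/* For example, (a + b) & 0xff depends only on the low 8 bits of a and b,
   whereas a >> 1 does not (bit 8 of a affects bit 7 of the result),
   which is why the shift codes are not listed below. */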
5044 static bool
5045 vect_truncatable_operation_p (tree_code code)
5047 switch (code)
5049 case PLUS_EXPR:
5050 case MINUS_EXPR:
5051 case MULT_EXPR:
5052 case BIT_AND_EXPR:
5053 case BIT_IOR_EXPR:
5054 case BIT_XOR_EXPR:
5055 case COND_EXPR:
5056 return true;
5058 default:
5059 return false;
5063 /* Record that STMT_INFO could be changed from operating on TYPE to
5064 operating on a type with the precision and sign given by PRECISION
5065 and SIGN respectively. PRECISION is an arbitrary bit precision;
5066 it might not be a whole number of bytes. */
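/* For example, a required precision of 12 bits would be recorded
   as 16 bits after rounding. */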
5068 static void
5069 vect_set_operation_type (stmt_vec_info stmt_info, tree type,
5070 unsigned int precision, signop sign)
5072 /* Round the precision up to a whole number of bytes. */
5073 precision = vect_element_precision (precision);
5074 if (precision < TYPE_PRECISION (type)
5075 && (!stmt_info->operation_precision
5076 || stmt_info->operation_precision > precision))
5078 stmt_info->operation_precision = precision;
5079 stmt_info->operation_sign = sign;
5083 /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
5084 non-boolean inputs, all of which have type TYPE. MIN_INPUT_PRECISION
5085 is an arbitrary bit precision; it might not be a whole number of bytes. */
5087 static void
5088 vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
5089 unsigned int min_input_precision)
5091 /* This operation in isolation only requires the inputs to have
5092 MIN_INPUT_PRECISION of precision. However, that doesn't mean
5093 that MIN_INPUT_PRECISION is a natural precision for the chain
5094 as a whole. E.g. consider something like:
5096 unsigned short *x, *y;
5097 *y = ((*x & 0xf0) >> 4) | (*y << 4);
5099 The right shift can be done on unsigned chars, and only requires the
5100 result of "*x & 0xf0" to be done on unsigned chars. But taking that
5101 approach would mean turning a natural chain of single-vector unsigned
5102 short operations into one that truncates "*x" and then extends
5103 "(*x & 0xf0) >> 4", with two vectors for each unsigned short
5104 operation and one vector for each unsigned char operation.
5105 This would be a significant pessimization.
5107 Instead only propagate the maximum of this precision and the precision
5108 required by the users of the result. This means that we don't pessimize
5109 the case above but continue to optimize things like:
5111 unsigned char *y;
5112 unsigned short *x;
5113 *y = ((*x & 0xf0) >> 4) | (*y << 4);
5115 Here we would truncate two vectors of *x to a single vector of
5116 unsigned chars and use single-vector unsigned char operations for
5117 everything else, rather than doing two unsigned short copies of
5118 "(*x & 0xf0) >> 4" and then truncating the result. */
5119 min_input_precision = MAX (min_input_precision,
5120 stmt_info->min_output_precision);
5122 if (min_input_precision < TYPE_PRECISION (type)
5123 && (!stmt_info->min_input_precision
5124 || stmt_info->min_input_precision > min_input_precision))
5125 stmt_info->min_input_precision = min_input_precision;
5128 /* Subroutine of vect_determine_min_output_precision. Return true if
5129 we can calculate a reduced number of output bits for STMT_INFO,
5130 whose result is LHS. */
5132 static bool
5133 vect_determine_min_output_precision_1 (vec_info *vinfo,
5134 stmt_vec_info stmt_info, tree lhs)
5136 /* Take the maximum precision required by users of the result. */
5137 unsigned int precision = 0;
5138 imm_use_iterator iter;
5139 use_operand_p use;
5140 FOR_EACH_IMM_USE_FAST (use, iter, lhs)
5142 gimple *use_stmt = USE_STMT (use);
5143 if (is_gimple_debug (use_stmt))
5144 continue;
5145 stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
5146 if (!use_stmt_info || !use_stmt_info->min_input_precision)
5147 return false;
5148 /* The input precision recorded for COND_EXPRs applies only to the
5149 "then" and "else" values. */
5150 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
5151 if (assign
5152 && gimple_assign_rhs_code (assign) == COND_EXPR
5153 && use->use != gimple_assign_rhs2_ptr (assign)
5154 && use->use != gimple_assign_rhs3_ptr (assign))
5155 return false;
5156 precision = MAX (precision, use_stmt_info->min_input_precision);
5159 if (dump_enabled_p ())
5160 dump_printf_loc (MSG_NOTE, vect_location,
5161 "only the low %d bits of %T are significant\n",
5162 precision, lhs);
5163 stmt_info->min_output_precision = precision;
5164 return true;
5167 /* Calculate min_output_precision for STMT_INFO. */
5169 static void
5170 vect_determine_min_output_precision (vec_info *vinfo, stmt_vec_info stmt_info)
5172 /* We're only interested in statements with a narrowable result. */
5173 tree lhs = gimple_get_lhs (stmt_info->stmt);
5174 if (!lhs
5175 || TREE_CODE (lhs) != SSA_NAME
5176 || !vect_narrowable_type_p (TREE_TYPE (lhs)))
5177 return;
5179 if (!vect_determine_min_output_precision_1 (vinfo, stmt_info, lhs))
5180 stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
5183 /* Use range information to decide whether STMT (described by STMT_INFO)
5184 could be done in a narrower type. This is effectively a forward
5185 propagation, since it uses context-independent information that applies
5186 to all users of an SSA name. */
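/* For example, if range information shows that the result of an
   addition always lies in [0, 200], the operation only needs 8 bits
   and can be recorded as doable in an unsigned 8-bit type. */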
5188 static void
5189 vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
5191 tree lhs = gimple_assign_lhs (stmt);
5192 if (!lhs || TREE_CODE (lhs) != SSA_NAME)
5193 return;
5195 tree type = TREE_TYPE (lhs);
5196 if (!vect_narrowable_type_p (type))
5197 return;
5199 /* First see whether we have any useful range information for the result. */
5200 unsigned int precision = TYPE_PRECISION (type);
5201 signop sign = TYPE_SIGN (type);
5202 wide_int min_value, max_value;
5203 if (!vect_get_range_info (lhs, &min_value, &max_value))
5204 return;
5206 tree_code code = gimple_assign_rhs_code (stmt);
5207 unsigned int nops = gimple_num_ops (stmt);
5209 if (!vect_truncatable_operation_p (code))
5210 /* Check that all relevant input operands are compatible, and update
5211 [MIN_VALUE, MAX_VALUE] to include their ranges. */
5212 for (unsigned int i = 1; i < nops; ++i)
5214 tree op = gimple_op (stmt, i);
5215 if (TREE_CODE (op) == INTEGER_CST)
5217 /* Don't require the integer to have RHS_TYPE (which it might
5218 not for things like shift amounts, etc.), but do require it
5219 to fit the type. */
5220 if (!int_fits_type_p (op, type))
5221 return;
5223 min_value = wi::min (min_value, wi::to_wide (op, precision), sign);
5224 max_value = wi::max (max_value, wi::to_wide (op, precision), sign);
5226 else if (TREE_CODE (op) == SSA_NAME)
5228 /* Ignore codes that don't take uniform arguments. */
5229 if (!types_compatible_p (TREE_TYPE (op), type))
5230 return;
5232 wide_int op_min_value, op_max_value;
5233 if (!vect_get_range_info (op, &op_min_value, &op_max_value))
5234 return;
5236 min_value = wi::min (min_value, op_min_value, sign);
5237 max_value = wi::max (max_value, op_max_value, sign);
5239 else
5240 return;
5243 /* Try to switch signed types for unsigned types if we can.
5244 This is better for two reasons. First, unsigned ops tend
5245 to be cheaper than signed ops. Second, it means that we can
5246 handle things like:
5248 signed char c;
5249 int res = (int) c & 0xff00; // range [0x0000, 0xff00]
5253 signed char c;
5254 unsigned short res_1 = (unsigned short) c & 0xff00;
5255 int res = (int) res_1;
5257 where the intermediate result res_1 has unsigned rather than
5258 signed type. */
5259 if (sign == SIGNED && !wi::neg_p (min_value))
5260 sign = UNSIGNED;
5262 /* See what precision is required for MIN_VALUE and MAX_VALUE. */
5263 unsigned int precision1 = wi::min_precision (min_value, sign);
5264 unsigned int precision2 = wi::min_precision (max_value, sign);
5265 unsigned int value_precision = MAX (precision1, precision2);
5266 if (value_precision >= precision)
5267 return;
5269 if (dump_enabled_p ())
5270 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
5271 " without loss of precision: %G",
5272 sign == SIGNED ? "signed" : "unsigned",
5273 value_precision, (gimple *) stmt);
5275 vect_set_operation_type (stmt_info, type, value_precision, sign);
5276 vect_set_min_input_precision (stmt_info, type, value_precision);
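/* A hypothetical example of the narrowing above: for

     int res = a + b;   /* range info shows res in [0, 1000] */

   PLUS_EXPR is a truncatable operation, so the operand ranges are not
   consulted.  The minimum value 0 is not negative, so SIGNED is switched
   to UNSIGNED, wi::min_precision (1000, UNSIGNED) is 10, and since
   10 < 32 the statement is recorded as one that can be done as an
   unsigned operation with only 10 significant bits.  */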
5279 /* Use information about the users of STMT's result to decide whether
5280 STMT (described by STMT_INFO) could be done in a narrower type.
5281 This is effectively a backward propagation. */
5283 static void
5284 vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
5286 tree_code code = gimple_assign_rhs_code (stmt);
5287 unsigned int opno = (code == COND_EXPR ? 2 : 1);
5288 tree type = TREE_TYPE (gimple_op (stmt, opno));
5289 if (!vect_narrowable_type_p (type))
5290 return;
5292 unsigned int precision = TYPE_PRECISION (type);
5293 unsigned int operation_precision, min_input_precision;
5294 switch (code)
5296 CASE_CONVERT:
5297 /* Only the bits that contribute to the output matter. Don't change
5298 the precision of the operation itself. */
5299 operation_precision = precision;
5300 min_input_precision = stmt_info->min_output_precision;
5301 break;
5303 case LSHIFT_EXPR:
5304 case RSHIFT_EXPR:
5306 tree shift = gimple_assign_rhs2 (stmt);
5307 if (TREE_CODE (shift) != INTEGER_CST
5308 || !wi::ltu_p (wi::to_widest (shift), precision))
5309 return;
5310 unsigned int const_shift = TREE_INT_CST_LOW (shift);
5311 if (code == LSHIFT_EXPR)
5313 /* Avoid creating an undefined shift.
5315 ??? We could instead use min_output_precision as-is and
5316 optimize out-of-range shifts to zero. However, only
5317 degenerate testcases shift away all their useful input data,
5318 and it isn't natural to drop input operations in the middle
5319 of vectorization. This sort of thing should really be
5320 handled before vectorization. */
5321 operation_precision = MAX (stmt_info->min_output_precision,
5322 const_shift + 1);
5323 /* We need CONST_SHIFT fewer bits of the input. */
5324 min_input_precision = (MAX (operation_precision, const_shift)
5325 - const_shift);
5327 else
5329 /* We need CONST_SHIFT extra bits to do the operation. */
5330 operation_precision = (stmt_info->min_output_precision
5331 + const_shift);
5332 min_input_precision = operation_precision;
5334 break;
5337 default:
5338 if (vect_truncatable_operation_p (code))
5340 /* Input bit N has no effect on output bits N-1 and lower. */
5341 operation_precision = stmt_info->min_output_precision;
5342 min_input_precision = operation_precision;
5343 break;
5345 return;
5348 if (operation_precision < precision)
5350 if (dump_enabled_p ())
5351 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
5352 " without affecting users: %G",
5353 TYPE_UNSIGNED (type) ? "unsigned" : "signed",
5354 operation_precision, (gimple *) stmt);
5355 vect_set_operation_type (stmt_info, type, operation_precision,
5356 TYPE_SIGN (type));
5358 vect_set_min_input_precision (stmt_info, type, min_input_precision);
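/* Two hypothetical examples of the shift handling above, assuming the
   statement's min_output_precision is 16 and the shift amount is 3:

     LSHIFT_EXPR: operation_precision = MAX (16, 3 + 1) = 16,
                  min_input_precision = MAX (16, 3) - 3 = 13.

     RSHIFT_EXPR: operation_precision = 16 + 3 = 19,
                  min_input_precision = 19.

   The operation type itself is only changed when operation_precision is
   smaller than the precision of TYPE.  */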
5361 /* Return true if the statement described by STMT_INFO sets a boolean
5362 SSA_NAME and if we know how to vectorize this kind of statement using
5363 vector mask types. */
5365 static bool
5366 possible_vector_mask_operation_p (stmt_vec_info stmt_info)
5368 tree lhs = gimple_get_lhs (stmt_info->stmt);
5369 if (!lhs
5370 || TREE_CODE (lhs) != SSA_NAME
5371 || !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
5372 return false;
5374 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
5376 tree_code rhs_code = gimple_assign_rhs_code (assign);
5377 switch (rhs_code)
5379 CASE_CONVERT:
5380 case SSA_NAME:
5381 case BIT_NOT_EXPR:
5382 case BIT_IOR_EXPR:
5383 case BIT_XOR_EXPR:
5384 case BIT_AND_EXPR:
5385 return true;
5387 default:
5388 return TREE_CODE_CLASS (rhs_code) == tcc_comparison;
5391 else if (is_a <gphi *> (stmt_info->stmt))
5392 return true;
5393 return false;
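/* For example, in:

     _1 = a_2 < b_3;
     _4 = _1 & c_5;

   both statements set boolean SSA names, the first via a comparison and
   the second via BIT_AND_EXPR, so both are possible mask operations.  */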
5396 /* If STMT_INFO sets a boolean SSA_NAME, see whether we should use
5397 a vector mask type instead of a normal vector type. Record the
5398 result in STMT_INFO->mask_precision. */
5400 static void
5401 vect_determine_mask_precision (vec_info *vinfo, stmt_vec_info stmt_info)
5403 if (!possible_vector_mask_operation_p (stmt_info))
5404 return;
5406 /* If at least one boolean input uses a vector mask type,
5407 pick the mask type with the narrowest elements.
5409 ??? This is the traditional behavior. It should always produce
5410 the smallest number of operations, but isn't necessarily the
5411 optimal choice. For example, if we have:
5413 a = b & c
5415 where:
5417 - the user of a wants it to have a mask type for 16-bit elements (M16)
5418 - b also uses M16
5419 - c uses a mask type for 8-bit elements (M8)
5421 then picking M8 gives:
5423 - 1 M16->M8 pack for b
5424 - 1 M8 AND for a
5425 - 2 M8->M16 unpacks for the user of a
5427 whereas picking M16 would have given:
5429 - 2 M8->M16 unpacks for c
5430 - 2 M16 ANDs for a
5432 The number of operations is equal, but M16 would have given
5433 a shorter dependency chain and allowed more ILP. */
5434 unsigned int precision = ~0U;
5435 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
5437 unsigned int nops = gimple_num_ops (assign);
5438 for (unsigned int i = 1; i < nops; ++i)
5440 tree rhs = gimple_op (assign, i);
5441 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs)))
5442 continue;
5444 stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
5445 if (!def_stmt_info)
5446 /* Don't let external or constant operands influence the choice.
5447 We can convert them to whichever vector type we pick. */
5448 continue;
5450 if (def_stmt_info->mask_precision)
5452 if (precision > def_stmt_info->mask_precision)
5453 precision = def_stmt_info->mask_precision;
5457 /* If the statement compares two values that shouldn't use vector masks,
5458 try comparing the values as normal scalars instead. */
5459 tree_code rhs_code = gimple_assign_rhs_code (assign);
5460 if (precision == ~0U
5461 && TREE_CODE_CLASS (rhs_code) == tcc_comparison)
5463 tree rhs1_type = TREE_TYPE (gimple_assign_rhs1 (assign));
5464 scalar_mode mode;
5465 tree vectype, mask_type;
5466 if (is_a <scalar_mode> (TYPE_MODE (rhs1_type), &mode)
5467 && (vectype = get_vectype_for_scalar_type (vinfo, rhs1_type))
5468 && (mask_type = get_mask_type_for_scalar_type (vinfo, rhs1_type))
5469 && expand_vec_cmp_expr_p (vectype, mask_type, rhs_code))
5470 precision = GET_MODE_BITSIZE (mode);
5473 else
5475 gphi *phi = as_a <gphi *> (stmt_info->stmt);
5476 for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
5478 tree rhs = gimple_phi_arg_def (phi, i);
5480 stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
5481 if (!def_stmt_info)
5482 /* Don't let external or constant operands influence the choice.
5483 We can convert them to whichever vector type we pick. */
5484 continue;
5486 if (def_stmt_info->mask_precision)
5488 if (precision > def_stmt_info->mask_precision)
5489 precision = def_stmt_info->mask_precision;
5494 if (dump_enabled_p ())
5496 if (precision == ~0U)
5497 dump_printf_loc (MSG_NOTE, vect_location,
5498 "using normal nonmask vectors for %G",
5499 stmt_info->stmt);
5500 else
5501 dump_printf_loc (MSG_NOTE, vect_location,
5502 "using boolean precision %d for %G",
5503 precision, stmt_info->stmt);
5506 stmt_info->mask_precision = precision;
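/* A possible example of the comparison fallback above:

     _1 = x_2 < y_3;

   where x_2 and y_3 are 32-bit integers and no boolean operand provides a
   mask precision, so PRECISION is still ~0U when the comparison is
   checked.  On a target that can compare two such vectors directly into a
   mask, the statement is given mask_precision GET_MODE_BITSIZE (SImode),
   i.e. 32.  */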
5509 /* Handle vect_determine_precisions for STMT_INFO, given that we
5510 have already done so for the users of its result. */
5512 void
5513 vect_determine_stmt_precisions (vec_info *vinfo, stmt_vec_info stmt_info)
5515 vect_determine_min_output_precision (vinfo, stmt_info);
5516 if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
5518 vect_determine_precisions_from_range (stmt_info, stmt);
5519 vect_determine_precisions_from_users (stmt_info, stmt);
5523 /* Walk backwards through the vectorizable region to determine the
5524 values of these fields:
5526 - min_output_precision
5527 - min_input_precision
5528 - operation_precision
5529 - operation_sign. */
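/* Mask precisions are computed in forward order, so that boolean
   definitions are seen before their uses, whereas the other precisions
   are computed in reverse order, since min_input_precision propagates
   backwards from a statement's users to the statement itself.  */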
5531 void
5532 vect_determine_precisions (vec_info *vinfo)
5534 DUMP_VECT_SCOPE ("vect_determine_precisions");
5536 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
5538 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5539 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5540 unsigned int nbbs = loop->num_nodes;
5542 for (unsigned int i = 0; i < nbbs; i++)
5544 basic_block bb = bbs[i];
5545 for (auto gsi = gsi_start_phis (bb);
5546 !gsi_end_p (gsi); gsi_next (&gsi))
5548 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
5549 if (stmt_info)
5550 vect_determine_mask_precision (vinfo, stmt_info);
5552 for (auto si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5553 if (!is_gimple_debug (gsi_stmt (si)))
5554 vect_determine_mask_precision
5555 (vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
5557 for (unsigned int i = 0; i < nbbs; i++)
5559 basic_block bb = bbs[nbbs - i - 1];
5560 for (gimple_stmt_iterator si = gsi_last_bb (bb);
5561 !gsi_end_p (si); gsi_prev (&si))
5562 if (!is_gimple_debug (gsi_stmt (si)))
5563 vect_determine_stmt_precisions
5564 (vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
5565 for (auto gsi = gsi_start_phis (bb);
5566 !gsi_end_p (gsi); gsi_next (&gsi))
5568 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
5569 if (stmt_info)
5570 vect_determine_stmt_precisions (vinfo, stmt_info);
5574 else
5576 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
5577 for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
5579 basic_block bb = bb_vinfo->bbs[i];
5580 for (auto gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5582 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
5583 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5584 vect_determine_mask_precision (vinfo, stmt_info);
5586 for (auto gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5588 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
5589 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5590 vect_determine_mask_precision (vinfo, stmt_info);
5593 for (int i = bb_vinfo->bbs.length () - 1; i != -1; --i)
5595 for (gimple_stmt_iterator gsi = gsi_last_bb (bb_vinfo->bbs[i]);
5596 !gsi_end_p (gsi); gsi_prev (&gsi))
5598 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
5599 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5600 vect_determine_stmt_precisions (vinfo, stmt_info);
5602 for (auto gsi = gsi_start_phis (bb_vinfo->bbs[i]);
5603 !gsi_end_p (gsi); gsi_next (&gsi))
5605 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
5606 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5607 vect_determine_stmt_precisions (vinfo, stmt_info);
5613 typedef gimple *(*vect_recog_func_ptr) (vec_info *, stmt_vec_info, tree *);
5615 struct vect_recog_func
5617 vect_recog_func_ptr fn;
5618 const char *name;
5621 /* Note that ordering matters - the first pattern matching on a stmt is
5622 taken, which means that usually the more complex patterns need to precede
5623 the less complex ones (widen_sum only after dot_prod or sad, for example). */
5624 static vect_recog_func vect_vect_recog_func_ptrs[] = {
5625 { vect_recog_over_widening_pattern, "over_widening" },
5626 /* Must come after over_widening, which narrows the shift as much as
5627 possible beforehand. */
5628 { vect_recog_average_pattern, "average" },
5629 { vect_recog_cond_expr_convert_pattern, "cond_expr_convert" },
5630 { vect_recog_mulhs_pattern, "mult_high" },
5631 { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
5632 { vect_recog_widen_mult_pattern, "widen_mult" },
5633 { vect_recog_dot_prod_pattern, "dot_prod" },
5634 { vect_recog_sad_pattern, "sad" },
5635 { vect_recog_widen_sum_pattern, "widen_sum" },
5636 { vect_recog_pow_pattern, "pow" },
5637 { vect_recog_popcount_pattern, "popcount" },
5638 { vect_recog_widen_shift_pattern, "widen_shift" },
5639 { vect_recog_rotate_pattern, "rotate" },
5640 { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
5641 { vect_recog_divmod_pattern, "divmod" },
5642 { vect_recog_mult_pattern, "mult" },
5643 { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
5644 { vect_recog_bool_pattern, "bool" },
5645 /* This must come before mask conversion, and includes the parts
5646 of mask conversion that are needed for gather and scatter
5647 internal functions. */
5648 { vect_recog_gather_scatter_pattern, "gather_scatter" },
5649 { vect_recog_mask_conversion_pattern, "mask_conversion" },
5650 { vect_recog_widen_plus_pattern, "widen_plus" },
5651 { vect_recog_widen_minus_pattern, "widen_minus" },
5654 const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);
5656 /* Mark statements that are involved in a pattern. */
5658 void
5659 vect_mark_pattern_stmts (vec_info *vinfo,
5660 stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
5661 tree pattern_vectype)
5663 stmt_vec_info orig_stmt_info_saved = orig_stmt_info;
5664 gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
5666 gimple *orig_pattern_stmt = NULL;
5667 if (is_pattern_stmt_p (orig_stmt_info))
5669 /* We're replacing a statement in an existing pattern definition
5670 sequence. */
5671 orig_pattern_stmt = orig_stmt_info->stmt;
5672 if (dump_enabled_p ())
5673 dump_printf_loc (MSG_NOTE, vect_location,
5674 "replacing earlier pattern %G", orig_pattern_stmt);
5676 /* To keep the book-keeping simple, just swap the lhs of the
5677 old and new statements, so that the old one has a valid but
5678 unused lhs. */
5679 tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
5680 gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
5681 gimple_set_lhs (pattern_stmt, old_lhs);
5683 if (dump_enabled_p ())
5684 dump_printf_loc (MSG_NOTE, vect_location, "with %G", pattern_stmt);
5686 /* Switch to the statement that ORIG replaces. */
5687 orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);
5689 /* We shouldn't be replacing the main pattern statement. */
5690 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
5691 != orig_pattern_stmt);
5694 if (def_seq)
5695 for (gimple_stmt_iterator si = gsi_start (def_seq);
5696 !gsi_end_p (si); gsi_next (&si))
5698 if (dump_enabled_p ())
5699 dump_printf_loc (MSG_NOTE, vect_location,
5700 "extra pattern stmt: %G", gsi_stmt (si));
5701 stmt_vec_info pattern_stmt_info
5702 = vect_init_pattern_stmt (vinfo, gsi_stmt (si),
5703 orig_stmt_info, pattern_vectype);
5704 /* Stmts in the def sequence are not vectorizable cycle or
5705 induction defs; instead they should all be vect_internal_def
5706 feeding the main pattern stmt which retains this def type. */
5707 STMT_VINFO_DEF_TYPE (pattern_stmt_info) = vect_internal_def;
5710 if (orig_pattern_stmt)
5712 vect_init_pattern_stmt (vinfo, pattern_stmt,
5713 orig_stmt_info, pattern_vectype);
5715 /* Insert all the new pattern statements before the original one. */
5716 gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
5717 gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
5718 orig_def_seq);
5719 gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
5720 gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);
5722 /* Remove the pattern statement that this new pattern replaces. */
5723 gsi_remove (&gsi, false);
5725 else
5726 vect_set_pattern_stmt (vinfo,
5727 pattern_stmt, orig_stmt_info, pattern_vectype);
5729 /* Transfer reduction path info to the pattern. */
5730 if (STMT_VINFO_REDUC_IDX (orig_stmt_info_saved) != -1)
5732 gimple_match_op op;
5733 if (!gimple_extract_op (orig_stmt_info_saved->stmt, &op))
5734 gcc_unreachable ();
5735 tree lookfor = op.ops[STMT_VINFO_REDUC_IDX (orig_stmt_info)];
5736 /* Search the pattern def sequence and the main pattern stmt. Note
5737 we may have inserted all of them into a containing pattern def
5738 sequence, so the following is a bit awkward. */
5739 gimple_stmt_iterator si;
5740 gimple *s;
5741 if (def_seq)
5743 si = gsi_start (def_seq);
5744 s = gsi_stmt (si);
5745 gsi_next (&si);
5747 else
5749 si = gsi_none ();
5750 s = pattern_stmt;
5754 bool found = false;
5755 if (gimple_extract_op (s, &op))
5756 for (unsigned i = 0; i < op.num_ops; ++i)
5757 if (op.ops[i] == lookfor)
5759 STMT_VINFO_REDUC_IDX (vinfo->lookup_stmt (s)) = i;
5760 lookfor = gimple_get_lhs (s);
5761 found = true;
5762 break;
5764 if (s == pattern_stmt)
5766 if (!found && dump_enabled_p ())
5767 dump_printf_loc (MSG_NOTE, vect_location,
5768 "failed to update reduction index.\n");
5769 break;
5771 if (gsi_end_p (si))
5772 s = pattern_stmt;
5773 else
5775 s = gsi_stmt (si);
5776 if (s == pattern_stmt)
5777 /* Found the end inside a bigger pattern def seq. */
5778 si = gsi_none ();
5779 else
5780 gsi_next (&si);
5782 } while (1);
5786 /* Function vect_pattern_recog_1
5788 Input:
5789 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
5790 computation pattern.
5791 STMT_INFO: A stmt from which the pattern search should start.
5793 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
5794 a sequence of statements that has the same functionality and can be
5795 used to replace STMT_INFO. It returns the last statement in the sequence
5796 and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
5797 PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
5798 statement, having first checked that the target supports the new operation
5799 in that type.
5801 This function also does some bookkeeping, as explained in the documentation
5802 for vect_pattern_recog. */
5804 static void
5805 vect_pattern_recog_1 (vec_info *vinfo,
5806 vect_recog_func *recog_func, stmt_vec_info stmt_info)
5808 gimple *pattern_stmt;
5809 loop_vec_info loop_vinfo;
5810 tree pattern_vectype;
5812 /* If this statement has already been replaced with pattern statements,
5813 leave the original statement alone, since the first match wins.
5814 Instead try to match against the definition statements that feed
5815 the main pattern statement. */
5816 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
5818 gimple_stmt_iterator gsi;
5819 for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
5820 !gsi_end_p (gsi); gsi_next (&gsi))
5821 vect_pattern_recog_1 (vinfo, recog_func,
5822 vinfo->lookup_stmt (gsi_stmt (gsi)));
5823 return;
5826 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
5827 pattern_stmt = recog_func->fn (vinfo, stmt_info, &pattern_vectype);
5828 if (!pattern_stmt)
5830 /* Clear any half-formed pattern definition sequence. */
5831 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
5832 return;
5835 loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5837 /* Found a vectorizable pattern. */
5838 if (dump_enabled_p ())
5839 dump_printf_loc (MSG_NOTE, vect_location,
5840 "%s pattern recognized: %G",
5841 recog_func->name, pattern_stmt);
5843 /* Mark the stmts that are involved in the pattern. */
5844 vect_mark_pattern_stmts (vinfo, stmt_info, pattern_stmt, pattern_vectype);
5846 /* Patterns cannot be vectorized using SLP, because they change the order of
5847 computation. */
5848 if (loop_vinfo)
5850 unsigned ix, ix2;
5851 stmt_vec_info *elem_ptr;
5852 VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
5853 elem_ptr, *elem_ptr == stmt_info);
5858 /* Function vect_pattern_recog
5860 Input:
5861 LOOP_VINFO - a loop_vec_info of a loop in which we want to look for
5862 computation idioms.
5864 Output - for each computation idiom that is detected we create a new stmt
5865 that provides the same functionality and that can be vectorized. We
5866 also record some information in the stmt_vec_info of the relevant
5867 stmts, as explained below:
5869 At the entry to this function we have the following stmts, with the
5870 following initial value in the STMT_VINFO fields:
5872 stmt in_pattern_p related_stmt vec_stmt
5873 S1: a_i = .... - - -
5874 S2: a_2 = ..use(a_i).. - - -
5875 S3: a_1 = ..use(a_2).. - - -
5876 S4: a_0 = ..use(a_1).. - - -
5877 S5: ... = ..use(a_0).. - - -
5879 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
5880 represented by a single stmt. We then:
5881 - create a new stmt S6 equivalent to the pattern (the stmt is not
5882 inserted into the code)
5883 - fill in the STMT_VINFO fields as follows:
5885 in_pattern_p related_stmt vec_stmt
5886 S1: a_i = .... - - -
5887 S2: a_2 = ..use(a_i).. - - -
5888 S3: a_1 = ..use(a_2).. - - -
5889 S4: a_0 = ..use(a_1).. true S6 -
5890 '---> S6: a_new = .... - S4 -
5891 S5: ... = ..use(a_0).. - - -
5893 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
5894 to each other through the RELATED_STMT field).
5896 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
5897 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
5898 remain irrelevant unless used by stmts other than S4.
5900 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
5901 (because they are marked as irrelevant). It will vectorize S6, and record
5902 a pointer to the new vector stmt VS6 from S6 (as usual).
5903 S4 will be skipped, and S5 will be vectorized as usual:
5905 in_pattern_p related_stmt vec_stmt
5906 S1: a_i = .... - - -
5907 S2: a_2 = ..use(a_i).. - - -
5908 S3: a_1 = ..use(a_2).. - - -
5909 > VS6: va_new = .... - - -
5910 S4: a_0 = ..use(a_1).. true S6 VS6
5911 '---> S6: a_new = .... - S4 VS6
5912 > VS5: ... = ..vuse(va_new).. - - -
5913 S5: ... = ..use(a_0).. - - -
5915 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
5916 elsewhere), and we'll end up with:
5918 VS6: va_new = ....
5919 VS5: ... = ..vuse(va_new)..
5921 In the case of more than one pattern statement, e.g., widen-mult with
5922 intermediate type:
5924 S1 a_t = ;
5925 S2 a_T = (TYPE) a_t;
5926 '--> S3: a_it = (interm_type) a_t;
5927 S4 prod_T = a_T * CONST;
5928 '--> S5: prod_T' = a_it w* CONST;
5930 there may be other users of a_T outside the pattern. In that case S2 will
5931 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
5932 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
5933 be recorded in S3. */
5935 void
5936 vect_pattern_recog (vec_info *vinfo)
5938 class loop *loop;
5939 basic_block *bbs;
5940 unsigned int nbbs;
5941 gimple_stmt_iterator si;
5942 unsigned int i, j;
5944 vect_determine_precisions (vinfo);
5946 DUMP_VECT_SCOPE ("vect_pattern_recog");
5948 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
5950 loop = LOOP_VINFO_LOOP (loop_vinfo);
5951 bbs = LOOP_VINFO_BBS (loop_vinfo);
5952 nbbs = loop->num_nodes;
5954 /* Scan through the loop stmts, applying the pattern recognition
5955 functions starting at each stmt visited: */
5956 for (i = 0; i < nbbs; i++)
5958 basic_block bb = bbs[i];
5959 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5961 if (is_gimple_debug (gsi_stmt (si)))
5962 continue;
5963 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
5964 /* Scan over all generic vect_recog_xxx_pattern functions. */
5965 for (j = 0; j < NUM_PATTERNS; j++)
5966 vect_pattern_recog_1 (vinfo, &vect_vect_recog_func_ptrs[j],
5967 stmt_info);
5971 else
5973 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
5974 for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
5975 for (gimple_stmt_iterator gsi = gsi_start_bb (bb_vinfo->bbs[i]);
5976 !gsi_end_p (gsi); gsi_next (&gsi))
5978 stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (gsi_stmt (gsi));
5979 if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info))
5980 continue;
5982 /* Scan over all generic vect_recog_xxx_pattern functions. */
5983 for (j = 0; j < NUM_PATTERNS; j++)
5984 vect_pattern_recog_1 (vinfo,
5985 &vect_vect_recog_func_ptrs[j], stmt_info);
5989 /* After this no more add_stmt calls are allowed. */
5990 vinfo->stmt_vec_info_ro = true;