/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006-2022 Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "ssa.h"
#include "expmed.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "dumpfile.h"
#include "builtins.h"
#include "internal-fn.h"
#include "case-cfn-macros.h"
#include "fold-const-call.h"
#include "attribs.h"
#include "cgraph.h"
#include "omp-simd-clone.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "gimple-range.h"

/* TODO: Note the vectorizer still builds COND_EXPRs with GENERIC compares
   in the first operand.  Disentangling this is future work; the
   IL is properly transferred to VEC_COND_EXPRs with separate compares.  */
59 /* Return true if we have a useful VR_RANGE range for VAR, storing it
60 in *MIN_VALUE and *MAX_VALUE if so. Note the range in the dump files. */
63 vect_get_range_info (tree var
, wide_int
*min_value
, wide_int
*max_value
)
66 get_range_query (cfun
)->range_of_expr (vr
, var
);
67 if (vr
.undefined_p ())
68 vr
.set_varying (TREE_TYPE (var
));
69 *min_value
= wi::to_wide (vr
.min ());
70 *max_value
= wi::to_wide (vr
.max ());
71 value_range_kind vr_type
= vr
.kind ();
72 wide_int nonzero
= get_nonzero_bits (var
);
73 signop sgn
= TYPE_SIGN (TREE_TYPE (var
));
74 if (intersect_range_with_nonzero_bits (vr_type
, min_value
, max_value
,
75 nonzero
, sgn
) == VR_RANGE
)
77 if (dump_enabled_p ())
79 dump_generic_expr_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, var
);
80 dump_printf (MSG_NOTE
, " has range [");
81 dump_hex (MSG_NOTE
, *min_value
);
82 dump_printf (MSG_NOTE
, ", ");
83 dump_hex (MSG_NOTE
, *max_value
);
84 dump_printf (MSG_NOTE
, "]\n");
90 if (dump_enabled_p ())
92 dump_generic_expr_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, var
);
93 dump_printf (MSG_NOTE
, " has no range info\n");

/* Report that we've found an instance of pattern PATTERN in
   statement STMT.  */

static void
vect_pattern_detected (const char *name, gimple *stmt)
{
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt);
}

/* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
   return the pattern statement's stmt_vec_info.  Set its vector type to
   VECTYPE if it doesn't have one already.  */

static stmt_vec_info
vect_init_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
			stmt_vec_info orig_stmt_info, tree vectype)
{
  stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
  if (pattern_stmt_info == NULL)
    pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
  gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));

  pattern_stmt_info->pattern_stmt_p = true;
  STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
  STMT_VINFO_DEF_TYPE (pattern_stmt_info)
    = STMT_VINFO_DEF_TYPE (orig_stmt_info);
  if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
    {
      gcc_assert (!vectype
		  || (VECTOR_BOOLEAN_TYPE_P (vectype)
		      == vect_use_mask_type_p (orig_stmt_info)));
      STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
      pattern_stmt_info->mask_precision = orig_stmt_info->mask_precision;
    }
  return pattern_stmt_info;
}

/* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
   Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
   have one already.  */

static void
vect_set_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
		       stmt_vec_info orig_stmt_info, tree vectype)
{
  STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
  STMT_VINFO_RELATED_STMT (orig_stmt_info)
    = vect_init_pattern_stmt (vinfo, pattern_stmt, orig_stmt_info, vectype);
}

/* Add NEW_STMT to STMT_INFO's pattern definition statements.  If VECTYPE
   is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
   be different from the vector type of the final pattern statement.
   If VECTYPE is a mask type, SCALAR_TYPE_FOR_MASK is the scalar type
   from which it was derived.  */

static void
append_pattern_def_seq (vec_info *vinfo,
			stmt_vec_info stmt_info, gimple *new_stmt,
			tree vectype = NULL_TREE,
			tree scalar_type_for_mask = NULL_TREE)
{
  gcc_assert (!scalar_type_for_mask
	      == (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype)));
  if (vectype)
    {
      stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
      STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
      if (scalar_type_for_mask)
	new_stmt_info->mask_precision
	  = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (scalar_type_for_mask));
    }
  gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
				      new_stmt);
}

/* The caller wants to perform new operations on vect_external variable
   VAR, so that the result of the operations would also be vect_external.
   Return the edge on which the operations can be performed, if one exists.
   Return null if the operations should instead be treated as part of
   the pattern that needs them.  */

static edge
vect_get_external_def_edge (vec_info *vinfo, tree var)
{
  edge e = NULL;
  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    {
      e = loop_preheader_edge (loop_vinfo->loop);
      if (!SSA_NAME_IS_DEFAULT_DEF (var))
	{
	  basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
	  if (bb == NULL
	      || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
	    e = NULL;
	}
    }
  return e;
}

/* Return true if the target supports a vector version of CODE,
   where CODE is known to map to a direct optab with the given SUBTYPE.
   ITYPE specifies the type of (some of) the scalar inputs and OTYPE
   specifies the type of the scalar result.

   If CODE allows the inputs and outputs to have different type
   (such as for WIDEN_SUM_EXPR), it is the input mode rather
   than the output mode that determines the appropriate target pattern.
   Operand 0 of the target pattern then specifies the mode that the output
   must have.

   When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
   Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
   is nonnull.  */

static bool
vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code,
				 tree itype, tree *vecotype_out,
				 tree *vecitype_out = NULL,
				 enum optab_subtype subtype = optab_default)
{
  tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
  if (!vecitype)
    return false;

  tree vecotype = get_vectype_for_scalar_type (vinfo, otype);
  if (!vecotype)
    return false;

  optab optab = optab_for_tree_code (code, vecitype, subtype);
  if (!optab)
    return false;

  insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
  if (icode == CODE_FOR_nothing
      || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
    return false;

  *vecotype_out = vecotype;
  if (vecitype_out)
    *vecitype_out = vecitype;
  return true;
}

/* Round bit precision PRECISION up to a full element.  */

static unsigned int
vect_element_precision (unsigned int precision)
{
  precision = 1 << ceil_log2 (precision);
  return MAX (precision, BITS_PER_UNIT);
}

/* If OP is defined by a statement that's being considered for vectorization,
   return information about that statement, otherwise return NULL.  */

static stmt_vec_info
vect_get_internal_def (vec_info *vinfo, tree op)
{
  stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
  if (def_stmt_info
      && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
    return def_stmt_info;
  return NULL;
}

/* Check whether NAME, an ssa-name used in STMT_VINFO,
   is a result of a type promotion, such that:
     DEF_STMT: NAME = NOP (name0)
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.  */

static bool
type_conversion_p (vec_info *vinfo, tree name, bool check_sign,
		   tree *orig_type, gimple **def_stmt, bool *promotion)
{
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;

  stmt_vec_info def_stmt_info;
  if (!vect_is_simple_use (name, vinfo, &dt, &def_stmt_info, def_stmt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (!*def_stmt)
    return false;

  if (!is_gimple_assign (*def_stmt))
    return false;

  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *orig_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
    return false;

  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
    *promotion = true;
  else
    *promotion = false;

  if (!vect_is_simple_use (oprnd0, vinfo, &dt))
    return false;

  return true;
}
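
/* For example (assuming 16-bit short and 32-bit int): given

     short s;
     int i = (int) s;

   calling this on I sets *ORIG_TYPE to short and *PROMOTION to true,
   since int has at least twice the precision of short.  */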

/* Holds information about an input operand after some sign changes
   and type promotions have been peeled away.  */
class vect_unpromoted_value {
public:
  vect_unpromoted_value ();

  void set_op (tree, vect_def_type, stmt_vec_info = NULL);

  /* The value obtained after peeling away zero or more casts.  */
  tree op;

  /* The type of OP.  */
  tree type;

  /* The definition type of OP.  */
  vect_def_type dt;

  /* If OP is the result of peeling at least one cast, and if the cast
     of OP itself is a vectorizable statement, CASTER identifies that
     statement, otherwise it is null.  */
  stmt_vec_info caster;
};

inline vect_unpromoted_value::vect_unpromoted_value ()
  : op (NULL_TREE),
    type (NULL_TREE),
    dt (vect_uninitialized_def),
    caster (NULL)
{
}

/* Set the operand to OP_IN, its definition type to DT_IN, and the
   statement that casts it to CASTER_IN.  */

inline void
vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
			       stmt_vec_info caster_in)
{
  op = op_in;
  type = TREE_TYPE (op);
  dt = dt_in;
  caster = caster_in;
}

/* If OP is a vectorizable SSA name, strip a sequence of integer conversions
   to reach some vectorizable inner operand OP', continuing as long as it
   is possible to convert OP' back to OP using a possible sign change
   followed by a possible promotion P.  Return this OP', or null if OP is
   not a vectorizable SSA name.  If there is a promotion P, describe its
   input in UNPROM, otherwise describe OP' in UNPROM.  If SINGLE_USE_P
   is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
   have more than one user.

   A successful return means that it is possible to go from OP' to OP
   via UNPROM.  The cast from OP' to UNPROM is at most a sign change,
   whereas the cast from UNPROM to OP might be a promotion, a sign
   change, or a nop.

   E.g. say we have:

       signed short *ptr = ...;
       signed short C = *ptr;
       unsigned short B = (unsigned short) C;    // sign change
       signed int A = (signed int) B;            // unsigned promotion
       ...possible other uses of A...
       unsigned int OP = (unsigned int) A;       // sign change

   In this case it's possible to go directly from C to OP using:

       OP = (unsigned int) (unsigned short) C;
	    +------------+ +--------------+
	       promotion       sign change

   so OP' would be C.  The input to the promotion is B, so UNPROM
   would describe B.  */

static tree
vect_look_through_possible_promotion (vec_info *vinfo, tree op,
				      vect_unpromoted_value *unprom,
				      bool *single_use_p = NULL)
{
  tree res = NULL_TREE;
  tree op_type = TREE_TYPE (op);
  unsigned int orig_precision = TYPE_PRECISION (op_type);
  unsigned int min_precision = orig_precision;
  stmt_vec_info caster = NULL;
  while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
    {
      /* See whether OP is simple enough to vectorize.  */
      stmt_vec_info def_stmt_info;
      gimple *def_stmt;
      vect_def_type dt;
      if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
	break;

      /* If OP is the input of a demotion, skip over it to see whether
	 OP is itself the result of a promotion.  If so, the combined
	 effect of the promotion and the demotion might fit the required
	 pattern, otherwise neither operation fits.

	 This copes with cases such as the result of an arithmetic
	 operation being truncated before being stored, and where that
	 arithmetic operation has been recognized as an over-widened one.  */
      if (TYPE_PRECISION (op_type) <= min_precision)
	{
	  /* Use OP as the UNPROM described above if we haven't yet
	     found a promotion, or if using the new input preserves the
	     sign of the previous promotion.  */
	  if (!res
	      || TYPE_PRECISION (unprom->type) == orig_precision
	      || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type))
	    {
	      unprom->set_op (op, dt, caster);
	      min_precision = TYPE_PRECISION (op_type);
	    }
	  /* Stop if we've already seen a promotion and if this
	     conversion does more than change the sign.  */
	  else if (TYPE_PRECISION (op_type)
		   != TYPE_PRECISION (unprom->type))
	    break;

	  /* The sequence now extends to OP.  */
	  res = op;
	}

      /* See whether OP is defined by a cast.  Record it as CASTER if
	 the cast is potentially vectorizable.  */
      if (!def_stmt)
	break;
      caster = def_stmt_info;

      /* Ignore pattern statements, since we don't link uses for them.  */
      if (caster
	  && single_use_p
	  && !STMT_VINFO_RELATED_STMT (caster)
	  && !has_single_use (res))
	*single_use_p = false;

      gassign *assign = dyn_cast <gassign *> (def_stmt);
      if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
	break;

      /* Continue with the input to the cast.  */
      op = gimple_assign_rhs1 (def_stmt);
      op_type = TREE_TYPE (op);
    }
  return res;
}

/* OP is an integer operand to an operation that returns TYPE, and we
   want to treat the operation as a widening one.  So far we can treat
   it as widening from *COMMON_TYPE.

   Return true if OP is suitable for such a widening operation,
   either widening from *COMMON_TYPE or from some supertype of it.
   Update *COMMON_TYPE to the supertype in the latter case.

   SHIFT_P is true if OP is a shift amount.  */

static bool
vect_joust_widened_integer (tree type, bool shift_p, tree op,
			    tree *common_type)
{
  /* Calculate the minimum precision required by OP, without changing
     the sign of either operand.  */
  unsigned int precision;
  if (shift_p)
    {
      if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
	return false;
      precision = TREE_INT_CST_LOW (op);
    }
  else
    {
      precision = wi::min_precision (wi::to_widest (op),
				     TYPE_SIGN (*common_type));
      if (precision * 2 > TYPE_PRECISION (type))
	return false;
    }

  /* If OP requires a wider type, switch to that type.  The checks
     above ensure that this is still narrower than the result.  */
  precision = vect_element_precision (precision);
  if (TYPE_PRECISION (*common_type) < precision)
    *common_type = build_nonstandard_integer_type
      (precision, TYPE_UNSIGNED (*common_type));
  return true;
}

/* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
   is narrower than type, storing the supertype in *COMMON_TYPE if so.  */

static bool
vect_joust_widened_type (tree type, tree new_type, tree *common_type)
{
  if (types_compatible_p (*common_type, new_type))
    return true;

  /* See if *COMMON_TYPE can hold all values of NEW_TYPE.  */
  if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
      && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
    return true;

  /* See if NEW_TYPE can hold all values of *COMMON_TYPE.  */
  if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
      && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
    {
      *common_type = new_type;
      return true;
    }

  /* We have mismatched signs, with the signed type being
     no wider than the unsigned type.  In this case we need
     a wider signed type.  */
  unsigned int precision = MAX (TYPE_PRECISION (*common_type),
				TYPE_PRECISION (new_type));
  precision *= 2;

  if (precision * 2 > TYPE_PRECISION (type))
    return false;

  *common_type = build_nonstandard_integer_type (precision, false);
  return true;
}
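
/* For example: if *COMMON_TYPE is unsigned char, NEW_TYPE is signed char
   and TYPE is int, neither 8-bit type can hold all values of the other,
   so *COMMON_TYPE becomes a 16-bit signed type, which is still narrower
   than the 32-bit result.  */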

/* Check whether STMT_INFO can be viewed as a tree of integer operations
   in which each node either performs CODE or WIDENED_CODE, and where
   each leaf operand is narrower than the result of STMT_INFO.  MAX_NOPS
   specifies the maximum number of leaf operands.  SHIFT_P says whether
   CODE and WIDENED_CODE are some sort of shift.

   If STMT_INFO is such a tree, return the number of leaf operands
   and describe them in UNPROM[0] onwards.  Also set *COMMON_TYPE
   to a type that (a) is narrower than the result of STMT_INFO and
   (b) can hold all leaf operand values.

   If SUBTYPE is nonnull, allow the signs of the operands to differ,
   but not their precision.  SUBTYPE is updated to reflect this.

   Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
   exists.  */

static unsigned int
vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code,
		      tree_code widened_code, bool shift_p,
		      unsigned int max_nops,
		      vect_unpromoted_value *unprom, tree *common_type,
		      enum optab_subtype *subtype = NULL)
{
  /* Check for an integer operation with the right code.  */
  gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
  if (!assign)
    return 0;

  tree_code rhs_code = gimple_assign_rhs_code (assign);
  if (rhs_code != code && rhs_code != widened_code)
    return 0;

  tree type = TREE_TYPE (gimple_assign_lhs (assign));
  if (!INTEGRAL_TYPE_P (type))
    return 0;

  /* Assume that both operands will be leaf operands.  */
  max_nops -= 2;

  /* Check the operands.  */
  unsigned int next_op = 0;
  for (unsigned int i = 0; i < 2; ++i)
    {
      vect_unpromoted_value *this_unprom = &unprom[next_op];
      unsigned int nops = 1;
      tree op = gimple_op (assign, i + 1);
      if (i == 1 && TREE_CODE (op) == INTEGER_CST)
	{
	  /* We already have a common type from earlier operands.
	     Update it to account for OP.  */
	  this_unprom->set_op (op, vect_constant_def);
	  if (!vect_joust_widened_integer (type, shift_p, op, common_type))
	    return 0;
	}
      else
	{
	  /* Only allow shifts by constants.  */
	  if (shift_p && i == 1)
	    return 0;

	  if (!vect_look_through_possible_promotion (vinfo, op, this_unprom))
	    return 0;

	  if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
	    {
	      /* The operand isn't widened.  If STMT_INFO has the code
		 for an unwidened operation, recursively check whether
		 this operand is a node of the tree.  */
	      if (rhs_code != code
		  || max_nops == 0
		  || this_unprom->dt != vect_internal_def)
		return 0;

	      /* Give back the leaf slot allocated above now that we're
		 not treating this as a leaf operand.  */
	      max_nops += 1;

	      /* Recursively process the definition of the operand.  */
	      stmt_vec_info def_stmt_info
		= vinfo->lookup_def (this_unprom->op);
	      nops = vect_widened_op_tree (vinfo, def_stmt_info, code,
					   widened_code, shift_p, max_nops,
					   this_unprom, common_type,
					   subtype);
	      if (nops == 0)
		return 0;

	      max_nops -= nops;
	    }
	  else
	    {
	      /* Make sure that the operand is narrower than the result.  */
	      if (TYPE_PRECISION (this_unprom->type) * 2
		  > TYPE_PRECISION (type))
		return 0;

	      /* Update COMMON_TYPE for the new operand.  */
	      if (i == 0)
		*common_type = this_unprom->type;
	      else if (!vect_joust_widened_type (type, this_unprom->type,
						 common_type))
		{
		  if (subtype)
		    {
		      /* See if we can sign extend the smaller type.  */
		      if (TYPE_PRECISION (this_unprom->type)
			  > TYPE_PRECISION (*common_type))
			*common_type = this_unprom->type;
		      *subtype = optab_vector_mixed_sign;
		    }
		  else
		    return 0;
		}
	    }
	}
      next_op += nops;
    }
  return next_op;
}
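
/* For example (illustrative): for

     unsigned char a, b, c, d;
     int res = (int) a + (int) b + (int) c + (int) d;

   calling vect_widened_op_tree on the final addition with CODE PLUS_EXPR,
   WIDENED_CODE WIDEN_PLUS_EXPR and MAX_NOPS 4 returns 4, describes
   a, b, c and d in UNPROM[0..3] and sets *COMMON_TYPE to unsigned char.  */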

/* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
   is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var.  */

static tree
vect_recog_temp_ssa_var (tree type, gimple *stmt)
{
  return make_temp_ssa_name (type, stmt, "patt");
}

/* STMT2_INFO describes a type conversion that could be split into STMT1
   followed by a version of STMT2_INFO that takes NEW_RHS as its first
   input.  Try to do this using pattern statements, returning true on
   success.  */

static bool
vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
		      gimple *stmt1, tree vectype)
{
  if (is_pattern_stmt_p (stmt2_info))
    {
      /* STMT2_INFO is part of a pattern.  Get the statement to which
	 the pattern is attached.  */
      stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
      vect_init_pattern_stmt (vinfo, stmt1, orig_stmt2_info, vectype);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "Splitting pattern statement: %G", stmt2_info->stmt);

      /* Since STMT2_INFO is a pattern statement, we can change it
	 in-situ without worrying about changing the code for the
	 containing block.  */
      gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
	  dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
			   stmt2_info->stmt);
	}

      gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
      if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
	/* STMT2_INFO is the actual pattern statement.  Add STMT1
	   to the end of the definition sequence.  */
	gimple_seq_add_stmt_without_update (def_seq, stmt1);
      else
	{
	  /* STMT2_INFO belongs to the definition sequence.  Insert STMT1
	     before it.  */
	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
	  gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
	}
      return true;
    }
  else
    {
      /* STMT2_INFO doesn't yet have a pattern.  Try to create a
	 two-statement pattern now.  */
      gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
      tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
      tree lhs_vectype = get_vectype_for_scalar_type (vinfo, lhs_type);
      if (!lhs_vectype)
	return false;

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "Splitting statement: %G", stmt2_info->stmt);

      /* Add STMT1 as a singleton pattern definition sequence.  */
      gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
      vect_init_pattern_stmt (vinfo, stmt1, stmt2_info, vectype);
      gimple_seq_add_stmt_without_update (def_seq, stmt1);

      /* Build the second of the two pattern statements.  */
      tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
      gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
      vect_set_pattern_stmt (vinfo, new_stmt2, stmt2_info, lhs_vectype);

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "into pattern statements: %G", stmt1);
	  dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
			   (gimple *) new_stmt2);
	}

      return true;
    }
}
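
/* For example (illustrative): if STMT2_INFO is

     int x = (int) c;          // c of type char

   and the caller wants a 16-bit intermediate, STMT1 could be
   "short tmp = (short) c", after which STMT2_INFO is rewritten (or a new
   pattern statement is built) as "int x = (int) tmp".  */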

/* Convert UNPROM to TYPE and return the result, adding new statements
   to STMT_INFO's pattern definition statements if no better way is
   available.  VECTYPE is the vector form of TYPE.

   If SUBTYPE then convert the type based on the subtype.  */

static tree
vect_convert_input (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
		    vect_unpromoted_value *unprom, tree vectype,
		    enum optab_subtype subtype = optab_default)
{
  /* Update the type if the signs differ.  */
  if (subtype == optab_vector_mixed_sign)
    {
      gcc_assert (!TYPE_UNSIGNED (type));
      if (TYPE_UNSIGNED (TREE_TYPE (unprom->op)))
	{
	  type = unsigned_type_for (type);
	  vectype = unsigned_type_for (vectype);
	}
    }

  /* Check for a no-op conversion.  */
  if (types_compatible_p (type, TREE_TYPE (unprom->op)))
    return unprom->op;

  /* Allow the caller to create constant vect_unpromoted_values.  */
  if (TREE_CODE (unprom->op) == INTEGER_CST)
    return wide_int_to_tree (type, wi::to_widest (unprom->op));

  tree input = unprom->op;
  if (unprom->caster)
    {
      tree lhs = gimple_get_lhs (unprom->caster->stmt);
      tree lhs_type = TREE_TYPE (lhs);

      /* If the result of the existing cast is the right width, use it
	 instead of the source of the cast.  */
      if (TYPE_PRECISION (lhs_type) == TYPE_PRECISION (type))
	input = lhs;
      /* If the precision we want is between the source and result
	 precisions of the existing cast, try splitting the cast into
	 two and tapping into a mid-way point.  */
      else if (TYPE_PRECISION (lhs_type) > TYPE_PRECISION (type)
	       && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type))
	{
	  /* In order to preserve the semantics of the original cast,
	     give the mid-way point the same signedness as the input value.

	     It would be possible to use a signed type here instead if
	     TYPE is signed and UNPROM->TYPE is unsigned, but that would
	     make the sign of the midtype sensitive to the order in
	     which we process the statements, since the signedness of
	     TYPE is the signedness required by just one of possibly
	     many users.  Also, unsigned promotions are usually as cheap
	     as or cheaper than signed ones, so it's better to keep an
	     unsigned promotion.  */
	  tree midtype = build_nonstandard_integer_type
	    (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type));
	  tree vec_midtype = get_vectype_for_scalar_type (vinfo, midtype);
	  if (vec_midtype)
	    {
	      input = vect_recog_temp_ssa_var (midtype, NULL);
	      gassign *new_stmt = gimple_build_assign (input, NOP_EXPR,
						       unprom->op);
	      if (!vect_split_statement (vinfo, unprom->caster, input, new_stmt,
					 vec_midtype))
		append_pattern_def_seq (vinfo, stmt_info,
					new_stmt, vec_midtype);
	    }
	}

      /* See if we can reuse an existing result.  */
      if (types_compatible_p (type, TREE_TYPE (input)))
	return input;
    }

  /* We need a new conversion statement.  */
  tree new_op = vect_recog_temp_ssa_var (type, NULL);
  gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, input);

  /* If OP is an external value, see if we can insert the new statement
     on an incoming edge.  */
  if (input == unprom->op && unprom->dt == vect_external_def)
    if (edge e = vect_get_external_def_edge (vinfo, input))
      {
	basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
	gcc_assert (!new_bb);
	return new_op;
      }

  /* As a (common) last resort, add the statement to the pattern itself.  */
  append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype);
  return new_op;
}

/* Invoke vect_convert_input for N elements of UNPROM and store the
   result in the corresponding elements of RESULT.

   If SUBTYPE then convert the type based on the subtype.  */

static void
vect_convert_inputs (vec_info *vinfo, stmt_vec_info stmt_info, unsigned int n,
		     tree *result, tree type, vect_unpromoted_value *unprom,
		     tree vectype, enum optab_subtype subtype = optab_default)
{
  for (unsigned int i = 0; i < n; ++i)
    {
      unsigned int j;
      for (j = 0; j < i; ++j)
	if (unprom[j].op == unprom[i].op)
	  break;

      if (j < i)
	result[i] = result[j];
      else
	result[i] = vect_convert_input (vinfo, stmt_info,
					type, &unprom[i], vectype, subtype);
    }
}

/* The caller has created a (possibly empty) sequence of pattern definition
   statements followed by a single statement PATTERN_STMT.  Cast the result
   of this final statement to TYPE.  If a new statement is needed, add
   PATTERN_STMT to the end of STMT_INFO's pattern definition statements
   and return the new statement, otherwise return PATTERN_STMT as-is.
   VECITYPE is the vector form of PATTERN_STMT's result type.  */

static gimple *
vect_convert_output (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
		     gimple *pattern_stmt, tree vecitype)
{
  tree lhs = gimple_get_lhs (pattern_stmt);
  if (!types_compatible_p (type, TREE_TYPE (lhs)))
    {
      append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vecitype);
      tree cast_var = vect_recog_temp_ssa_var (type, NULL);
      pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
    }
  return pattern_stmt;
}

/* Return true if STMT_VINFO describes a reduction for which reassociation
   is allowed.  If STMT_INFO is part of a group, assume that it's part of
   a reduction chain and optimistically assume that all statements
   except the last allow reassociation.
   Also require it to have code CODE and to be a reduction
   in the outermost loop.  When returning true, store the operands in
   *OP0_OUT and *OP1_OUT.  */

static bool
vect_reassociating_reduction_p (vec_info *vinfo,
				stmt_vec_info stmt_info, tree_code code,
				tree *op0_out, tree *op1_out)
{
  loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
  if (!loop_info)
    return false;

  gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
  if (!assign || gimple_assign_rhs_code (assign) != code)
    return false;

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  class loop *loop = LOOP_VINFO_LOOP (loop_info);
  if (loop && nested_in_vect_loop_p (loop, stmt_info))
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
    {
      if (needs_fold_left_reduction_p (TREE_TYPE (gimple_assign_lhs (assign)),
				       code))
	return false;
    }
  else if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) == NULL)
    return false;

  *op0_out = gimple_assign_rhs1 (assign);
  *op1_out = gimple_assign_rhs2 (assign);
  if (commutative_tree_code (code) && STMT_VINFO_REDUC_IDX (stmt_info) == 0)
    std::swap (*op0_out, *op1_out);
  return true;
}

/* match.pd function to match
   (cond (cmp@3 a b) (convert@1 c) (convert@2 d))
   with conditions:
   1) @1, @2, c, d, a, b are all integral type.
   2) There's single_use for both @1 and @2.
   3) a, c have same precision.
   4) c and @1 have different precision.
   5) c, d are the same type or they can differ in sign when convert is
      truncation.

   record a and c and d and @3.  */

extern bool gimple_cond_expr_convert_p (tree, tree*, tree (*)(tree));

/* Function vect_recog_cond_expr_convert

   Try to find the following pattern:

   TYPE_AB A, B;
   TYPE_CD C, D;
   TYPE_E E;
   TYPE_E op_true = (TYPE_E) A;
   TYPE_E op_false = (TYPE_E) B;

   E = C cmp D ? op_true : op_false;

   where
   TYPE_PRECISION (TYPE_E) != TYPE_PRECISION (TYPE_CD);
   TYPE_PRECISION (TYPE_AB) == TYPE_PRECISION (TYPE_CD);
   single_use of op_true and op_false.
   TYPE_AB could differ in sign when (TYPE_E) A is a truncation.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.
   here it starts with E = C cmp D ? op_true : op_false;

   Output:

   TYPE1 E' = C cmp D ? A : B;
   TYPE3 E = (TYPE3) E';

   There may be an extra nop_convert for A or B to handle different
   signedness.

   * TYPE_OUT: The vector type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
   E = (TYPE3) E';
   E' = C cmp D ? A : B; is recorded in pattern definition statements.  */

static gimple *
vect_recog_cond_expr_convert_pattern (vec_info *vinfo,
				      stmt_vec_info stmt_vinfo, tree *type_out)
{
  gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
  tree lhs, match[4], temp, type, new_lhs, op2;
  gimple *cond_stmt;
  gimple *pattern_stmt;

  if (!last_stmt)
    return NULL;

  lhs = gimple_assign_lhs (last_stmt);

  /* Find E = C cmp D ? (TYPE3) A : (TYPE3) B;
     TYPE_PRECISION (A) == TYPE_PRECISION (C).  */
  if (!gimple_cond_expr_convert_p (lhs, &match[0], NULL))
    return NULL;

  vect_pattern_detected ("vect_recog_cond_expr_convert_pattern", last_stmt);

  op2 = match[2];
  type = TREE_TYPE (match[1]);
  if (TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (match[2])))
    {
      op2 = vect_recog_temp_ssa_var (type, NULL);
      gimple *nop_stmt = gimple_build_assign (op2, NOP_EXPR, match[2]);
      append_pattern_def_seq (vinfo, stmt_vinfo, nop_stmt,
			      get_vectype_for_scalar_type (vinfo, type));
    }

  temp = vect_recog_temp_ssa_var (type, NULL);
  cond_stmt = gimple_build_assign (temp, build3 (COND_EXPR, type, match[3],
						 match[1], op2));
  append_pattern_def_seq (vinfo, stmt_vinfo, cond_stmt,
			  get_vectype_for_scalar_type (vinfo, type));
  new_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
  pattern_stmt = gimple_build_assign (new_lhs, NOP_EXPR, temp);
  *type_out = STMT_VINFO_VECTYPE (stmt_vinfo);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "created pattern stmt: %G", pattern_stmt);
  return pattern_stmt;
}
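
/* For example (illustrative): with

     int a, b, c, d;
     long e = c < d ? (long) a : (long) b;

   the pattern is recorded as

     patt = c < d ? a : b;      // done on int
     e = (long) patt;

   so the COND_EXPR itself can be vectorized in the narrower type.  */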

/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     type1a x_t;
     type1b y_t;
     TYPE1 prod;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type1a' and 'type1b',
   the sign of 'TYPE1' must match the sign of either 'type1a' or 'type1b',
   but the signs of 'type1a' and 'type1b' can differ.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
	 vectorized without preserving all the intermediate results.  It
	 produces only N/2 (widened) results (by summing up pairs of
	 intermediate results) rather than all N results.  Therefore, we
	 cannot allow this pattern when we want to get all the results and in
	 the correct order (as is the case when this computation is in an
	 inner-loop nested in an outer-loop that is being vectorized).  */

static gimple *
vect_recog_dot_prod_pattern (vec_info *vinfo,
			     stmt_vec_info stmt_vinfo, tree *type_out)
{
  tree oprnd0, oprnd1;
  gimple *last_stmt = stmt_vinfo->stmt;
  tree type, half_type;
  gimple *pattern_stmt;
  tree var;

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DPROD = DX * DY;
          DDPROD = (TYPE2) DPROD;
          sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type but the sign
       between X, Y and DPROD can differ.
     - sum is the same size as DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
				       &oprnd0, &oprnd1))
    return NULL;

  type = TREE_TYPE (gimple_get_lhs (last_stmt));

  vect_unpromoted_value unprom_mult;
  oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr.  */
  if (!oprnd0)
    return NULL;

  stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
  if (!mult_vinfo)
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  vect_unpromoted_value unprom0[2];
  enum optab_subtype subtype = optab_vector;
  if (!vect_widened_op_tree (vinfo, mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
			     false, 2, unprom0, &half_type, &subtype))
    return NULL;

  /* If there are two widening operations, make sure they agree on the sign
     of the extension.  The result of an optab_vector_mixed_sign operation
     is signed; otherwise, the result has the same sign as the operands.  */
  if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
      && (subtype == optab_vector_mixed_sign
	  ? TYPE_UNSIGNED (unprom_mult.type)
	  : TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type)))
    return NULL;

  vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);

  /* If the inputs have mixed signs, canonicalize on using the signed
     input type for analysis.  This also helps when emulating mixed-sign
     operations using signed operations.  */
  if (subtype == optab_vector_mixed_sign)
    half_type = signed_type_for (half_type);

  tree half_vectype;
  if (!vect_supportable_direct_optab_p (vinfo, type, DOT_PROD_EXPR, half_type,
					type_out, &half_vectype, subtype))
    {
      /* We can emulate a mixed-sign dot-product using a sequence of
	 signed dot-products; see vect_emulate_mixed_dot_prod for details.  */
      if (subtype != optab_vector_mixed_sign
	  || !vect_supportable_direct_optab_p (vinfo, signed_type_for (type),
					       DOT_PROD_EXPR, half_type,
					       type_out, &half_vectype,
					       optab_vector))
	return NULL;
      *type_out = signed_or_unsigned_type_for (TYPE_UNSIGNED (type),
					       *type_out);
    }

  /* Get the inputs in the appropriate types.  */
  tree mult_oprnd[2];
  vect_convert_inputs (vinfo, stmt_vinfo, 2, mult_oprnd, half_type,
		       unprom0, half_vectype, subtype);

  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
				      mult_oprnd[0], mult_oprnd[1], oprnd1);

  return pattern_stmt;
}

/* Function vect_recog_sad_pattern

   Try to find the following Sum of Absolute Difference (SAD) pattern:

     unsigned type x_t, y_t;
     signed TYPE1 diff, abs_diff;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  diff = x_T - y_T;
     S6  abs_diff = ABS_EXPR <diff>;
     [S7  abs_diff = (TYPE2) abs_diff;  #optional]
     S8  sum_1 = abs_diff + sum_0;

   where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is
   the same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.  In the
   example, when this function is called with S8, the pattern
   {S3,S4,S5,S6,S7,S8} will be detected.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        SAD_EXPR <x_t, y_t, sum_0>
  */

static gimple *
vect_recog_sad_pattern (vec_info *vinfo,
			stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  tree half_type;

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DDIFF = DX - DY;
          DAD = ABS_EXPR <DDIFF>;
          DAD = (TYPE2) DAD;
          sum_1 = DAD + sum_0;
     In which
     - DX is at least double the size of X
     - DY is at least double the size of Y
     - DX, DY, DDIFF, DAD all have the same type
     - sum is the same size as DAD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD w+ sum_0;    #widen summation
     or
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD + sum_0;     #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  tree plus_oprnd0, plus_oprnd1;
  if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
				       &plus_oprnd0, &plus_oprnd1))
    return NULL;

  tree sum_type = TREE_TYPE (gimple_get_lhs (last_stmt));

  /* Any non-truncating sequence of conversions is OK here, since
     with a successful match, the result of the ABS(U) is known to fit
     within the nonnegative range of the result type.  (It cannot be the
     negative of the minimum signed value due to the range of the widening
     MINUS_EXPR.)  */
  vect_unpromoted_value unprom_abs;
  plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
						      &unprom_abs);

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that plus_oprnd1 is the reduction variable (defined by a
     loop-header phi), and plus_oprnd0 is an ssa-name defined by a stmt in the
     loop body.  Then check that plus_oprnd0 is defined by an abs_expr.  */

  if (!plus_oprnd0)
    return NULL;

  stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
  if (!abs_stmt_vinfo)
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
  if (!abs_stmt
      || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
	  && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
    return NULL;

  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
  tree abs_type = TREE_TYPE (abs_oprnd);
  if (TYPE_UNSIGNED (abs_type))
    return NULL;

  /* Peel off conversions from the ABS input.  This can involve sign
     changes (e.g. from an unsigned subtraction to a signed ABS input)
     or signed promotion, but it can't include unsigned promotion.
     (Note that ABS of an unsigned promotion should have been folded
     away before now anyway.)  */
  vect_unpromoted_value unprom_diff;
  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
						    &unprom_diff);
  if (!abs_oprnd)
    return NULL;
  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
      && TYPE_UNSIGNED (unprom_diff.type))
    return NULL;

  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
  if (!diff_stmt_vinfo)
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  vect_unpromoted_value unprom[2];
  if (!vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR,
			     WIDEN_MINUS_EXPR,
			     false, 2, unprom, &half_type))
    return NULL;

  vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);

  tree half_vectype;
  if (!vect_supportable_direct_optab_p (vinfo, sum_type, SAD_EXPR, half_type,
					type_out, &half_vectype))
    return NULL;

  /* Get the inputs to the SAD_EXPR in the appropriate types.  */
  tree sad_oprnd[2];
  vect_convert_inputs (vinfo, stmt_vinfo, 2, sad_oprnd, half_type,
		       unprom, half_vectype);

  tree var = vect_recog_temp_ssa_var (sum_type, NULL);
  gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
					      sad_oprnd[1], plus_oprnd1);

  return pattern_stmt;
}

/* Recognize an operation that performs ORIG_CODE on widened inputs,
   so that it can be treated as though it had the form:

      A_TYPE a;
      B_TYPE b;
      HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
      HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
    | RES_TYPE a_extend = (RES_TYPE) a_cast;  // promotion from HALF_TYPE
    | RES_TYPE b_extend = (RES_TYPE) b_cast;  // promotion from HALF_TYPE
    | RES_TYPE res = a_extend ORIG_CODE b_extend;

   Try to replace the pattern with:

      A_TYPE a;
      B_TYPE b;
      HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
      HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
    | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
    | RES_TYPE res = (RES_TYPE) ext;  // possible no-op

   where EXT_TYPE is wider than HALF_TYPE but has the same signedness.

   SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts.  NAME is the
   name of the pattern being matched, for dump purposes.  */

static gimple *
vect_recog_widen_op_pattern (vec_info *vinfo,
			     stmt_vec_info last_stmt_info, tree *type_out,
			     tree_code orig_code, tree_code wide_code,
			     bool shift_p, const char *name)
{
  gimple *last_stmt = last_stmt_info->stmt;

  vect_unpromoted_value unprom[2];
  tree half_type;
  if (!vect_widened_op_tree (vinfo, last_stmt_info, orig_code, orig_code,
			     shift_p, 2, unprom, &half_type))
    return NULL;

  /* Pattern detected.  */
  vect_pattern_detected (name, last_stmt);

  tree type = TREE_TYPE (gimple_get_lhs (last_stmt));
  tree itype = type;
  if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
      || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
    itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
					    TYPE_UNSIGNED (half_type));

  /* Check target support.  */
  tree vectype = get_vectype_for_scalar_type (vinfo, half_type);
  tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
  tree ctype = itype;
  tree vecctype = vecitype;
  if (orig_code == MINUS_EXPR
      && TYPE_UNSIGNED (itype)
      && TYPE_PRECISION (type) > TYPE_PRECISION (itype))
    {
      /* Subtraction is special, even if half_type is unsigned and no matter
	 whether type is signed or unsigned, if type is wider than itype,
	 we need to sign-extend from the widening operation result to the
	 result type.
	 Consider half_type unsigned char, operand 1 0xfe, operand 2 0xff,
	 itype unsigned short and type either int or unsigned int.
	 Widened (unsigned short) 0xfe - (unsigned short) 0xff is
	 (unsigned short) 0xffff, but for type int we want the result -1
	 and for type unsigned int 0xffffffff rather than 0xffff.  */
      ctype = build_nonstandard_integer_type (TYPE_PRECISION (itype), 0);
      vecctype = get_vectype_for_scalar_type (vinfo, ctype);
    }

  enum tree_code dummy_code;
  int dummy_int;
  auto_vec<tree> dummy_vec;
  if (!vectype
      || !vecitype
      || !vecctype
      || !supportable_widening_operation (vinfo, wide_code, last_stmt_info,
					  vecitype, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_out = get_vectype_for_scalar_type (vinfo, type);
  if (!*type_out)
    return NULL;

  tree oprnd[2];
  vect_convert_inputs (vinfo, last_stmt_info,
		       2, oprnd, half_type, unprom, vectype);

  tree var = vect_recog_temp_ssa_var (itype, NULL);
  gimple *pattern_stmt = gimple_build_assign (var, wide_code,
					      oprnd[0], oprnd[1]);

  if (vecctype != vecitype)
    pattern_stmt = vect_convert_output (vinfo, last_stmt_info, ctype,
					pattern_stmt, vecitype);

  return vect_convert_output (vinfo, last_stmt_info,
			      type, pattern_stmt, vecctype);
}

/* Try to detect multiplication on widened inputs, converting MULT_EXPR
   to WIDEN_MULT_EXPR.  See vect_recog_widen_op_pattern for details.  */

static gimple *
vect_recog_widen_mult_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
			       tree *type_out)
{
  return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
				      MULT_EXPR, WIDEN_MULT_EXPR, false,
				      "vect_recog_widen_mult_pattern");
}

/* Try to detect addition on widened inputs, converting PLUS_EXPR
   to WIDEN_PLUS_EXPR.  See vect_recog_widen_op_pattern for details.  */

static gimple *
vect_recog_widen_plus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
			       tree *type_out)
{
  return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
				      PLUS_EXPR, WIDEN_PLUS_EXPR, false,
				      "vect_recog_widen_plus_pattern");
}

/* Try to detect subtraction on widened inputs, converting MINUS_EXPR
   to WIDEN_MINUS_EXPR.  See vect_recog_widen_op_pattern for details.  */

static gimple *
vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
				tree *type_out)
{
  return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
				      MINUS_EXPR, WIDEN_MINUS_EXPR, false,
				      "vect_recog_widen_minus_pattern");
}

/* Function vect_recog_popcount_pattern

   Try to find the following pattern:

   UTYPE1 A;
   TYPE1 B;
   UTYPE2 temp_in;
   TYPE3 temp_out;
   temp_in = (UTYPE2) A;

   temp_out = __builtin_popcount{,l,ll} (temp_in);
   B = (TYPE1) temp_out;

   UTYPE2 may or may not be equal to TYPE3,
   i.e. UTYPE2 is equal to TYPE3 for __builtin_popcount and
   UTYPE2 is not equal to TYPE3 for __builtin_popcountll.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.
   here it starts with B = (TYPE1) temp_out;

   Output:

   * TYPE_OUT: The vector type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
   B = .POPCOUNT (A);  */

static gimple *
vect_recog_popcount_pattern (vec_info *vinfo,
			     stmt_vec_info stmt_vinfo, tree *type_out)
{
  gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
  gimple *popcount_stmt, *pattern_stmt;
  tree rhs_oprnd, rhs_origin, lhs_oprnd, lhs_type, vec_type, new_var;
  auto_vec<tree> vargs;

  /* Find B = (TYPE1) temp_out.  */
  if (!last_stmt)
    return NULL;
  tree_code code = gimple_assign_rhs_code (last_stmt);
  if (!CONVERT_EXPR_CODE_P (code))
    return NULL;

  lhs_oprnd = gimple_assign_lhs (last_stmt);
  lhs_type = TREE_TYPE (lhs_oprnd);
  if (!INTEGRAL_TYPE_P (lhs_type))
    return NULL;

  rhs_oprnd = gimple_assign_rhs1 (last_stmt);
  if (TREE_CODE (rhs_oprnd) != SSA_NAME
      || !has_single_use (rhs_oprnd))
    return NULL;
  popcount_stmt = SSA_NAME_DEF_STMT (rhs_oprnd);

  /* Find temp_out = __builtin_popcount{,l,ll} (temp_in);  */
  if (!is_gimple_call (popcount_stmt))
    return NULL;
  switch (gimple_call_combined_fn (popcount_stmt))
    {
    CASE_CFN_POPCOUNT:
      break;
    default:
      return NULL;
    }

  if (gimple_call_num_args (popcount_stmt) != 1)
    return NULL;

  rhs_oprnd = gimple_call_arg (popcount_stmt, 0);
  vect_unpromoted_value unprom_diff;
  rhs_origin = vect_look_through_possible_promotion (vinfo, rhs_oprnd,
						     &unprom_diff);

  if (!rhs_origin)
    return NULL;

  /* Input and output of .POPCOUNT should be same-precision integer.
     Also A should be unsigned or of the same precision as temp_in,
     otherwise there would be a sign_extend from A to temp_in.  */
  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (lhs_type)
      || (!TYPE_UNSIGNED (unprom_diff.type)
	  && (TYPE_PRECISION (unprom_diff.type)
	      != TYPE_PRECISION (TREE_TYPE (rhs_oprnd)))))
    return NULL;
  vargs.safe_push (unprom_diff.op);

  vect_pattern_detected ("vect_recog_popcount_pattern", popcount_stmt);
  vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);
  /* Do it only if the backend has popcount<vector_mode>2 pattern.  */
  if (!vec_type
      || !direct_internal_fn_supported_p (IFN_POPCOUNT, vec_type,
					  OPTIMIZE_FOR_SPEED))
    return NULL;

  /* Create B = .POPCOUNT (A).  */
  new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
  pattern_stmt = gimple_build_call_internal_vec (IFN_POPCOUNT, vargs);
  gimple_call_set_lhs (pattern_stmt, new_var);
  gimple_set_location (pattern_stmt, gimple_location (last_stmt));
  *type_out = vec_type;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "created pattern stmt: %G", pattern_stmt);
  return pattern_stmt;
}
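
/* For example (illustrative): with

     unsigned int a;
     int b = __builtin_popcountll (a);

   the zero extension of A to unsigned long long is looked through and
   the call becomes b = .POPCOUNT (a), provided the target has a popcount
   pattern for the vector form of the 32-bit type.  */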

/* Function vect_recog_pow_pattern

   Try to find the following pattern:

     x = POW (y, N);

   with POW being one of pow, powf, powi, powif and N being
   either 2 or 0.5.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        x = x * x
   or
	x = sqrt (x)
*/

static gimple *
vect_recog_pow_pattern (vec_info *vinfo,
			stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  tree base, exp;
  gimple *stmt;
  tree var;

  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  switch (gimple_call_combined_fn (last_stmt))
    {
    CASE_CFN_POW:
    CASE_CFN_POWI:
      break;

    default:
      return NULL;
    }

  base = gimple_call_arg (last_stmt, 0);
  exp = gimple_call_arg (last_stmt, 1);
  if (TREE_CODE (exp) != REAL_CST
      && TREE_CODE (exp) != INTEGER_CST)
    {
      if (flag_unsafe_math_optimizations
	  && TREE_CODE (base) == REAL_CST
	  && gimple_call_builtin_p (last_stmt, BUILT_IN_NORMAL))
	{
	  combined_fn log_cfn;
	  built_in_function exp_bfn;
	  switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
	    {
	    case BUILT_IN_POW:
	      log_cfn = CFN_BUILT_IN_LOG;
	      exp_bfn = BUILT_IN_EXP;
	      break;
	    case BUILT_IN_POWF:
	      log_cfn = CFN_BUILT_IN_LOGF;
	      exp_bfn = BUILT_IN_EXPF;
	      break;
	    case BUILT_IN_POWL:
	      log_cfn = CFN_BUILT_IN_LOGL;
	      exp_bfn = BUILT_IN_EXPL;
	      break;
	    default:
	      return NULL;
	    }
	  tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
	  tree exp_decl = builtin_decl_implicit (exp_bfn);
	  /* Optimize pow (C, x) as exp (log (C) * x).  Normally match.pd
	     does that, but if C is a power of 2, we want to use
	     exp2 (log2 (C) * x) in the non-vectorized version, but for
	     vectorization we don't have vectorized exp2.  */
	  if (logc
	      && TREE_CODE (logc) == REAL_CST
	      && exp_decl
	      && lookup_attribute ("omp declare simd",
				   DECL_ATTRIBUTES (exp_decl)))
	    {
	      cgraph_node *node = cgraph_node::get_create (exp_decl);
	      if (node->simd_clones == NULL)
		{
		  if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
		      || node->definition)
		    return NULL;
		  expand_simd_clones (node);
		  if (node->simd_clones == NULL)
		    return NULL;
		}
	      *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
	      if (!*type_out)
		return NULL;
	      tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
	      gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
	      append_pattern_def_seq (vinfo, stmt_vinfo, g);
	      tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
	      g = gimple_build_call (exp_decl, 1, def);
	      gimple_call_set_lhs (g, res);
	      return g;
	    }
	}

      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  /* Catch squaring.  */
  if ((tree_fits_shwi_p (exp)
       && tree_to_shwi (exp) == 2)
      || (TREE_CODE (exp) == REAL_CST
	  && real_equal (&TREE_REAL_CST (exp), &dconst2)))
    {
      if (!vect_supportable_direct_optab_p (vinfo, TREE_TYPE (base), MULT_EXPR,
					    TREE_TYPE (base), type_out))
	return NULL;

      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign (var, MULT_EXPR, base, base);
      return stmt;
    }

  /* Catch square root.  */
  if (TREE_CODE (exp) == REAL_CST
      && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
    {
      *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
      if (*type_out
	  && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
					     OPTIMIZE_FOR_SPEED))
	{
	  gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
	  var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
	  gimple_call_set_lhs (stmt, var);
	  gimple_call_set_nothrow (stmt, true);
	  return stmt;
	}
    }

  return NULL;
}
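
/* For example: pow (x, 2.0) is replaced by x * x, and pow (x, 0.5) by an
   internal .SQRT (x) call, in each case only if the corresponding vector
   operation is supported by the target.  */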

/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

     type x_t;
     TYPE x_T, sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e. we're
   summing elements of type 'type' into an accumulator of type 'TYPE'.  This
   is a special case of a reduction computation.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.  In the
   example, when this function is called with S3, the pattern {S2,S3} will be
   detected.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
	 vectorized without preserving all the intermediate results.  It
	 produces only N/2 (widened) results (by summing up pairs of
	 intermediate results) rather than all N results.  Therefore, we
	 cannot allow this pattern when we want to get all the results and in
	 the correct order (as is the case when this computation is in an
	 inner-loop nested in an outer-loop that is being vectorized).  */

static gimple *
vect_recog_widen_sum_pattern (vec_info *vinfo,
			      stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  tree oprnd0, oprnd1;
  tree type;
  gimple *pattern_stmt;
  tree var;

  /* Look for the following pattern
          DX = (TYPE) X;
          sum_1 = DX + sum_0;
     In which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
				       &oprnd0, &oprnd1))
    return NULL;

  type = TREE_TYPE (gimple_get_lhs (last_stmt));

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a cast from type 'type' to type
     'TYPE'.  */

  vect_unpromoted_value unprom0;
  if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
      || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
    return NULL;

  vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);

  if (!vect_supportable_direct_optab_p (vinfo, type, WIDEN_SUM_EXPR,
					unprom0.type, type_out))
    return NULL;

  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);

  return pattern_stmt;
}
1832 /* Recognize cases in which an operation is performed in one type WTYPE
1833 but could be done more efficiently in a narrower type NTYPE. For example,
1836 ATYPE a; // narrower than NTYPE
1837 BTYPE b; // narrower than NTYPE
1838 WTYPE aw = (WTYPE) a;
1839 WTYPE bw = (WTYPE) b;
1840 WTYPE res = aw + bw; // only uses of aw and bw
1842 then it would be more efficient to do:
1844 NTYPE an = (NTYPE) a;
1845 NTYPE bn = (NTYPE) b;
1846 NTYPE resn = an + bn;
1847 WTYPE res = (WTYPE) resn;
1849 Other situations include things like:
1851 ATYPE a; // NTYPE or narrower
1852 WTYPE aw = (WTYPE) a;
1855 when only "(NTYPE) res" is significant. In that case it's more efficient
1856 to truncate "b" and do the operation on NTYPE instead:
1858 NTYPE an = (NTYPE) a;
1859 NTYPE bn = (NTYPE) b; // truncation
1860 NTYPE resn = an + bn;
1861 WTYPE res = (WTYPE) resn;
1863 All users of "res" should then use "resn" instead, making the final
1864 statement dead (not marked as relevant). The final statement is still
1865 needed to maintain the type correctness of the IR.
1867 vect_determine_precisions has already determined the minimum
1868 precison of the operation and the minimum precision required
1869 by users of the result. */
static gimple *
vect_recog_over_widening_pattern (vec_info *vinfo,
				  stmt_vec_info last_stmt_info, tree *type_out)
{
  gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
  if (!last_stmt)
    return NULL;

  /* See whether we have found that this operation can be done on a
     narrower type without changing its semantics.  */
  unsigned int new_precision = last_stmt_info->operation_precision;
  if (!new_precision)
    return NULL;

  tree lhs = gimple_assign_lhs (last_stmt);
  tree type = TREE_TYPE (lhs);
  tree_code code = gimple_assign_rhs_code (last_stmt);

  /* Punt for reductions where we don't handle the type conversions.  */
  if (STMT_VINFO_DEF_TYPE (last_stmt_info) == vect_reduction_def)
    return NULL;

  /* Keep the first operand of a COND_EXPR as-is: only the other two
     operands are interesting.  */
  unsigned int first_op = (code == COND_EXPR ? 2 : 1);

  /* Check the operands.  */
  unsigned int nops = gimple_num_ops (last_stmt) - first_op;
  auto_vec <vect_unpromoted_value, 3> unprom (nops);
  unprom.quick_grow (nops);
  unsigned int min_precision = 0;
  bool single_use_p = false;
  for (unsigned int i = 0; i < nops; ++i)
    {
      tree op = gimple_op (last_stmt, first_op + i);
      if (TREE_CODE (op) == INTEGER_CST)
	unprom[i].set_op (op, vect_constant_def);
      else if (TREE_CODE (op) == SSA_NAME)
	{
	  bool op_single_use_p = true;
	  if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
						     &op_single_use_p))
	    return NULL;
	  /* If:

	     (1) N bits of the result are needed;
	     (2) all inputs are widened from M<N bits; and
	     (3) one operand OP is a single-use SSA name

	     we can shift the M->N widening from OP to the output
	     without changing the number or type of extensions involved.
	     This then reduces the number of copies of STMT_INFO.

	     If instead of (3) more than one operand is a single-use SSA name,
	     shifting the extension to the output is even more of a win.

	     If instead:

	     (1) N bits of the result are needed;
	     (2) one operand OP2 is widened from M2<N bits;
	     (3) another operand OP1 is widened from M1<M2 bits; and
	     (4) both OP1 and OP2 are single-use

	     the choice is between:

	     (a) truncating OP2 to M1, doing the operation on M1,
		 and then widening the result to N

	     (b) widening OP1 to M2, doing the operation on M2, and then
		 widening the result to N

	     Both shift the M2->N widening of the inputs to the output.
	     (a) additionally shifts the M1->M2 widening to the output;
	     it requires fewer copies of STMT_INFO but requires an extra
	     M2->M1 truncation.

	     Which is better will depend on the complexity and cost of
	     STMT_INFO, which is hard to predict at this stage.  However,
	     a clear tie-breaker in favor of (b) is the fact that the
	     truncation in (a) increases the length of the operation chain.

	     If instead of (4) only one of OP1 or OP2 is single-use,
	     (b) is still a win over doing the operation in N bits:
	     it still shifts the M2->N widening on the single-use operand
	     to the output and reduces the number of STMT_INFO copies.

	     If neither operand is single-use then operating on fewer than
	     N bits might lead to more extensions overall.  Whether it does
	     or not depends on global information about the vectorization
	     region, and whether that's a good trade-off would again
	     depend on the complexity and cost of the statements involved,
	     as well as things like register pressure that are not normally
	     modelled at this stage.  We therefore ignore these cases
	     and just optimize the clear single-use wins above.

	     Thus we take the maximum precision of the unpromoted operands
	     and record whether any operand is single-use.  */
	  if (unprom[i].dt == vect_internal_def)
	    {
	      min_precision = MAX (min_precision,
				   TYPE_PRECISION (unprom[i].type));
	      single_use_p |= op_single_use_p;
	    }
	}
      else
	return NULL;
    }
  /* Although the operation could be done in operation_precision, we have
     to balance that against introducing extra truncations or extensions.
     Calculate the minimum precision that can be handled efficiently.

     The loop above determined that the operation could be handled
     efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
     extension from the inputs to the output without introducing more
     instructions, and would reduce the number of instructions required
     for STMT_INFO itself.

     vect_determine_precisions has also determined that the result only
     needs min_output_precision bits.  Truncating by a factor of N times
     requires a tree of N - 1 instructions, so if TYPE is N times wider
     than min_output_precision, doing the operation in TYPE and truncating
     the result requires N + (N - 1) = 2N - 1 instructions per output vector.
     In contrast:

     - truncating the input to a unary operation and doing the operation
       in the new type requires at most N - 1 + 1 = N instructions per
       output vector

     - doing the same for a binary operation requires at most
       (N - 1) * 2 + 1 = 2N - 1 instructions per output vector

     Both unary and binary operations require fewer instructions than
     this if the operands were extended from a suitable truncated form.
     Thus there is usually nothing to lose by doing operations in
     min_output_precision bits, but there can be something to gain.  */
  if (!single_use_p)
    min_precision = last_stmt_info->min_output_precision;
  else
    min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
  /* Apply the minimum efficient precision we just calculated.  */
  if (new_precision < min_precision)
    new_precision = min_precision;
  new_precision = vect_element_precision (new_precision);
  if (new_precision >= TYPE_PRECISION (type))
    return NULL;

  vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);

  *type_out = get_vectype_for_scalar_type (vinfo, type);
  if (!*type_out)
    return NULL;
  /* We've found a viable pattern.  Get the new type of the operation.  */
  bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
  tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);

  /* If we're truncating an operation, we need to make sure that we
     don't introduce new undefined overflow.  The codes tested here are
     a subset of those accepted by vect_truncatable_operation_p.  */
  tree op_type = new_type;
  if (TYPE_OVERFLOW_UNDEFINED (new_type)
      && (code == PLUS_EXPR || code == MINUS_EXPR || code == MULT_EXPR))
    op_type = build_nonstandard_integer_type (new_precision, true);

  /* We specifically don't check here whether the target supports the
     new operation, since it might be something that a later pattern
     wants to rewrite anyway.  If targets have a minimum element size
     for some optabs, we should pattern-match smaller ops to larger ops
     where beneficial.  */
  tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
  tree op_vectype = get_vectype_for_scalar_type (vinfo, op_type);
  if (!new_vectype || !op_vectype)
    return NULL;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
		     type, new_type);

  /* Calculate the rhs operands for an operation on OP_TYPE.  */
  tree ops[3] = {};
  for (unsigned int i = 1; i < first_op; ++i)
    ops[i - 1] = gimple_op (last_stmt, i);
  vect_convert_inputs (vinfo, last_stmt_info, nops, &ops[first_op - 1],
		       op_type, &unprom[0], op_vectype);

  /* Use the operation to produce a result of type OP_TYPE.  */
  tree new_var = vect_recog_temp_ssa_var (op_type, NULL);
  gimple *pattern_stmt = gimple_build_assign (new_var, code,
					      ops[0], ops[1], ops[2]);
  gimple_set_location (pattern_stmt, gimple_location (last_stmt));

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "created pattern stmt: %G", pattern_stmt);

  /* Convert back to the original signedness, if OP_TYPE is different
     from NEW_TYPE.  */
  if (op_type != new_type)
    pattern_stmt = vect_convert_output (vinfo, last_stmt_info, new_type,
					pattern_stmt, op_vectype);

  /* Promote the result to the original type.  */
  pattern_stmt = vect_convert_output (vinfo, last_stmt_info, type,
				      pattern_stmt, new_vectype);

  return pattern_stmt;
}
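
/* Editor's note: illustrative sketch only, compiled out via #if 0 and not
   part of GCC.  A scalar example of the over-widening shape handled above:
   C's integer promotions perform the addition in 'int', but only the low
   eight bits of the result survive, so the arithmetic may be narrowed.  */
#if 0
static inline void
example_over_widening (unsigned char *r, const unsigned char *a,
		       const unsigned char *b, int n)
{
  for (int i = 0; i < n; i++)
    /* a[i] and b[i] are promoted to int, added, then truncated back to
       8 bits; doing the addition on narrower vector elements gives the
       same low bits with fewer packs and unpacks.  */
    r[i] = (unsigned char) (a[i] + b[i]);
}
#endif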
/* Recognize the following patterns:

     ATYPE a;  // narrower than TYPE
     BTYPE b;  // narrower than TYPE

   1) Multiply high with scaling
     TYPE res = ((TYPE) a * (TYPE) b) >> c;
   Here, c is bitsize (TYPE) / 2 - 1.

   2) ... or also with rounding
     TYPE res = ((((TYPE) a * (TYPE) b) >> d) + 1) >> 1;
   Here, d is bitsize (TYPE) / 2 - 2.

   3) Normal multiply high
     TYPE res = ((TYPE) a * (TYPE) b) >> e;
   Here, e is bitsize (TYPE) / 2.

   where only the bottom half of res is used.  */
2101 vect_recog_mulhs_pattern (vec_info
*vinfo
,
2102 stmt_vec_info last_stmt_info
, tree
*type_out
)
2104 /* Check for a right shift. */
2105 gassign
*last_stmt
= dyn_cast
<gassign
*> (last_stmt_info
->stmt
);
2107 || gimple_assign_rhs_code (last_stmt
) != RSHIFT_EXPR
)
2110 /* Check that the shift result is wider than the users of the
2111 result need (i.e. that narrowing would be a natural choice). */
2112 tree lhs_type
= TREE_TYPE (gimple_assign_lhs (last_stmt
));
2113 unsigned int target_precision
2114 = vect_element_precision (last_stmt_info
->min_output_precision
);
2115 if (!INTEGRAL_TYPE_P (lhs_type
)
2116 || target_precision
>= TYPE_PRECISION (lhs_type
))
2119 /* Look through any change in sign on the outer shift input. */
2120 vect_unpromoted_value unprom_rshift_input
;
2121 tree rshift_input
= vect_look_through_possible_promotion
2122 (vinfo
, gimple_assign_rhs1 (last_stmt
), &unprom_rshift_input
);
2124 || TYPE_PRECISION (TREE_TYPE (rshift_input
))
2125 != TYPE_PRECISION (lhs_type
))
2128 /* Get the definition of the shift input. */
2129 stmt_vec_info rshift_input_stmt_info
2130 = vect_get_internal_def (vinfo
, rshift_input
);
2131 if (!rshift_input_stmt_info
)
2133 gassign
*rshift_input_stmt
2134 = dyn_cast
<gassign
*> (rshift_input_stmt_info
->stmt
);
2135 if (!rshift_input_stmt
)
2138 stmt_vec_info mulh_stmt_info
;
2140 bool rounding_p
= false;
2142 /* Check for the presence of the rounding term. */
2143 if (gimple_assign_rhs_code (rshift_input_stmt
) == PLUS_EXPR
)
2145 /* Check that the outer shift was by 1. */
2146 if (!integer_onep (gimple_assign_rhs2 (last_stmt
)))
2149 /* Check that the second operand of the PLUS_EXPR is 1. */
2150 if (!integer_onep (gimple_assign_rhs2 (rshift_input_stmt
)))
2153 /* Look through any change in sign on the addition input. */
2154 vect_unpromoted_value unprom_plus_input
;
2155 tree plus_input
= vect_look_through_possible_promotion
2156 (vinfo
, gimple_assign_rhs1 (rshift_input_stmt
), &unprom_plus_input
);
2158 || TYPE_PRECISION (TREE_TYPE (plus_input
))
2159 != TYPE_PRECISION (TREE_TYPE (rshift_input
)))
2162 /* Get the definition of the multiply-high-scale part. */
2163 stmt_vec_info plus_input_stmt_info
2164 = vect_get_internal_def (vinfo
, plus_input
);
2165 if (!plus_input_stmt_info
)
2167 gassign
*plus_input_stmt
2168 = dyn_cast
<gassign
*> (plus_input_stmt_info
->stmt
);
2169 if (!plus_input_stmt
2170 || gimple_assign_rhs_code (plus_input_stmt
) != RSHIFT_EXPR
)
2173 /* Look through any change in sign on the scaling input. */
2174 vect_unpromoted_value unprom_scale_input
;
2175 tree scale_input
= vect_look_through_possible_promotion
2176 (vinfo
, gimple_assign_rhs1 (plus_input_stmt
), &unprom_scale_input
);
2178 || TYPE_PRECISION (TREE_TYPE (scale_input
))
2179 != TYPE_PRECISION (TREE_TYPE (plus_input
)))
2182 /* Get the definition of the multiply-high part. */
2183 mulh_stmt_info
= vect_get_internal_def (vinfo
, scale_input
);
2184 if (!mulh_stmt_info
)
2187 /* Get the scaling term. */
2188 scale_term
= gimple_assign_rhs2 (plus_input_stmt
);
2193 mulh_stmt_info
= rshift_input_stmt_info
;
2194 scale_term
= gimple_assign_rhs2 (last_stmt
);
2197 /* Check that the scaling factor is constant. */
2198 if (TREE_CODE (scale_term
) != INTEGER_CST
)
2201 /* Check whether the scaling input term can be seen as two widened
2202 inputs multiplied together. */
2203 vect_unpromoted_value unprom_mult
[2];
2206 = vect_widened_op_tree (vinfo
, mulh_stmt_info
, MULT_EXPR
, WIDEN_MULT_EXPR
,
2207 false, 2, unprom_mult
, &new_type
);
2211 /* Adjust output precision. */
2212 if (TYPE_PRECISION (new_type
) < target_precision
)
2213 new_type
= build_nonstandard_integer_type
2214 (target_precision
, TYPE_UNSIGNED (new_type
));
2216 unsigned mult_precision
= TYPE_PRECISION (new_type
);
2218 /* Check that the scaling factor is expected. Instead of
2219 target_precision, we should use the one that we actually
2220 use for internal function. */
2223 /* Check pattern 2). */
2224 if (wi::to_widest (scale_term
) + mult_precision
+ 2
2225 != TYPE_PRECISION (lhs_type
))
2232 /* Check for pattern 1). */
2233 if (wi::to_widest (scale_term
) + mult_precision
+ 1
2234 == TYPE_PRECISION (lhs_type
))
2236 /* Check for pattern 3). */
2237 else if (wi::to_widest (scale_term
) + mult_precision
2238 == TYPE_PRECISION (lhs_type
))
2244 vect_pattern_detected ("vect_recog_mulhs_pattern", last_stmt
);
2246 /* Check for target support. */
2247 tree new_vectype
= get_vectype_for_scalar_type (vinfo
, new_type
);
2249 || !direct_internal_fn_supported_p
2250 (ifn
, new_vectype
, OPTIMIZE_FOR_SPEED
))
2253 /* The IR requires a valid vector type for the cast result, even though
2254 it's likely to be discarded. */
2255 *type_out
= get_vectype_for_scalar_type (vinfo
, lhs_type
);
2259 /* Generate the IFN_MULHRS call. */
2260 tree new_var
= vect_recog_temp_ssa_var (new_type
, NULL
);
2262 vect_convert_inputs (vinfo
, last_stmt_info
, 2, new_ops
, new_type
,
2263 unprom_mult
, new_vectype
);
2265 = gimple_build_call_internal (ifn
, 2, new_ops
[0], new_ops
[1]);
2266 gimple_call_set_lhs (mulhrs_stmt
, new_var
);
2267 gimple_set_location (mulhrs_stmt
, gimple_location (last_stmt
));
2269 if (dump_enabled_p ())
2270 dump_printf_loc (MSG_NOTE
, vect_location
,
2271 "created pattern stmt: %G", (gimple
*) mulhrs_stmt
);
2273 return vect_convert_output (vinfo
, last_stmt_info
, lhs_type
,
2274 mulhrs_stmt
, new_vectype
);
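
/* Editor's note: illustrative sketch only, compiled out via #if 0 and not
   part of GCC.  Scalar forms of patterns 1) and 2) above for 16-bit inputs
   multiplied in a 32-bit TYPE; the helper names are made up.  */
#if 0
static inline short
example_mulhs (short a, short b)
{
  /* Pattern 1): multiply high with scaling, c = 32 / 2 - 1 = 15.  */
  return (short) (((int) a * (int) b) >> 15);
}

static inline short
example_mulhrs (short a, short b)
{
  /* Pattern 2): the same with rounding, d = 32 / 2 - 2 = 14; this is the
     shape that maps to IFN_MULHRS when the target supports it.  */
  return (short) (((((int) a * (int) b) >> 14) + 1) >> 1);
}
#endif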
/* Recognize the patterns:

	    ATYPE a;  // narrower than TYPE
	    BTYPE b;  // narrower than TYPE
	(1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
     or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;

   where only the bottom half of avg is used.  Try to transform them into:

	(1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
     or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);

   followed by:

	    TYPE avg = (TYPE) avg';

   where NTYPE is no wider than half of TYPE.  Since only the bottom half
   of avg is used, all or part of the cast of avg' should become redundant.

   If there is no target support available, generate code to distribute rshift
   over plus and add a carry.  */
2300 vect_recog_average_pattern (vec_info
*vinfo
,
2301 stmt_vec_info last_stmt_info
, tree
*type_out
)
2303 /* Check for a shift right by one bit. */
2304 gassign
*last_stmt
= dyn_cast
<gassign
*> (last_stmt_info
->stmt
);
2306 || gimple_assign_rhs_code (last_stmt
) != RSHIFT_EXPR
2307 || !integer_onep (gimple_assign_rhs2 (last_stmt
)))
2310 /* Check that the shift result is wider than the users of the
2311 result need (i.e. that narrowing would be a natural choice). */
2312 tree lhs
= gimple_assign_lhs (last_stmt
);
2313 tree type
= TREE_TYPE (lhs
);
2314 unsigned int target_precision
2315 = vect_element_precision (last_stmt_info
->min_output_precision
);
2316 if (!INTEGRAL_TYPE_P (type
) || target_precision
>= TYPE_PRECISION (type
))
2319 /* Look through any change in sign on the shift input. */
2320 tree rshift_rhs
= gimple_assign_rhs1 (last_stmt
);
2321 vect_unpromoted_value unprom_plus
;
2322 rshift_rhs
= vect_look_through_possible_promotion (vinfo
, rshift_rhs
,
2325 || TYPE_PRECISION (TREE_TYPE (rshift_rhs
)) != TYPE_PRECISION (type
))
2328 /* Get the definition of the shift input. */
2329 stmt_vec_info plus_stmt_info
= vect_get_internal_def (vinfo
, rshift_rhs
);
2330 if (!plus_stmt_info
)
2333 /* Check whether the shift input can be seen as a tree of additions on
2334 2 or 3 widened inputs.
2336 Note that the pattern should be a win even if the result of one or
2337 more additions is reused elsewhere: if the pattern matches, we'd be
2338 replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s. */
2339 internal_fn ifn
= IFN_AVG_FLOOR
;
2340 vect_unpromoted_value unprom
[3];
2342 unsigned int nops
= vect_widened_op_tree (vinfo
, plus_stmt_info
, PLUS_EXPR
,
2343 WIDEN_PLUS_EXPR
, false, 3,
2349 /* Check that one operand is 1. */
2351 for (i
= 0; i
< 3; ++i
)
2352 if (integer_onep (unprom
[i
].op
))
2356 /* Throw away the 1 operand and keep the other two. */
2358 unprom
[i
] = unprom
[2];
2362 vect_pattern_detected ("vect_recog_average_pattern", last_stmt
);
  /*
     (a) the operation can be viewed as:

	   TYPE widened0 = (TYPE) UNPROM[0];
	   TYPE widened1 = (TYPE) UNPROM[1];
	   TYPE tmp1 = widened0 + widened1 {+ 1};
	   TYPE tmp2 = tmp1 >> 1;   // LAST_STMT_INFO

     (b) the first two statements are equivalent to:

	   TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
	   TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];

     (c) vect_recog_over_widening_pattern has already tried to narrow TYPE;

     (d) all the operations can be performed correctly at twice the width of
	 NEW_TYPE, due to the nature of the average operation; and

     (e) users of the result of the right shift need only TARGET_PRECISION
	 bits, where TARGET_PRECISION is no more than half of TYPE's
	 precision.

     Under these circumstances, the only situation in which NEW_TYPE
     could be narrower than TARGET_PRECISION is if widened0, widened1
     and an addition result are all used more than once.  Thus we can
     treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
     as "free", whereas widening the result of the average instruction
     from NEW_TYPE to TARGET_PRECISION would be a new operation.  It's
     therefore better not to go narrower than TARGET_PRECISION.  */
2395 if (TYPE_PRECISION (new_type
) < target_precision
)
2396 new_type
= build_nonstandard_integer_type (target_precision
,
2397 TYPE_UNSIGNED (new_type
));
2399 /* Check for target support. */
2400 tree new_vectype
= get_vectype_for_scalar_type (vinfo
, new_type
);
2404 bool fallback_p
= false;
2406 if (direct_internal_fn_supported_p (ifn
, new_vectype
, OPTIMIZE_FOR_SPEED
))
2408 else if (TYPE_UNSIGNED (new_type
)
2409 && optab_for_tree_code (RSHIFT_EXPR
, new_vectype
, optab_scalar
)
2410 && optab_for_tree_code (PLUS_EXPR
, new_vectype
, optab_default
)
2411 && optab_for_tree_code (BIT_IOR_EXPR
, new_vectype
, optab_default
)
2412 && optab_for_tree_code (BIT_AND_EXPR
, new_vectype
, optab_default
))
2417 /* The IR requires a valid vector type for the cast result, even though
2418 it's likely to be discarded. */
2419 *type_out
= get_vectype_for_scalar_type (vinfo
, type
);
2423 tree new_var
= vect_recog_temp_ssa_var (new_type
, NULL
);
2425 vect_convert_inputs (vinfo
, last_stmt_info
, 2, new_ops
, new_type
,
2426 unprom
, new_vectype
);
      /* As a fallback, generate code for the following sequence:

	 shifted_op0 = new_ops[0] >> 1;
	 shifted_op1 = new_ops[1] >> 1;
	 sum_of_shifted = shifted_op0 + shifted_op1;
	 unmasked_carry = new_ops[0] and/or new_ops[1];
	 carry = unmasked_carry & 1;
	 new_var = sum_of_shifted + carry;  */
2440 tree one_cst
= build_one_cst (new_type
);
2443 tree shifted_op0
= vect_recog_temp_ssa_var (new_type
, NULL
);
2444 g
= gimple_build_assign (shifted_op0
, RSHIFT_EXPR
, new_ops
[0], one_cst
);
2445 append_pattern_def_seq (vinfo
, last_stmt_info
, g
, new_vectype
);
2447 tree shifted_op1
= vect_recog_temp_ssa_var (new_type
, NULL
);
2448 g
= gimple_build_assign (shifted_op1
, RSHIFT_EXPR
, new_ops
[1], one_cst
);
2449 append_pattern_def_seq (vinfo
, last_stmt_info
, g
, new_vectype
);
2451 tree sum_of_shifted
= vect_recog_temp_ssa_var (new_type
, NULL
);
2452 g
= gimple_build_assign (sum_of_shifted
, PLUS_EXPR
,
2453 shifted_op0
, shifted_op1
);
2454 append_pattern_def_seq (vinfo
, last_stmt_info
, g
, new_vectype
);
2456 tree unmasked_carry
= vect_recog_temp_ssa_var (new_type
, NULL
);
2457 tree_code c
= (ifn
== IFN_AVG_CEIL
) ? BIT_IOR_EXPR
: BIT_AND_EXPR
;
2458 g
= gimple_build_assign (unmasked_carry
, c
, new_ops
[0], new_ops
[1]);
2459 append_pattern_def_seq (vinfo
, last_stmt_info
, g
, new_vectype
);
2461 tree carry
= vect_recog_temp_ssa_var (new_type
, NULL
);
2462 g
= gimple_build_assign (carry
, BIT_AND_EXPR
, unmasked_carry
, one_cst
);
2463 append_pattern_def_seq (vinfo
, last_stmt_info
, g
, new_vectype
);
2465 g
= gimple_build_assign (new_var
, PLUS_EXPR
, sum_of_shifted
, carry
);
2466 return vect_convert_output (vinfo
, last_stmt_info
, type
, g
, new_vectype
);
2469 /* Generate the IFN_AVG* call. */
2470 gcall
*average_stmt
= gimple_build_call_internal (ifn
, 2, new_ops
[0],
2472 gimple_call_set_lhs (average_stmt
, new_var
);
2473 gimple_set_location (average_stmt
, gimple_location (last_stmt
));
2475 if (dump_enabled_p ())
2476 dump_printf_loc (MSG_NOTE
, vect_location
,
2477 "created pattern stmt: %G", (gimple
*) average_stmt
);
2479 return vect_convert_output (vinfo
, last_stmt_info
,
2480 type
, average_stmt
, new_vectype
);
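
/* Editor's note: illustrative sketch only, compiled out via #if 0 and not
   part of GCC.  Scalar equivalents of the two averaging patterns above for
   8-bit inputs; .AVG_FLOOR / .AVG_CEIL compute these without materializing
   the 16-bit intermediate sum.  */
#if 0
static inline unsigned char
example_avg_floor (unsigned char a, unsigned char b)
{
  /* Pattern (1): truncating average; the addition is done in a wider type
     so that the carry bit is not lost.  */
  return (unsigned char) (((unsigned int) a + b) >> 1);
}

static inline unsigned char
example_avg_ceil (unsigned char a, unsigned char b)
{
  /* Pattern (2): rounding average; equivalent to the fallback sequence
     above, (a >> 1) + (b >> 1) + ((a | b) & 1).  */
  return (unsigned char) (((unsigned int) a + b + 1) >> 1);
}
#endif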
/* Recognize cases in which the input to a cast is wider than its
   output, and the input is fed by a widening operation.  Fold this
   by removing the unnecessary intermediate widening.  E.g.:

     unsigned char a;
     unsigned int b = (unsigned int) a;
     unsigned short c = (unsigned short) b;

   -->

     unsigned short c = (unsigned short) a;

   Although this is rare in input IR, it is an expected side-effect
   of the over-widening pattern above.

   This is beneficial also for integer-to-float conversions, if the
   widened integer has more bits than the float, and if the unwidened
   input doesn't.  */
static gimple *
vect_recog_cast_forwprop_pattern (vec_info *vinfo,
				  stmt_vec_info last_stmt_info, tree *type_out)
{
  /* Check for a cast, including an integer-to-float conversion.  */
  gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
  if (!last_stmt)
    return NULL;

  tree_code code = gimple_assign_rhs_code (last_stmt);
  if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
    return NULL;

  /* Make sure that the rhs is a scalar with a natural bitsize.  */
  tree lhs = gimple_assign_lhs (last_stmt);
  if (!lhs)
    return NULL;

  tree lhs_type = TREE_TYPE (lhs);
  scalar_mode lhs_mode;
  if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
      || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
    return NULL;

  /* Check for a narrowing operation (from a vector point of view).  */
  tree rhs = gimple_assign_rhs1 (last_stmt);
  tree rhs_type = TREE_TYPE (rhs);
  if (!INTEGRAL_TYPE_P (rhs_type)
      || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
      || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
    return NULL;

  /* Try to find an unpromoted input.  */
  vect_unpromoted_value unprom;
  if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
      || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
    return NULL;

  /* If the bits above RHS_TYPE matter, make sure that they're the
     same when extending from UNPROM as they are when extending from RHS.  */
  if (!INTEGRAL_TYPE_P (lhs_type)
      && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
    return NULL;

  /* We can get the same result by casting UNPROM directly, to avoid
     the unnecessary widening and narrowing.  */
  vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);

  *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
  if (!*type_out)
    return NULL;

  tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
  gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
  gimple_set_location (pattern_stmt, gimple_location (last_stmt));

  return pattern_stmt;
}
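
/* Editor's note: illustrative sketch only, compiled out via #if 0 and not
   part of GCC.  The scalar shape of the cast-forwarding opportunity above:
   the intermediate widening to unsigned int contributes nothing to the
   final 16-bit value, so a single cast suffices.  */
#if 0
static inline unsigned short
example_cast_forwprop (unsigned char a)
{
  unsigned int b = (unsigned int) a;	 /* widening cast */
  unsigned short c = (unsigned short) b; /* narrowing cast */
  /* Equivalent to the single cast (unsigned short) a.  */
  return c;
}
#endif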
/* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
   to WIDEN_LSHIFT_EXPR.  See vect_recog_widen_op_pattern for details.  */

static gimple *
vect_recog_widen_shift_pattern (vec_info *vinfo,
				stmt_vec_info last_stmt_info, tree *type_out)
{
  return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
				      LSHIFT_EXPR, WIDEN_LSHIFT_EXPR, true,
				      "vect_recog_widen_shift_pattern");
}
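
/* Editor's note: illustrative sketch only, compiled out via #if 0 and not
   part of GCC.  A scalar loop with the widening-shift shape that
   vect_recog_widen_op_pattern is asked to match here.  */
#if 0
static inline void
example_widen_lshift (int *out, const short *in, int shift, int n)
{
  for (int i = 0; i < n; i++)
    /* Each 16-bit element is widened to 32 bits and then shifted left;
       WIDEN_LSHIFT_EXPR performs both steps on the narrower vector input
       (assuming 0 <= shift < 16 here).  */
    out[i] = (int) in[i] << shift;
}
#endif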
/* Detect a rotate pattern that wouldn't be otherwise vectorized:

   type a_t, b_t, c_t;

   S0 a_t = b_t r<< c_t;

  Input/Output:

  * STMT_VINFO: The stmt from which the pattern search begins,
    i.e. the shift/rotate stmt.  The original stmt (S0) is replaced
    with a sequence:

	S1 d_t = -c_t;
	S2 e_t = d_t & (B - 1);
	S3 f_t = b_t << c_t;
	S4 g_t = b_t >> e_t;
	S0 a_t = f_t | g_t;

    where B is element bitsize of type.

  Output:

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the rotate
    S0 stmt.  */
2599 vect_recog_rotate_pattern (vec_info
*vinfo
,
2600 stmt_vec_info stmt_vinfo
, tree
*type_out
)
2602 gimple
*last_stmt
= stmt_vinfo
->stmt
;
2603 tree oprnd0
, oprnd1
, lhs
, var
, var1
, var2
, vectype
, type
, stype
, def
, def2
;
2604 gimple
*pattern_stmt
, *def_stmt
;
2605 enum tree_code rhs_code
;
2606 enum vect_def_type dt
;
2607 optab optab1
, optab2
;
2608 edge ext_def
= NULL
;
2609 bool bswap16_p
= false;
2611 if (is_gimple_assign (last_stmt
))
2613 rhs_code
= gimple_assign_rhs_code (last_stmt
);
2623 lhs
= gimple_assign_lhs (last_stmt
);
2624 oprnd0
= gimple_assign_rhs1 (last_stmt
);
2625 type
= TREE_TYPE (oprnd0
);
2626 oprnd1
= gimple_assign_rhs2 (last_stmt
);
2628 else if (gimple_call_builtin_p (last_stmt
, BUILT_IN_BSWAP16
))
      /* __builtin_bswap16 (x) is another form of x r>> 8.
	 The vectorizer has bswap support, but only if the argument isn't
	 promoted.  */
2633 lhs
= gimple_call_lhs (last_stmt
);
2634 oprnd0
= gimple_call_arg (last_stmt
, 0);
2635 type
= TREE_TYPE (oprnd0
);
2637 || TYPE_PRECISION (TREE_TYPE (lhs
)) != 16
2638 || TYPE_PRECISION (type
) <= 16
2639 || TREE_CODE (oprnd0
) != SSA_NAME
2640 || BITS_PER_UNIT
!= 8)
2643 stmt_vec_info def_stmt_info
;
2644 if (!vect_is_simple_use (oprnd0
, vinfo
, &dt
, &def_stmt_info
, &def_stmt
))
2647 if (dt
!= vect_internal_def
)
2650 if (gimple_assign_cast_p (def_stmt
))
2652 def
= gimple_assign_rhs1 (def_stmt
);
2653 if (INTEGRAL_TYPE_P (TREE_TYPE (def
))
2654 && TYPE_PRECISION (TREE_TYPE (def
)) == 16)
2658 type
= TREE_TYPE (lhs
);
2659 vectype
= get_vectype_for_scalar_type (vinfo
, type
);
2660 if (vectype
== NULL_TREE
)
2663 if (tree char_vectype
= get_same_sized_vectype (char_type_node
, vectype
))
	  /* The encoding uses one stepped pattern for each byte in the
	     16-bit word.  */
	  vec_perm_builder elts (TYPE_VECTOR_SUBPARTS (char_vectype), 2, 3);
2668 for (unsigned i
= 0; i
< 3; ++i
)
2669 for (unsigned j
= 0; j
< 2; ++j
)
2670 elts
.quick_push ((i
+ 1) * 2 - j
- 1);
2672 vec_perm_indices
indices (elts
, 1,
2673 TYPE_VECTOR_SUBPARTS (char_vectype
));
2674 machine_mode vmode
= TYPE_MODE (char_vectype
);
2675 if (can_vec_perm_const_p (vmode
, vmode
, indices
))
2677 /* vectorizable_bswap can handle the __builtin_bswap16 if we
2678 undo the argument promotion. */
2679 if (!useless_type_conversion_p (type
, TREE_TYPE (oprnd0
)))
2681 def
= vect_recog_temp_ssa_var (type
, NULL
);
2682 def_stmt
= gimple_build_assign (def
, NOP_EXPR
, oprnd0
);
2683 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
2687 /* Pattern detected. */
2688 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt
);
2690 *type_out
= vectype
;
2692 /* Pattern supported. Create a stmt to be used to replace the
2693 pattern, with the unpromoted argument. */
2694 var
= vect_recog_temp_ssa_var (type
, NULL
);
2695 pattern_stmt
= gimple_build_call (gimple_call_fndecl (last_stmt
),
2697 gimple_call_set_lhs (pattern_stmt
, var
);
2698 gimple_call_set_fntype (as_a
<gcall
*> (pattern_stmt
),
2699 gimple_call_fntype (last_stmt
));
2700 return pattern_stmt
;
2704 oprnd1
= build_int_cst (integer_type_node
, 8);
2705 rhs_code
= LROTATE_EXPR
;
2711 if (TREE_CODE (oprnd0
) != SSA_NAME
2712 || TYPE_PRECISION (TREE_TYPE (lhs
)) != TYPE_PRECISION (type
)
2713 || !INTEGRAL_TYPE_P (type
))
2716 stmt_vec_info def_stmt_info
;
2717 if (!vect_is_simple_use (oprnd1
, vinfo
, &dt
, &def_stmt_info
, &def_stmt
))
2720 if (dt
!= vect_internal_def
2721 && dt
!= vect_constant_def
2722 && dt
!= vect_external_def
)
2725 vectype
= get_vectype_for_scalar_type (vinfo
, type
);
2726 if (vectype
== NULL_TREE
)
2729 /* If vector/vector or vector/scalar rotate is supported by the target,
2730 don't do anything here. */
2731 optab1
= optab_for_tree_code (rhs_code
, vectype
, optab_vector
);
2733 && optab_handler (optab1
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
2738 if (!useless_type_conversion_p (type
, TREE_TYPE (oprnd0
)))
2740 def
= vect_recog_temp_ssa_var (type
, NULL
);
2741 def_stmt
= gimple_build_assign (def
, NOP_EXPR
, oprnd0
);
2742 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
2746 /* Pattern detected. */
2747 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt
);
2749 *type_out
= vectype
;
      /* Pattern supported.  Create a stmt to be used to replace the
	 pattern.  */
2753 var
= vect_recog_temp_ssa_var (type
, NULL
);
2754 pattern_stmt
= gimple_build_assign (var
, LROTATE_EXPR
, oprnd0
,
2756 return pattern_stmt
;
2761 if (is_a
<bb_vec_info
> (vinfo
) || dt
!= vect_internal_def
)
2763 optab2
= optab_for_tree_code (rhs_code
, vectype
, optab_scalar
);
2765 && optab_handler (optab2
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
2769 tree utype
= unsigned_type_for (type
);
2770 tree uvectype
= get_vectype_for_scalar_type (vinfo
, utype
);
2774 /* If vector/vector or vector/scalar shifts aren't supported by the target,
2775 don't do anything here either. */
2776 optab1
= optab_for_tree_code (LSHIFT_EXPR
, uvectype
, optab_vector
);
2777 optab2
= optab_for_tree_code (RSHIFT_EXPR
, uvectype
, optab_vector
);
2779 || optab_handler (optab1
, TYPE_MODE (uvectype
)) == CODE_FOR_nothing
2781 || optab_handler (optab2
, TYPE_MODE (uvectype
)) == CODE_FOR_nothing
)
2783 if (! is_a
<bb_vec_info
> (vinfo
) && dt
== vect_internal_def
)
2785 optab1
= optab_for_tree_code (LSHIFT_EXPR
, uvectype
, optab_scalar
);
2786 optab2
= optab_for_tree_code (RSHIFT_EXPR
, uvectype
, optab_scalar
);
2788 || optab_handler (optab1
, TYPE_MODE (uvectype
)) == CODE_FOR_nothing
2790 || optab_handler (optab2
, TYPE_MODE (uvectype
)) == CODE_FOR_nothing
)
2794 *type_out
= vectype
;
2796 if (!useless_type_conversion_p (utype
, TREE_TYPE (oprnd0
)))
2798 def
= vect_recog_temp_ssa_var (utype
, NULL
);
2799 def_stmt
= gimple_build_assign (def
, NOP_EXPR
, oprnd0
);
2800 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
2804 if (dt
== vect_external_def
&& TREE_CODE (oprnd1
) == SSA_NAME
)
2805 ext_def
= vect_get_external_def_edge (vinfo
, oprnd1
);
2808 scalar_int_mode mode
= SCALAR_INT_TYPE_MODE (utype
);
2809 if (dt
!= vect_internal_def
|| TYPE_MODE (TREE_TYPE (oprnd1
)) == mode
)
2811 else if (def_stmt
&& gimple_assign_cast_p (def_stmt
))
2813 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
2814 if (TYPE_MODE (TREE_TYPE (rhs1
)) == mode
2815 && TYPE_PRECISION (TREE_TYPE (rhs1
))
2816 == TYPE_PRECISION (type
))
2820 if (def
== NULL_TREE
)
2822 def
= vect_recog_temp_ssa_var (utype
, NULL
);
2823 def_stmt
= gimple_build_assign (def
, NOP_EXPR
, oprnd1
);
2824 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
2826 stype
= TREE_TYPE (def
);
2828 if (TREE_CODE (def
) == INTEGER_CST
)
2830 if (!tree_fits_uhwi_p (def
)
2831 || tree_to_uhwi (def
) >= GET_MODE_PRECISION (mode
)
2832 || integer_zerop (def
))
2834 def2
= build_int_cst (stype
,
2835 GET_MODE_PRECISION (mode
) - tree_to_uhwi (def
));
2839 tree vecstype
= get_vectype_for_scalar_type (vinfo
, stype
);
2841 if (vecstype
== NULL_TREE
)
2843 def2
= vect_recog_temp_ssa_var (stype
, NULL
);
2844 def_stmt
= gimple_build_assign (def2
, NEGATE_EXPR
, def
);
2848 = gsi_insert_on_edge_immediate (ext_def
, def_stmt
);
2849 gcc_assert (!new_bb
);
2852 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
, vecstype
);
2854 def2
= vect_recog_temp_ssa_var (stype
, NULL
);
2855 tree mask
= build_int_cst (stype
, GET_MODE_PRECISION (mode
) - 1);
2856 def_stmt
= gimple_build_assign (def2
, BIT_AND_EXPR
,
2857 gimple_assign_lhs (def_stmt
), mask
);
2861 = gsi_insert_on_edge_immediate (ext_def
, def_stmt
);
2862 gcc_assert (!new_bb
);
2865 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
, vecstype
);
2868 var1
= vect_recog_temp_ssa_var (utype
, NULL
);
2869 def_stmt
= gimple_build_assign (var1
, rhs_code
== LROTATE_EXPR
2870 ? LSHIFT_EXPR
: RSHIFT_EXPR
,
2872 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
2874 var2
= vect_recog_temp_ssa_var (utype
, NULL
);
2875 def_stmt
= gimple_build_assign (var2
, rhs_code
== LROTATE_EXPR
2876 ? RSHIFT_EXPR
: LSHIFT_EXPR
,
2878 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
2880 /* Pattern detected. */
2881 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt
);
2883 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2884 var
= vect_recog_temp_ssa_var (utype
, NULL
);
2885 pattern_stmt
= gimple_build_assign (var
, BIT_IOR_EXPR
, var1
, var2
);
2887 if (!useless_type_conversion_p (type
, utype
))
2889 append_pattern_def_seq (vinfo
, stmt_vinfo
, pattern_stmt
);
2890 tree result
= vect_recog_temp_ssa_var (type
, NULL
);
2891 pattern_stmt
= gimple_build_assign (result
, NOP_EXPR
, var
);
2893 return pattern_stmt
;
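
/* Editor's note: illustrative sketch only, compiled out via #if 0 and not
   part of GCC.  The S1-S4 expansion above, written out for a left rotate of
   a 32-bit value with 0 <= c < 32; negating the count and masking with
   B - 1 gives the complementary shift amount without ever shifting by the
   full bit width.  */
#if 0
static inline unsigned int
example_rotate_left (unsigned int b, unsigned int c)
{
  unsigned int d = 0U - c;	/* S1: d_t = -c_t  */
  unsigned int e = d & 31;	/* S2: e_t = d_t & (B - 1)  */
  unsigned int f = b << c;	/* S3: f_t = b_t << c_t  */
  unsigned int g = b >> e;	/* S4: g_t = b_t >> e_t  */
  return f | g;			/* S0: a_t = f_t | g_t, i.e. b r<< c  */
}
#endif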
/* Detect a vector by vector shift pattern that wouldn't be otherwise
   vectorized:

   S3 res_T = b_T op a_t;

  where type 'TYPE' is a type with different size than 'type',
  and op is <<, >> or rotate.

  Also detect cases:

   TYPE b_T, c_T, res_T;

   S1 a_t = (type) c_T;
   S3 res_T = b_T op a_t;

  Input/Output:

  * STMT_VINFO: The stmt from which the pattern search begins,
    i.e. the shift/rotate stmt.  The original stmt (S3) is replaced
    with a shift/rotate which has same type on both operands, in the
    second case just b_T op c_T, in the first case with added cast
    from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.

  Output:

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the shift/rotate
    S3 stmt.  */
2935 vect_recog_vector_vector_shift_pattern (vec_info
*vinfo
,
2936 stmt_vec_info stmt_vinfo
,
2939 gimple
*last_stmt
= stmt_vinfo
->stmt
;
2940 tree oprnd0
, oprnd1
, lhs
, var
;
2941 gimple
*pattern_stmt
;
2942 enum tree_code rhs_code
;
2944 if (!is_gimple_assign (last_stmt
))
2947 rhs_code
= gimple_assign_rhs_code (last_stmt
);
2959 lhs
= gimple_assign_lhs (last_stmt
);
2960 oprnd0
= gimple_assign_rhs1 (last_stmt
);
2961 oprnd1
= gimple_assign_rhs2 (last_stmt
);
2962 if (TREE_CODE (oprnd0
) != SSA_NAME
2963 || TREE_CODE (oprnd1
) != SSA_NAME
2964 || TYPE_MODE (TREE_TYPE (oprnd0
)) == TYPE_MODE (TREE_TYPE (oprnd1
))
2965 || !type_has_mode_precision_p (TREE_TYPE (oprnd1
))
2966 || TYPE_PRECISION (TREE_TYPE (lhs
))
2967 != TYPE_PRECISION (TREE_TYPE (oprnd0
)))
2970 stmt_vec_info def_vinfo
= vect_get_internal_def (vinfo
, oprnd1
);
2974 *type_out
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (oprnd0
));
2975 if (*type_out
== NULL_TREE
)
2978 tree def
= NULL_TREE
;
2979 gassign
*def_stmt
= dyn_cast
<gassign
*> (def_vinfo
->stmt
);
2980 if (def_stmt
&& gimple_assign_cast_p (def_stmt
))
2982 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
2983 if (TYPE_MODE (TREE_TYPE (rhs1
)) == TYPE_MODE (TREE_TYPE (oprnd0
))
2984 && TYPE_PRECISION (TREE_TYPE (rhs1
))
2985 == TYPE_PRECISION (TREE_TYPE (oprnd0
)))
2987 if (TYPE_PRECISION (TREE_TYPE (oprnd1
))
2988 >= TYPE_PRECISION (TREE_TYPE (rhs1
)))
2993 = build_low_bits_mask (TREE_TYPE (rhs1
),
2994 TYPE_PRECISION (TREE_TYPE (oprnd1
)));
2995 def
= vect_recog_temp_ssa_var (TREE_TYPE (rhs1
), NULL
);
2996 def_stmt
= gimple_build_assign (def
, BIT_AND_EXPR
, rhs1
, mask
);
2997 tree vecstype
= get_vectype_for_scalar_type (vinfo
,
2999 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
, vecstype
);
3004 if (def
== NULL_TREE
)
3006 def
= vect_recog_temp_ssa_var (TREE_TYPE (oprnd0
), NULL
);
3007 def_stmt
= gimple_build_assign (def
, NOP_EXPR
, oprnd1
);
3008 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
3011 /* Pattern detected. */
3012 vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt
);
3014 /* Pattern supported. Create a stmt to be used to replace the pattern. */
3015 var
= vect_recog_temp_ssa_var (TREE_TYPE (oprnd0
), NULL
);
3016 pattern_stmt
= gimple_build_assign (var
, rhs_code
, oprnd0
, def
);
3018 return pattern_stmt
;
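
/* Editor's note: illustrative sketch only, compiled out via #if 0 and not
   part of GCC.  A scalar loop with the mixed-size shift shape above: the
   shift amount is wider than the shifted value, so the pattern narrows the
   amount to give both vector operands the same type.  */
#if 0
static inline void
example_vector_vector_shift (unsigned short *res, const unsigned short *b,
			     const unsigned int *amount, int n)
{
  for (int i = 0; i < n; i++)
    /* 'amount' is 32 bits wide while the shifted value is 16 bits; the
       counts are assumed to be less than 16, as the vectorized form
       requires.  */
    res[i] = (unsigned short) (b[i] << amount[i]);
}
#endif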
/* Return true iff the target has a vector optab implementing the operation
   CODE on type VECTYPE.  */

static bool
target_has_vecop_for_code (tree_code code, tree vectype)
{
  optab voptab = optab_for_tree_code (code, vectype, optab_vector);
  return voptab
	 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
}
/* Verify that the target has optabs of VECTYPE to perform all the steps
   needed by the multiplication-by-immediate synthesis algorithm described by
   ALG and VAR.  If SYNTH_SHIFT_P is true ensure that vector addition is
   present.  Return true iff the target supports all the steps.  */
3038 target_supports_mult_synth_alg (struct algorithm
*alg
, mult_variant var
,
3039 tree vectype
, bool synth_shift_p
)
3041 if (alg
->op
[0] != alg_zero
&& alg
->op
[0] != alg_m
)
3044 bool supports_vminus
= target_has_vecop_for_code (MINUS_EXPR
, vectype
);
3045 bool supports_vplus
= target_has_vecop_for_code (PLUS_EXPR
, vectype
);
3047 if (var
== negate_variant
3048 && !target_has_vecop_for_code (NEGATE_EXPR
, vectype
))
3051 /* If we must synthesize shifts with additions make sure that vector
3052 addition is available. */
3053 if ((var
== add_variant
|| synth_shift_p
) && !supports_vplus
)
3056 for (int i
= 1; i
< alg
->ops
; i
++)
3064 case alg_add_factor
:
3065 if (!supports_vplus
)
3070 case alg_sub_factor
:
3071 if (!supports_vminus
)
3077 case alg_impossible
:
/* Synthesize a left shift of OP by AMNT bits using a series of additions and
   putting the final result in DEST.  Append all statements but the last into
   VINFO.  Return the last statement.  */
3092 synth_lshift_by_additions (vec_info
*vinfo
,
3093 tree dest
, tree op
, HOST_WIDE_INT amnt
,
3094 stmt_vec_info stmt_info
)
3097 tree itype
= TREE_TYPE (op
);
3099 gcc_assert (amnt
>= 0);
3100 for (i
= 0; i
< amnt
; i
++)
3102 tree tmp_var
= (i
< amnt
- 1) ? vect_recog_temp_ssa_var (itype
, NULL
)
3105 = gimple_build_assign (tmp_var
, PLUS_EXPR
, prev_res
, prev_res
);
3108 append_pattern_def_seq (vinfo
, stmt_info
, stmt
);
/* Helper for vect_synth_mult_by_constant.  Apply a binary operation
   CODE to operands OP1 and OP2, creating a new temporary SSA var in
   the process if necessary.  Append the resulting assignment statements
   to the sequence in STMT_VINFO.  Return the SSA variable that holds the
   result of the binary operation.  If SYNTH_SHIFT_P is true synthesize
   left shifts using additions.  */
3124 apply_binop_and_append_stmt (vec_info
*vinfo
,
3125 tree_code code
, tree op1
, tree op2
,
3126 stmt_vec_info stmt_vinfo
, bool synth_shift_p
)
3128 if (integer_zerop (op2
)
3129 && (code
== LSHIFT_EXPR
3130 || code
== PLUS_EXPR
))
3132 gcc_assert (TREE_CODE (op1
) == SSA_NAME
);
3137 tree itype
= TREE_TYPE (op1
);
3138 tree tmp_var
= vect_recog_temp_ssa_var (itype
, NULL
);
3140 if (code
== LSHIFT_EXPR
3143 stmt
= synth_lshift_by_additions (vinfo
, tmp_var
, op1
,
3144 TREE_INT_CST_LOW (op2
), stmt_vinfo
);
3145 append_pattern_def_seq (vinfo
, stmt_vinfo
, stmt
);
3149 stmt
= gimple_build_assign (tmp_var
, code
, op1
, op2
);
3150 append_pattern_def_seq (vinfo
, stmt_vinfo
, stmt
);
/* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
   and simple arithmetic operations to be vectorized.  Record the statements
   produced in STMT_VINFO and return the last statement in the sequence or
   NULL if it's not possible to synthesize such a multiplication.
   This function mirrors the behavior of expand_mult_const in expmed.cc but
   works on tree-ssa form.  */
3162 vect_synth_mult_by_constant (vec_info
*vinfo
, tree op
, tree val
,
3163 stmt_vec_info stmt_vinfo
)
3165 tree itype
= TREE_TYPE (op
);
3166 machine_mode mode
= TYPE_MODE (itype
);
3167 struct algorithm alg
;
3168 mult_variant variant
;
3169 if (!tree_fits_shwi_p (val
))
3172 /* Multiplication synthesis by shifts, adds and subs can introduce
3173 signed overflow where the original operation didn't. Perform the
3174 operations on an unsigned type and cast back to avoid this.
3175 In the future we may want to relax this for synthesis algorithms
3176 that we can prove do not cause unexpected overflow. */
3177 bool cast_to_unsigned_p
= !TYPE_OVERFLOW_WRAPS (itype
);
3179 tree multtype
= cast_to_unsigned_p
? unsigned_type_for (itype
) : itype
;
3180 tree vectype
= get_vectype_for_scalar_type (vinfo
, multtype
);
3184 /* Targets that don't support vector shifts but support vector additions
3185 can synthesize shifts that way. */
3186 bool synth_shift_p
= !vect_supportable_shift (vinfo
, LSHIFT_EXPR
, multtype
);
3188 HOST_WIDE_INT hwval
= tree_to_shwi (val
);
  /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
     The vectorizer's benefit analysis will decide whether it's beneficial
     to use it.  */
3192 bool possible
= choose_mult_variant (VECTOR_MODE_P (TYPE_MODE (vectype
))
3193 ? TYPE_MODE (vectype
) : mode
,
3194 hwval
, &alg
, &variant
, MAX_COST
);
3198 if (!target_supports_mult_synth_alg (&alg
, variant
, vectype
, synth_shift_p
))
3203 /* Clear out the sequence of statements so we can populate it below. */
3204 gimple
*stmt
= NULL
;
3206 if (cast_to_unsigned_p
)
3208 tree tmp_op
= vect_recog_temp_ssa_var (multtype
, NULL
);
3209 stmt
= gimple_build_assign (tmp_op
, CONVERT_EXPR
, op
);
3210 append_pattern_def_seq (vinfo
, stmt_vinfo
, stmt
);
3214 if (alg
.op
[0] == alg_zero
)
3215 accumulator
= build_int_cst (multtype
, 0);
3219 bool needs_fixup
= (variant
== negate_variant
)
3220 || (variant
== add_variant
);
3222 for (int i
= 1; i
< alg
.ops
; i
++)
3224 tree shft_log
= build_int_cst (multtype
, alg
.log
[i
]);
3225 tree accum_tmp
= vect_recog_temp_ssa_var (multtype
, NULL
);
3226 tree tmp_var
= NULL_TREE
;
3233 = synth_lshift_by_additions (vinfo
, accum_tmp
, accumulator
,
3234 alg
.log
[i
], stmt_vinfo
);
3236 stmt
= gimple_build_assign (accum_tmp
, LSHIFT_EXPR
, accumulator
,
3241 = apply_binop_and_append_stmt (vinfo
, LSHIFT_EXPR
, op
, shft_log
,
3242 stmt_vinfo
, synth_shift_p
);
3243 stmt
= gimple_build_assign (accum_tmp
, PLUS_EXPR
, accumulator
,
3247 tmp_var
= apply_binop_and_append_stmt (vinfo
, LSHIFT_EXPR
, op
,
3248 shft_log
, stmt_vinfo
,
3250 /* In some algorithms the first step involves zeroing the
3251 accumulator. If subtracting from such an accumulator
3252 just emit the negation directly. */
3253 if (integer_zerop (accumulator
))
3254 stmt
= gimple_build_assign (accum_tmp
, NEGATE_EXPR
, tmp_var
);
3256 stmt
= gimple_build_assign (accum_tmp
, MINUS_EXPR
, accumulator
,
3261 = apply_binop_and_append_stmt (vinfo
, LSHIFT_EXPR
, accumulator
,
3262 shft_log
, stmt_vinfo
, synth_shift_p
);
3263 stmt
= gimple_build_assign (accum_tmp
, PLUS_EXPR
, tmp_var
, op
);
3267 = apply_binop_and_append_stmt (vinfo
, LSHIFT_EXPR
, accumulator
,
3268 shft_log
, stmt_vinfo
, synth_shift_p
);
3269 stmt
= gimple_build_assign (accum_tmp
, MINUS_EXPR
, tmp_var
, op
);
3271 case alg_add_factor
:
3273 = apply_binop_and_append_stmt (vinfo
, LSHIFT_EXPR
, accumulator
,
3274 shft_log
, stmt_vinfo
, synth_shift_p
);
3275 stmt
= gimple_build_assign (accum_tmp
, PLUS_EXPR
, accumulator
,
3278 case alg_sub_factor
:
3280 = apply_binop_and_append_stmt (vinfo
, LSHIFT_EXPR
, accumulator
,
3281 shft_log
, stmt_vinfo
, synth_shift_p
);
3282 stmt
= gimple_build_assign (accum_tmp
, MINUS_EXPR
, tmp_var
,
3288 /* We don't want to append the last stmt in the sequence to stmt_vinfo
3289 but rather return it directly. */
3291 if ((i
< alg
.ops
- 1) || needs_fixup
|| cast_to_unsigned_p
)
3292 append_pattern_def_seq (vinfo
, stmt_vinfo
, stmt
);
3293 accumulator
= accum_tmp
;
3295 if (variant
== negate_variant
)
3297 tree accum_tmp
= vect_recog_temp_ssa_var (multtype
, NULL
);
3298 stmt
= gimple_build_assign (accum_tmp
, NEGATE_EXPR
, accumulator
);
3299 accumulator
= accum_tmp
;
3300 if (cast_to_unsigned_p
)
3301 append_pattern_def_seq (vinfo
, stmt_vinfo
, stmt
);
3303 else if (variant
== add_variant
)
3305 tree accum_tmp
= vect_recog_temp_ssa_var (multtype
, NULL
);
3306 stmt
= gimple_build_assign (accum_tmp
, PLUS_EXPR
, accumulator
, op
);
3307 accumulator
= accum_tmp
;
3308 if (cast_to_unsigned_p
)
3309 append_pattern_def_seq (vinfo
, stmt_vinfo
, stmt
);
3311 /* Move back to a signed if needed. */
3312 if (cast_to_unsigned_p
)
3314 tree accum_tmp
= vect_recog_temp_ssa_var (itype
, NULL
);
3315 stmt
= gimple_build_assign (accum_tmp
, CONVERT_EXPR
, accumulator
);
/* Detect multiplication by constant and convert it into a sequence of
   shifts and additions, subtractions, negations.  We reuse the
   choose_mult_variant algorithms from expmed.cc.

   Input/Output:

   STMT_VINFO: The stmt from which the pattern search begins,
   i.e. the mult stmt.

   Output:

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace
    the multiplication.  */
3338 vect_recog_mult_pattern (vec_info
*vinfo
,
3339 stmt_vec_info stmt_vinfo
, tree
*type_out
)
3341 gimple
*last_stmt
= stmt_vinfo
->stmt
;
3342 tree oprnd0
, oprnd1
, vectype
, itype
;
3343 gimple
*pattern_stmt
;
3345 if (!is_gimple_assign (last_stmt
))
3348 if (gimple_assign_rhs_code (last_stmt
) != MULT_EXPR
)
3351 oprnd0
= gimple_assign_rhs1 (last_stmt
);
3352 oprnd1
= gimple_assign_rhs2 (last_stmt
);
3353 itype
= TREE_TYPE (oprnd0
);
3355 if (TREE_CODE (oprnd0
) != SSA_NAME
3356 || TREE_CODE (oprnd1
) != INTEGER_CST
3357 || !INTEGRAL_TYPE_P (itype
)
3358 || !type_has_mode_precision_p (itype
))
3361 vectype
= get_vectype_for_scalar_type (vinfo
, itype
);
3362 if (vectype
== NULL_TREE
)
3365 /* If the target can handle vectorized multiplication natively,
3366 don't attempt to optimize this. */
3367 optab mul_optab
= optab_for_tree_code (MULT_EXPR
, vectype
, optab_default
);
3368 if (mul_optab
!= unknown_optab
)
3370 machine_mode vec_mode
= TYPE_MODE (vectype
);
3371 int icode
= (int) optab_handler (mul_optab
, vec_mode
);
3372 if (icode
!= CODE_FOR_nothing
)
3376 pattern_stmt
= vect_synth_mult_by_constant (vinfo
,
3377 oprnd0
, oprnd1
, stmt_vinfo
);
3381 /* Pattern detected. */
3382 vect_pattern_detected ("vect_recog_mult_pattern", last_stmt
);
3384 *type_out
= vectype
;
3386 return pattern_stmt
;
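
/* Editor's note: illustrative sketch only, compiled out via #if 0 and not
   part of GCC.  One possible output of the synthesis above: multiplying by
   the constant 10 as two shifts and an addition; choose_mult_variant may
   pick a different but equivalent sequence for the same constant.  */
#if 0
static inline unsigned int
example_mult_by_10 (unsigned int x)
{
  unsigned int t1 = x << 3;	/* x * 8 */
  unsigned int t2 = x << 1;	/* x * 2 */
  return t1 + t2;		/* x * 10 */
}
#endif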
/* Detect a signed division by a constant that wouldn't be
   otherwise vectorized:

   S1 a_t = b_t / N;

  where type 'type' is an integral type and N is a constant.

  Similarly handle modulo by a constant:

   S4 a_t = b_t % N;

  Input/Output:

  * STMT_VINFO: The stmt from which the pattern search begins,
    i.e. the division stmt.  S1 is replaced by if N is a power
    of two constant and type is signed:
  S3  y_t = b_t < 0 ? N - 1 : 0;
  S2  x_t = b_t + y_t;
  S1' a_t = x_t >> log2 (N);

  S4 is replaced if N is a power of two constant and
  type is signed by (where *_T temporaries have unsigned type):
  S9  y_T = b_t < 0 ? -1U : 0U;
  S8  z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
  S7  z_t = (type) z_T;
  S6  w_t = b_t + z_t;
  S5  x_t = w_t & (N - 1);
  S4' a_t = x_t - z_t;

  Output:

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the division
    S1 or modulo S4 stmt.  */
3428 vect_recog_divmod_pattern (vec_info
*vinfo
,
3429 stmt_vec_info stmt_vinfo
, tree
*type_out
)
3431 gimple
*last_stmt
= stmt_vinfo
->stmt
;
3432 tree oprnd0
, oprnd1
, vectype
, itype
, cond
;
3433 gimple
*pattern_stmt
, *def_stmt
;
3434 enum tree_code rhs_code
;
3437 int dummy_int
, prec
;
3439 if (!is_gimple_assign (last_stmt
))
3442 rhs_code
= gimple_assign_rhs_code (last_stmt
);
3445 case TRUNC_DIV_EXPR
:
3446 case EXACT_DIV_EXPR
:
3447 case TRUNC_MOD_EXPR
:
3453 oprnd0
= gimple_assign_rhs1 (last_stmt
);
3454 oprnd1
= gimple_assign_rhs2 (last_stmt
);
3455 itype
= TREE_TYPE (oprnd0
);
3456 if (TREE_CODE (oprnd0
) != SSA_NAME
3457 || TREE_CODE (oprnd1
) != INTEGER_CST
3458 || TREE_CODE (itype
) != INTEGER_TYPE
3459 || !type_has_mode_precision_p (itype
))
3462 scalar_int_mode itype_mode
= SCALAR_INT_TYPE_MODE (itype
);
3463 vectype
= get_vectype_for_scalar_type (vinfo
, itype
);
3464 if (vectype
== NULL_TREE
)
3467 if (optimize_bb_for_size_p (gimple_bb (last_stmt
)))
3469 /* If the target can handle vectorized division or modulo natively,
3470 don't attempt to optimize this, since native division is likely
3471 to give smaller code. */
3472 optab
= optab_for_tree_code (rhs_code
, vectype
, optab_default
);
3473 if (optab
!= unknown_optab
)
3475 machine_mode vec_mode
= TYPE_MODE (vectype
);
3476 int icode
= (int) optab_handler (optab
, vec_mode
);
3477 if (icode
!= CODE_FOR_nothing
)
3482 prec
= TYPE_PRECISION (itype
);
3483 if (integer_pow2p (oprnd1
))
3485 if (TYPE_UNSIGNED (itype
) || tree_int_cst_sgn (oprnd1
) != 1)
3488 /* Pattern detected. */
3489 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt
);
3491 *type_out
= vectype
;
3493 /* Check if the target supports this internal function. */
3494 internal_fn ifn
= IFN_DIV_POW2
;
3495 if (direct_internal_fn_supported_p (ifn
, vectype
, OPTIMIZE_FOR_SPEED
))
3497 tree shift
= build_int_cst (itype
, tree_log2 (oprnd1
));
3499 tree var_div
= vect_recog_temp_ssa_var (itype
, NULL
);
3500 gimple
*div_stmt
= gimple_build_call_internal (ifn
, 2, oprnd0
, shift
);
3501 gimple_call_set_lhs (div_stmt
, var_div
);
3503 if (rhs_code
== TRUNC_MOD_EXPR
)
3505 append_pattern_def_seq (vinfo
, stmt_vinfo
, div_stmt
);
3507 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
3508 LSHIFT_EXPR
, var_div
, shift
);
3509 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
3511 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
3513 gimple_assign_lhs (def_stmt
));
3516 pattern_stmt
= div_stmt
;
3517 gimple_set_location (pattern_stmt
, gimple_location (last_stmt
));
3519 return pattern_stmt
;
3522 cond
= build2 (LT_EXPR
, boolean_type_node
, oprnd0
,
3523 build_int_cst (itype
, 0));
3524 if (rhs_code
== TRUNC_DIV_EXPR
3525 || rhs_code
== EXACT_DIV_EXPR
)
3527 tree var
= vect_recog_temp_ssa_var (itype
, NULL
);
3530 = gimple_build_assign (var
, COND_EXPR
, cond
,
3531 fold_build2 (MINUS_EXPR
, itype
, oprnd1
,
3532 build_int_cst (itype
, 1)),
3533 build_int_cst (itype
, 0));
3534 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
3535 var
= vect_recog_temp_ssa_var (itype
, NULL
);
3537 = gimple_build_assign (var
, PLUS_EXPR
, oprnd0
,
3538 gimple_assign_lhs (def_stmt
));
3539 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
3541 shift
= build_int_cst (itype
, tree_log2 (oprnd1
));
3543 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
3544 RSHIFT_EXPR
, var
, shift
);
3549 if (compare_tree_int (oprnd1
, 2) == 0)
3551 signmask
= vect_recog_temp_ssa_var (itype
, NULL
);
3552 def_stmt
= gimple_build_assign (signmask
, COND_EXPR
, cond
,
3553 build_int_cst (itype
, 1),
3554 build_int_cst (itype
, 0));
3555 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
3560 = build_nonstandard_integer_type (prec
, 1);
3561 tree vecutype
= get_vectype_for_scalar_type (vinfo
, utype
);
3563 = build_int_cst (utype
, GET_MODE_BITSIZE (itype_mode
)
3564 - tree_log2 (oprnd1
));
3565 tree var
= vect_recog_temp_ssa_var (utype
, NULL
);
3567 def_stmt
= gimple_build_assign (var
, COND_EXPR
, cond
,
3568 build_int_cst (utype
, -1),
3569 build_int_cst (utype
, 0));
3570 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
, vecutype
);
3571 var
= vect_recog_temp_ssa_var (utype
, NULL
);
3572 def_stmt
= gimple_build_assign (var
, RSHIFT_EXPR
,
3573 gimple_assign_lhs (def_stmt
),
3575 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
, vecutype
);
3576 signmask
= vect_recog_temp_ssa_var (itype
, NULL
);
3578 = gimple_build_assign (signmask
, NOP_EXPR
, var
);
3579 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
3582 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
3583 PLUS_EXPR
, oprnd0
, signmask
);
3584 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
3586 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
3587 BIT_AND_EXPR
, gimple_assign_lhs (def_stmt
),
3588 fold_build2 (MINUS_EXPR
, itype
, oprnd1
,
3589 build_int_cst (itype
, 1)));
3590 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
3593 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
3594 MINUS_EXPR
, gimple_assign_lhs (def_stmt
),
3598 return pattern_stmt
;
3601 if (prec
> HOST_BITS_PER_WIDE_INT
3602 || integer_zerop (oprnd1
))
3605 if (!can_mult_highpart_p (TYPE_MODE (vectype
), TYPE_UNSIGNED (itype
)))
3608 if (TYPE_UNSIGNED (itype
))
3610 unsigned HOST_WIDE_INT mh
, ml
;
3611 int pre_shift
, post_shift
;
3612 unsigned HOST_WIDE_INT d
= (TREE_INT_CST_LOW (oprnd1
)
3613 & GET_MODE_MASK (itype_mode
));
3614 tree t1
, t2
, t3
, t4
;
3616 if (d
>= (HOST_WIDE_INT_1U
<< (prec
- 1)))
3617 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
3620 /* Find a suitable multiplier and right shift count
3621 instead of multiplying with D. */
3622 mh
= choose_multiplier (d
, prec
, prec
, &ml
, &post_shift
, &dummy_int
);
3624 /* If the suggested multiplier is more than SIZE bits, we can do better
3625 for even divisors, using an initial right shift. */
3626 if (mh
!= 0 && (d
& 1) == 0)
3628 pre_shift
= ctz_or_zero (d
);
3629 mh
= choose_multiplier (d
>> pre_shift
, prec
, prec
- pre_shift
,
3630 &ml
, &post_shift
, &dummy_int
);
3638 if (post_shift
- 1 >= prec
)
	  /* t1 = oprnd0 h* ml;
	     t2 = oprnd0 - t1;
	     t3 = t2 >> 1;
	     t4 = t1 + t3;
	     q = t4 >> (post_shift - 1);  */
3646 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
3647 def_stmt
= gimple_build_assign (t1
, MULT_HIGHPART_EXPR
, oprnd0
,
3648 build_int_cst (itype
, ml
));
3649 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
3651 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
3653 = gimple_build_assign (t2
, MINUS_EXPR
, oprnd0
, t1
);
3654 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
3656 t3
= vect_recog_temp_ssa_var (itype
, NULL
);
3658 = gimple_build_assign (t3
, RSHIFT_EXPR
, t2
, integer_one_node
);
3659 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
3661 t4
= vect_recog_temp_ssa_var (itype
, NULL
);
3663 = gimple_build_assign (t4
, PLUS_EXPR
, t1
, t3
);
3665 if (post_shift
!= 1)
3667 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
3669 q
= vect_recog_temp_ssa_var (itype
, NULL
);
3671 = gimple_build_assign (q
, RSHIFT_EXPR
, t4
,
3672 build_int_cst (itype
, post_shift
- 1));
3677 pattern_stmt
= def_stmt
;
3682 if (pre_shift
>= prec
|| post_shift
>= prec
)
	  /* t1 = oprnd0 >> pre_shift;
	     t2 = t1 h* ml;
	     q = t2 >> post_shift;  */
3690 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
3692 = gimple_build_assign (t1
, RSHIFT_EXPR
, oprnd0
,
3693 build_int_cst (NULL
, pre_shift
));
3694 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
3699 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
3700 def_stmt
= gimple_build_assign (t2
, MULT_HIGHPART_EXPR
, t1
,
3701 build_int_cst (itype
, ml
));
3705 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
3707 q
= vect_recog_temp_ssa_var (itype
, NULL
);
3709 = gimple_build_assign (q
, RSHIFT_EXPR
, t2
,
3710 build_int_cst (itype
, post_shift
));
3715 pattern_stmt
= def_stmt
;
3720 unsigned HOST_WIDE_INT ml
;
3722 HOST_WIDE_INT d
= TREE_INT_CST_LOW (oprnd1
);
3723 unsigned HOST_WIDE_INT abs_d
;
3725 tree t1
, t2
, t3
, t4
;
3727 /* Give up for -1. */
3731 /* Since d might be INT_MIN, we have to cast to
3732 unsigned HOST_WIDE_INT before negating to avoid
3733 undefined signed overflow. */
3735 ? (unsigned HOST_WIDE_INT
) d
3736 : - (unsigned HOST_WIDE_INT
) d
);
3738 /* n rem d = n rem -d */
3739 if (rhs_code
== TRUNC_MOD_EXPR
&& d
< 0)
3742 oprnd1
= build_int_cst (itype
, abs_d
);
3744 if (HOST_BITS_PER_WIDE_INT
>= prec
3745 && abs_d
== HOST_WIDE_INT_1U
<< (prec
- 1))
3746 /* This case is not handled correctly below. */
3749 choose_multiplier (abs_d
, prec
, prec
- 1, &ml
, &post_shift
, &dummy_int
);
3750 if (ml
>= HOST_WIDE_INT_1U
<< (prec
- 1))
3753 ml
|= HOST_WIDE_INT_M1U
<< (prec
- 1);
3755 if (post_shift
>= prec
)
3758 /* t1 = oprnd0 h* ml; */
3759 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
3760 def_stmt
= gimple_build_assign (t1
, MULT_HIGHPART_EXPR
, oprnd0
,
3761 build_int_cst (itype
, ml
));
3765 /* t2 = t1 + oprnd0; */
3766 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
3767 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
3768 def_stmt
= gimple_build_assign (t2
, PLUS_EXPR
, t1
, oprnd0
);
3775 /* t3 = t2 >> post_shift; */
3776 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
3777 t3
= vect_recog_temp_ssa_var (itype
, NULL
);
3778 def_stmt
= gimple_build_assign (t3
, RSHIFT_EXPR
, t2
,
3779 build_int_cst (itype
, post_shift
));
3786 get_range_query (cfun
)->range_of_expr (r
, oprnd0
);
3787 if (r
.kind () == VR_RANGE
)
3789 if (!wi::neg_p (r
.lower_bound (), TYPE_SIGN (itype
)))
3791 else if (wi::neg_p (r
.upper_bound (), TYPE_SIGN (itype
)))
3795 if (msb
== 0 && d
>= 0)
3799 pattern_stmt
= def_stmt
;
	  /* t4 = oprnd0 >> (prec - 1);
	     or if we know from VRP that oprnd0 >= 0
	     t4 = 0;
	     or if we know from VRP that oprnd0 < 0
	     t4 = -1;  */
3808 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
3809 t4
= vect_recog_temp_ssa_var (itype
, NULL
);
3811 def_stmt
= gimple_build_assign (t4
, INTEGER_CST
,
3812 build_int_cst (itype
, msb
));
3814 def_stmt
= gimple_build_assign (t4
, RSHIFT_EXPR
, oprnd0
,
3815 build_int_cst (itype
, prec
- 1));
3816 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
3818 /* q = t3 - t4; or q = t4 - t3; */
3819 q
= vect_recog_temp_ssa_var (itype
, NULL
);
3820 pattern_stmt
= gimple_build_assign (q
, MINUS_EXPR
, d
< 0 ? t4
: t3
,
3825 if (rhs_code
== TRUNC_MOD_EXPR
)
3829 /* We divided. Now finish by:
3832 append_pattern_def_seq (vinfo
, stmt_vinfo
, pattern_stmt
);
3834 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
3835 def_stmt
= gimple_build_assign (t1
, MULT_EXPR
, q
, oprnd1
);
3836 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
3838 r
= vect_recog_temp_ssa_var (itype
, NULL
);
3839 pattern_stmt
= gimple_build_assign (r
, MINUS_EXPR
, oprnd0
, t1
);
  /* Pattern detected.  */
  vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);

  *type_out = vectype;
  return pattern_stmt;
}
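
/* Editorial note: an illustrative sketch, not part of the original GCC
   sources.  A scalar loop of the following shape is the kind of input the
   divmod pattern above targets; the function name and constant are made up
   for illustration.  Assuming a 32-bit int, the division by the constant 19
   becomes a highpart multiply plus shifts:

     void
     divide_by_const (int *restrict a, int n)
     {
       for (int i = 0; i < n; i++)
         a[i] = a[i] / 19;
     }

   The same recognizer handles TRUNC_MOD_EXPR (a[i] % 19) by multiplying the
   computed quotient back by the divisor and subtracting it from the original
   operand, as in the final statements built above.  */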
/* Function vect_recog_mixed_size_cond_pattern

   Try to find the following pattern:

     S1  a_T = x_t CMP y_t ? b_T : c_T;

   where type 'TYPE' is an integral type which has different size
   from 'type'.  b_T and c_T are either constants (and if 'TYPE' is wider
   than 'type', the constants need to fit into an integer type
   with the same width as 'type') or results of conversion from 'type'.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the pattern.
	Additionally a def_stmt is added.

	a_it = x_t CMP y_t ? b_it : c_it;
	a_T = (TYPE) a_it;  */

static gimple *
vect_recog_mixed_size_cond_pattern (vec_info *vinfo,
				    stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  tree cond_expr, then_clause, else_clause;
  tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
  gimple *pattern_stmt, *def_stmt;
  tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
  gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
  bool promotion;
  tree comp_scalar_type;
3890 if (!is_gimple_assign (last_stmt
)
3891 || gimple_assign_rhs_code (last_stmt
) != COND_EXPR
3892 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_internal_def
)
3895 cond_expr
= gimple_assign_rhs1 (last_stmt
);
3896 then_clause
= gimple_assign_rhs2 (last_stmt
);
3897 else_clause
= gimple_assign_rhs3 (last_stmt
);
3899 if (!COMPARISON_CLASS_P (cond_expr
))
3902 comp_scalar_type
= TREE_TYPE (TREE_OPERAND (cond_expr
, 0));
3903 comp_vectype
= get_vectype_for_scalar_type (vinfo
, comp_scalar_type
);
3904 if (comp_vectype
== NULL_TREE
)
3907 type
= TREE_TYPE (gimple_assign_lhs (last_stmt
));
3908 if (types_compatible_p (type
, comp_scalar_type
)
3909 || ((TREE_CODE (then_clause
) != INTEGER_CST
3910 || TREE_CODE (else_clause
) != INTEGER_CST
)
3911 && !INTEGRAL_TYPE_P (comp_scalar_type
))
3912 || !INTEGRAL_TYPE_P (type
))
3915 if ((TREE_CODE (then_clause
) != INTEGER_CST
3916 && !type_conversion_p (vinfo
, then_clause
, false,
3917 &orig_type0
, &def_stmt0
, &promotion
))
3918 || (TREE_CODE (else_clause
) != INTEGER_CST
3919 && !type_conversion_p (vinfo
, else_clause
, false,
3920 &orig_type1
, &def_stmt1
, &promotion
)))
3923 if (orig_type0
&& orig_type1
3924 && !types_compatible_p (orig_type0
, orig_type1
))
3929 if (!types_compatible_p (orig_type0
, comp_scalar_type
))
3931 then_clause
= gimple_assign_rhs1 (def_stmt0
);
3937 if (!types_compatible_p (orig_type1
, comp_scalar_type
))
3939 else_clause
= gimple_assign_rhs1 (def_stmt1
);
3944 HOST_WIDE_INT cmp_mode_size
3945 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype
));
3947 scalar_int_mode type_mode
= SCALAR_INT_TYPE_MODE (type
);
3948 if (GET_MODE_BITSIZE (type_mode
) == cmp_mode_size
)
3951 vectype
= get_vectype_for_scalar_type (vinfo
, type
);
3952 if (vectype
== NULL_TREE
)
3955 if (expand_vec_cond_expr_p (vectype
, comp_vectype
, TREE_CODE (cond_expr
)))
3958 if (itype
== NULL_TREE
)
3959 itype
= build_nonstandard_integer_type (cmp_mode_size
,
3960 TYPE_UNSIGNED (type
));
3962 if (itype
== NULL_TREE
3963 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype
)) != cmp_mode_size
)
3966 vecitype
= get_vectype_for_scalar_type (vinfo
, itype
);
3967 if (vecitype
== NULL_TREE
)
3970 if (!expand_vec_cond_expr_p (vecitype
, comp_vectype
, TREE_CODE (cond_expr
)))
3973 if (GET_MODE_BITSIZE (type_mode
) > cmp_mode_size
)
3975 if ((TREE_CODE (then_clause
) == INTEGER_CST
3976 && !int_fits_type_p (then_clause
, itype
))
3977 || (TREE_CODE (else_clause
) == INTEGER_CST
3978 && !int_fits_type_p (else_clause
, itype
)))
3982 def_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
3983 COND_EXPR
, unshare_expr (cond_expr
),
3984 fold_convert (itype
, then_clause
),
3985 fold_convert (itype
, else_clause
));
3986 pattern_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (type
, NULL
),
3987 NOP_EXPR
, gimple_assign_lhs (def_stmt
));
3989 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
, vecitype
);
3990 *type_out
= vectype
;
3992 vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt
);
  return pattern_stmt;
}
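
/* Editorial note: an illustrative sketch, not part of the original GCC
   sources.  The mixed-size COND_EXPR handled above corresponds to source
   like the following, where the comparison is done on one element width and
   the selected values use another (names and types made up for
   illustration):

     void
     mixed_cond (short *restrict x, short *restrict y,
                 int *restrict out, int n)
     {
       for (int i = 0; i < n; i++)
         out[i] = x[i] > y[i] ? 1 : 2;
     }

   Here the comparison works on 16-bit elements while the result is 32 bits
   wide, so the pattern first materializes the conditional in a 16-bit
   temporary (a_it) and then widens it to the final type.  */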
/* Helper function of vect_recog_bool_pattern.  Called recursively, return
   true if bool VAR can and should be optimized that way.  Assume it shouldn't
   in case it's a result of a comparison which can be directly vectorized into
   a vector comparison.  Fills in STMTS with all stmts visited during the
   walk.  */

static bool
check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
{
  tree rhs1;
  enum tree_code rhs_code;
4010 stmt_vec_info def_stmt_info
= vect_get_internal_def (vinfo
, var
);
4014 gassign
*def_stmt
= dyn_cast
<gassign
*> (def_stmt_info
->stmt
);
4018 if (stmts
.contains (def_stmt
))
4021 rhs1
= gimple_assign_rhs1 (def_stmt
);
4022 rhs_code
= gimple_assign_rhs_code (def_stmt
);
4026 if (! check_bool_pattern (rhs1
, vinfo
, stmts
))
4031 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1
)))
4033 if (! check_bool_pattern (rhs1
, vinfo
, stmts
))
4038 if (! check_bool_pattern (rhs1
, vinfo
, stmts
))
4045 if (! check_bool_pattern (rhs1
, vinfo
, stmts
)
4046 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt
), vinfo
, stmts
))
4051 if (TREE_CODE_CLASS (rhs_code
) == tcc_comparison
)
4053 tree vecitype
, comp_vectype
;
4055 /* If the comparison can throw, then is_gimple_condexpr will be
4056 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
4057 if (stmt_could_throw_p (cfun
, def_stmt
))
4060 comp_vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (rhs1
));
4061 if (comp_vectype
== NULL_TREE
)
4064 tree mask_type
= get_mask_type_for_scalar_type (vinfo
,
4067 && expand_vec_cmp_expr_p (comp_vectype
, mask_type
, rhs_code
))
4070 if (TREE_CODE (TREE_TYPE (rhs1
)) != INTEGER_TYPE
)
4072 scalar_mode mode
= SCALAR_TYPE_MODE (TREE_TYPE (rhs1
));
4074 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode
), 1);
4075 vecitype
= get_vectype_for_scalar_type (vinfo
, itype
);
4076 if (vecitype
== NULL_TREE
)
4080 vecitype
= comp_vectype
;
4081 if (! expand_vec_cond_expr_p (vecitype
, comp_vectype
, rhs_code
))
4089 bool res
= stmts
.add (def_stmt
);
4090 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
/* Helper function of adjust_bool_pattern.  Add a cast to TYPE to a previous
   stmt (SSA_NAME_DEF_STMT of VAR) adding a cast to STMT_INFOs
   pattern sequence.  */

static tree
adjust_bool_pattern_cast (vec_info *vinfo,
			  tree type, tree var, stmt_vec_info stmt_info)
{
  gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
					   NOP_EXPR, var);
  append_pattern_def_seq (vinfo, stmt_info, cast_stmt,
			  get_vectype_for_scalar_type (vinfo, type));
  return gimple_assign_lhs (cast_stmt);
}
4112 /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
4113 VAR is an SSA_NAME that should be transformed from bool to a wider integer
4114 type, OUT_TYPE is the desired final integer type of the whole pattern.
4115 STMT_INFO is the info of the pattern root and is where pattern stmts should
4116 be associated with. DEFS is a map of pattern defs. */
4119 adjust_bool_pattern (vec_info
*vinfo
, tree var
, tree out_type
,
4120 stmt_vec_info stmt_info
, hash_map
<tree
, tree
> &defs
)
4122 gimple
*stmt
= SSA_NAME_DEF_STMT (var
);
4123 enum tree_code rhs_code
, def_rhs_code
;
4124 tree itype
, cond_expr
, rhs1
, rhs2
, irhs1
, irhs2
;
4126 gimple
*pattern_stmt
, *def_stmt
;
4127 tree trueval
= NULL_TREE
;
4129 rhs1
= gimple_assign_rhs1 (stmt
);
4130 rhs2
= gimple_assign_rhs2 (stmt
);
4131 rhs_code
= gimple_assign_rhs_code (stmt
);
4132 loc
= gimple_location (stmt
);
4137 irhs1
= *defs
.get (rhs1
);
4138 itype
= TREE_TYPE (irhs1
);
4140 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
4145 irhs1
= *defs
.get (rhs1
);
4146 itype
= TREE_TYPE (irhs1
);
4148 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
4149 BIT_XOR_EXPR
, irhs1
, build_int_cst (itype
, 1));
4153 /* Try to optimize x = y & (a < b ? 1 : 0); into
4154 x = (a < b ? y : 0);
4160 S1 a_b = x1 CMP1 y1;
4161 S2 b_b = x2 CMP2 y2;
4163 S4 d_T = (TYPE) c_b;
4165 we would normally emit:
4167 S1' a_T = x1 CMP1 y1 ? 1 : 0;
4168 S2' b_T = x2 CMP2 y2 ? 1 : 0;
4169 S3' c_T = a_T & b_T;
4172 but we can save one stmt by using the
4173 result of one of the COND_EXPRs in the other COND_EXPR and leave
4174 BIT_AND_EXPR stmt out:
4176 S1' a_T = x1 CMP1 y1 ? 1 : 0;
4177 S3' c_T = x2 CMP2 y2 ? a_T : 0;
4180 At least when VEC_COND_EXPR is implemented using masks
4181 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
4182 computes the comparison masks and ands it, in one case with
4183 all ones vector, in the other case with a vector register.
4184 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
4185 often more expensive. */
4186 def_stmt
= SSA_NAME_DEF_STMT (rhs2
);
4187 def_rhs_code
= gimple_assign_rhs_code (def_stmt
);
4188 if (TREE_CODE_CLASS (def_rhs_code
) == tcc_comparison
)
4190 irhs1
= *defs
.get (rhs1
);
4191 tree def_rhs1
= gimple_assign_rhs1 (def_stmt
);
4192 if (TYPE_PRECISION (TREE_TYPE (irhs1
))
4193 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1
))))
4195 rhs_code
= def_rhs_code
;
4197 rhs2
= gimple_assign_rhs2 (def_stmt
);
4202 irhs2
= *defs
.get (rhs2
);
4205 def_stmt
= SSA_NAME_DEF_STMT (rhs1
);
4206 def_rhs_code
= gimple_assign_rhs_code (def_stmt
);
4207 if (TREE_CODE_CLASS (def_rhs_code
) == tcc_comparison
)
4209 irhs2
= *defs
.get (rhs2
);
4210 tree def_rhs1
= gimple_assign_rhs1 (def_stmt
);
4211 if (TYPE_PRECISION (TREE_TYPE (irhs2
))
4212 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1
))))
4214 rhs_code
= def_rhs_code
;
4216 rhs2
= gimple_assign_rhs2 (def_stmt
);
4221 irhs1
= *defs
.get (rhs1
);
4227 irhs1
= *defs
.get (rhs1
);
4228 irhs2
= *defs
.get (rhs2
);
4230 if (TYPE_PRECISION (TREE_TYPE (irhs1
))
4231 != TYPE_PRECISION (TREE_TYPE (irhs2
)))
4233 int prec1
= TYPE_PRECISION (TREE_TYPE (irhs1
));
4234 int prec2
= TYPE_PRECISION (TREE_TYPE (irhs2
));
4235 int out_prec
= TYPE_PRECISION (out_type
);
4236 if (absu_hwi (out_prec
- prec1
) < absu_hwi (out_prec
- prec2
))
4237 irhs2
= adjust_bool_pattern_cast (vinfo
, TREE_TYPE (irhs1
), irhs2
,
4239 else if (absu_hwi (out_prec
- prec1
) > absu_hwi (out_prec
- prec2
))
4240 irhs1
= adjust_bool_pattern_cast (vinfo
, TREE_TYPE (irhs2
), irhs1
,
4244 irhs1
= adjust_bool_pattern_cast (vinfo
,
4245 out_type
, irhs1
, stmt_info
);
4246 irhs2
= adjust_bool_pattern_cast (vinfo
,
4247 out_type
, irhs2
, stmt_info
);
4250 itype
= TREE_TYPE (irhs1
);
4252 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
4253 rhs_code
, irhs1
, irhs2
);
4258 gcc_assert (TREE_CODE_CLASS (rhs_code
) == tcc_comparison
);
4259 if (TREE_CODE (TREE_TYPE (rhs1
)) != INTEGER_TYPE
4260 || !TYPE_UNSIGNED (TREE_TYPE (rhs1
))
4261 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1
)),
4262 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1
)))))
4264 scalar_mode mode
= SCALAR_TYPE_MODE (TREE_TYPE (rhs1
));
4266 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode
), 1);
4269 itype
= TREE_TYPE (rhs1
);
4270 cond_expr
= build2_loc (loc
, rhs_code
, itype
, rhs1
, rhs2
);
4271 if (trueval
== NULL_TREE
)
4272 trueval
= build_int_cst (itype
, 1);
4274 gcc_checking_assert (useless_type_conversion_p (itype
,
4275 TREE_TYPE (trueval
)));
4277 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
4278 COND_EXPR
, cond_expr
, trueval
,
4279 build_int_cst (itype
, 0));
4283 gimple_set_location (pattern_stmt
, loc
);
4284 append_pattern_def_seq (vinfo
, stmt_info
, pattern_stmt
,
4285 get_vectype_for_scalar_type (vinfo
, itype
));
4286 defs
.put (var
, gimple_assign_lhs (pattern_stmt
));
/* Comparison function to qsort a vector of gimple stmts after UID.  */

static int
sort_after_uid (const void *p1, const void *p2)
{
  const gimple *stmt1 = *(const gimple * const *)p1;
  const gimple *stmt2 = *(const gimple * const *)p2;
  return gimple_uid (stmt1) - gimple_uid (stmt2);
}
/* Create pattern stmts for all stmts participating in the bool pattern
   specified by BOOL_STMT_SET and its root STMT_INFO with the desired type
   OUT_TYPE.  Return the def of the pattern root.  */

static tree
adjust_bool_stmts (vec_info *vinfo, hash_set <gimple *> &bool_stmt_set,
		   tree out_type, stmt_vec_info stmt_info)
{
  /* Gather original stmts in the bool pattern in their order of appearance
     in the IL.  */
  auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
  for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
       i != bool_stmt_set.end (); ++i)
    bool_stmts.quick_push (*i);
  bool_stmts.qsort (sort_after_uid);

  /* Now process them in that order, producing pattern stmts.  */
  hash_map <tree, tree> defs;
  for (unsigned i = 0; i < bool_stmts.length (); ++i)
    adjust_bool_pattern (vinfo, gimple_assign_lhs (bool_stmts[i]),
			 out_type, stmt_info, defs);

  /* Pop the last pattern seq stmt and install it as pattern root for STMT.  */
  gimple *pattern_stmt
    = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
  return gimple_assign_lhs (pattern_stmt);
}
/* Return the proper type for converting bool VAR into
   an integer value or NULL_TREE if no such type exists.
   The type is chosen so that the converted value has the
   same number of elements as VAR's vector type.  */

static tree
integer_type_for_mask (tree var, vec_info *vinfo)
{
  if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
    return NULL_TREE;

  stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
  if (!def_stmt_info || !vect_use_mask_type_p (def_stmt_info))
    return NULL_TREE;

  return build_nonstandard_integer_type (def_stmt_info->mask_precision, 1);
}
/* Function vect_recog_bool_pattern

   Try to find pattern like following:

     bool a_b, b_b, c_b, d_b, e_b;
     TYPE f_T;
   loop:
     S1  a_b = x1 CMP1 y1;
     S2  b_b = x2 CMP2 y2;
     S3  c_b = a_b & b_b;
     S4  d_b = x3 CMP3 y3;
     S5  e_b = c_b | d_b;
     S6  f_T = (TYPE) e_b;

   where type 'TYPE' is an integral type.  Or a similar pattern
   ending in

     S6  f_Y = e_b ? r_Y : s_Y;

   as results from if-conversion of a complex condition.

   Input:

   * STMT_VINFO: The stmt at the end from which the pattern
		 search begins, i.e. cast of a bool to
		 an integer type.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the pattern.

	Assuming size of TYPE is the same as size of all comparisons
	(otherwise some casts would be added where needed), the above
	sequence we create related pattern stmts:
	S1'  a_T = x1 CMP1 y1 ? 1 : 0;
	S3'  c_T = x2 CMP2 y2 ? a_T : 0;
	S4'  d_T = x3 CMP3 y3 ? 1 : 0;
	S5'  e_T = c_T | d_T;
	S6'  f_T = e_T;

	Instead of the above S3' we could emit:
	S2'  b_T = x2 CMP2 y2 ? 1 : 0;
	S3'  c_T = a_T | b_T;
	but the above is more efficient.  */
4393 vect_recog_bool_pattern (vec_info
*vinfo
,
4394 stmt_vec_info stmt_vinfo
, tree
*type_out
)
4396 gimple
*last_stmt
= stmt_vinfo
->stmt
;
4397 enum tree_code rhs_code
;
4398 tree var
, lhs
, rhs
, vectype
;
4399 gimple
*pattern_stmt
;
4401 if (!is_gimple_assign (last_stmt
))
4404 var
= gimple_assign_rhs1 (last_stmt
);
4405 lhs
= gimple_assign_lhs (last_stmt
);
4406 rhs_code
= gimple_assign_rhs_code (last_stmt
);
4408 if (rhs_code
== VIEW_CONVERT_EXPR
)
4409 var
= TREE_OPERAND (var
, 0);
4411 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var
)))
4414 hash_set
<gimple
*> bool_stmts
;
4416 if (CONVERT_EXPR_CODE_P (rhs_code
)
4417 || rhs_code
== VIEW_CONVERT_EXPR
)
4419 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
4420 || VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs
)))
4422 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (lhs
));
4424 if (check_bool_pattern (var
, vinfo
, bool_stmts
))
4426 rhs
= adjust_bool_stmts (vinfo
, bool_stmts
,
4427 TREE_TYPE (lhs
), stmt_vinfo
);
4428 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
4429 if (useless_type_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (rhs
)))
4430 pattern_stmt
= gimple_build_assign (lhs
, SSA_NAME
, rhs
);
4433 = gimple_build_assign (lhs
, NOP_EXPR
, rhs
);
4437 tree type
= integer_type_for_mask (var
, vinfo
);
4438 tree cst0
, cst1
, tmp
;
4443 /* We may directly use cond with narrowed type to avoid
4444 multiple cond exprs with following result packing and
4445 perform single cond with packed mask instead. In case
4446 of widening we better make cond first and then extract
4448 if (TYPE_MODE (type
) == TYPE_MODE (TREE_TYPE (lhs
)))
4449 type
= TREE_TYPE (lhs
);
4451 cst0
= build_int_cst (type
, 0);
4452 cst1
= build_int_cst (type
, 1);
4453 tmp
= vect_recog_temp_ssa_var (type
, NULL
);
4454 pattern_stmt
= gimple_build_assign (tmp
, COND_EXPR
, var
, cst1
, cst0
);
4456 if (!useless_type_conversion_p (type
, TREE_TYPE (lhs
)))
4458 tree new_vectype
= get_vectype_for_scalar_type (vinfo
, type
);
4459 append_pattern_def_seq (vinfo
, stmt_vinfo
,
4460 pattern_stmt
, new_vectype
);
4462 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
4463 pattern_stmt
= gimple_build_assign (lhs
, CONVERT_EXPR
, tmp
);
4467 *type_out
= vectype
;
4468 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt
);
4470 return pattern_stmt
;
4472 else if (rhs_code
== COND_EXPR
4473 && TREE_CODE (var
) == SSA_NAME
)
4475 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (lhs
));
4476 if (vectype
== NULL_TREE
)
4479 /* Build a scalar type for the boolean result that when
4480 vectorized matches the vector type of the result in
4481 size and number of elements. */
4483 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype
)),
4484 TYPE_VECTOR_SUBPARTS (vectype
));
4487 = build_nonstandard_integer_type (prec
,
4488 TYPE_UNSIGNED (TREE_TYPE (var
)));
4489 if (get_vectype_for_scalar_type (vinfo
, type
) == NULL_TREE
)
4492 if (check_bool_pattern (var
, vinfo
, bool_stmts
))
4493 var
= adjust_bool_stmts (vinfo
, bool_stmts
, type
, stmt_vinfo
);
4494 else if (integer_type_for_mask (var
, vinfo
))
4497 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
4499 = gimple_build_assign (lhs
, COND_EXPR
,
4500 build2 (NE_EXPR
, boolean_type_node
,
4501 var
, build_int_cst (TREE_TYPE (var
), 0)),
4502 gimple_assign_rhs2 (last_stmt
),
4503 gimple_assign_rhs3 (last_stmt
));
4504 *type_out
= vectype
;
4505 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt
);
4507 return pattern_stmt
;
4509 else if (rhs_code
== SSA_NAME
4510 && STMT_VINFO_DATA_REF (stmt_vinfo
))
4512 stmt_vec_info pattern_stmt_info
;
4513 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (lhs
));
4514 if (!vectype
|| !VECTOR_MODE_P (TYPE_MODE (vectype
)))
4517 if (check_bool_pattern (var
, vinfo
, bool_stmts
))
4518 rhs
= adjust_bool_stmts (vinfo
, bool_stmts
,
4519 TREE_TYPE (vectype
), stmt_vinfo
);
4522 tree type
= integer_type_for_mask (var
, vinfo
);
4523 tree cst0
, cst1
, new_vectype
;
4528 if (TYPE_MODE (type
) == TYPE_MODE (TREE_TYPE (vectype
)))
4529 type
= TREE_TYPE (vectype
);
4531 cst0
= build_int_cst (type
, 0);
4532 cst1
= build_int_cst (type
, 1);
4533 new_vectype
= get_vectype_for_scalar_type (vinfo
, type
);
4535 rhs
= vect_recog_temp_ssa_var (type
, NULL
);
4536 pattern_stmt
= gimple_build_assign (rhs
, COND_EXPR
, var
, cst1
, cst0
);
4537 append_pattern_def_seq (vinfo
, stmt_vinfo
, pattern_stmt
, new_vectype
);
4540 lhs
= build1 (VIEW_CONVERT_EXPR
, TREE_TYPE (vectype
), lhs
);
4541 if (!useless_type_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (rhs
)))
4543 tree rhs2
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
4544 gimple
*cast_stmt
= gimple_build_assign (rhs2
, NOP_EXPR
, rhs
);
4545 append_pattern_def_seq (vinfo
, stmt_vinfo
, cast_stmt
);
4548 pattern_stmt
= gimple_build_assign (lhs
, SSA_NAME
, rhs
);
4549 pattern_stmt_info
= vinfo
->add_stmt (pattern_stmt
);
4550 vinfo
->move_dr (pattern_stmt_info
, stmt_vinfo
);
4551 *type_out
= vectype
;
4552 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt
);
      return pattern_stmt;
    }
  else
    return NULL;
}
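
/* Editorial note: an illustrative sketch, not part of the original GCC
   sources.  If-converted source such as the following produces the
   bool-typed intermediates that the pattern above rewrites into COND_EXPRs
   (names and types made up for illustration):

     void
     bool_to_int (int *restrict a, int *restrict b, int *restrict c,
                  int *restrict out, int n)
     {
       for (int i = 0; i < n; i++)
         out[i] = (a[i] > b[i]) & (b[i] > c[i]);
     }

   After if-conversion the two comparisons feed a boolean AND whose result
   is widened to int, matching the S1..S6 shape described in the function
   comment above.  */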
4561 /* A helper for vect_recog_mask_conversion_pattern. Build
4562 conversion of MASK to a type suitable for masking VECTYPE.
4563 Built statement gets required vectype and is appended to
4564 a pattern sequence of STMT_VINFO.
4566 Return converted mask. */
4569 build_mask_conversion (vec_info
*vinfo
,
4570 tree mask
, tree vectype
, stmt_vec_info stmt_vinfo
)
4575 masktype
= truth_type_for (vectype
);
4576 tmp
= vect_recog_temp_ssa_var (TREE_TYPE (masktype
), NULL
);
4577 stmt
= gimple_build_assign (tmp
, CONVERT_EXPR
, mask
);
4578 append_pattern_def_seq (vinfo
, stmt_vinfo
,
4579 stmt
, masktype
, TREE_TYPE (vectype
));
/* Function vect_recog_mask_conversion_pattern

   Try to find statements which require boolean type
   conversion.  Additional conversion statements are
   added to handle such cases.  For example:

   S1   m_1 = i_4 > i_5;
   S2   m_2 = d_6 < d_7;
   S3   m_3 = m_1 & m_2;
   S4   c_1 = m_3 ? c_2 : c_3;

   Will be transformed into:

   S1   m_1 = i_4 > i_5;
   S2   m_2 = d_6 < d_7;
   S3'' m_2' = (_Bool[bitsize=32])m_2
   S3'  m_3' = m_1 & m_2';
   S4'' m_3'' = (_Bool[bitsize=8])m_3'
   S4'  c_1' = m_3'' ? c_2 : c_3;  */
4611 vect_recog_mask_conversion_pattern (vec_info
*vinfo
,
4612 stmt_vec_info stmt_vinfo
, tree
*type_out
)
4614 gimple
*last_stmt
= stmt_vinfo
->stmt
;
4615 enum tree_code rhs_code
;
4616 tree lhs
= NULL_TREE
, rhs1
, rhs2
, tmp
, rhs1_type
, rhs2_type
;
4617 tree vectype1
, vectype2
;
4618 stmt_vec_info pattern_stmt_info
;
4619 tree rhs1_op0
= NULL_TREE
, rhs1_op1
= NULL_TREE
;
4620 tree rhs1_op0_type
= NULL_TREE
, rhs1_op1_type
= NULL_TREE
;
  /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion.  */
4623 if (is_gimple_call (last_stmt
)
4624 && gimple_call_internal_p (last_stmt
))
4626 gcall
*pattern_stmt
;
4628 internal_fn ifn
= gimple_call_internal_fn (last_stmt
);
4629 int mask_argno
= internal_fn_mask_index (ifn
);
4633 bool store_p
= internal_store_fn_p (ifn
);
4636 int rhs_index
= internal_fn_stored_value_index (ifn
);
4637 tree rhs
= gimple_call_arg (last_stmt
, rhs_index
);
4638 vectype1
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (rhs
));
4642 lhs
= gimple_call_lhs (last_stmt
);
4643 vectype1
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (lhs
));
4646 tree mask_arg
= gimple_call_arg (last_stmt
, mask_argno
);
4647 tree mask_arg_type
= integer_type_for_mask (mask_arg
, vinfo
);
4650 vectype2
= get_mask_type_for_scalar_type (vinfo
, mask_arg_type
);
4652 if (!vectype1
|| !vectype2
4653 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1
),
4654 TYPE_VECTOR_SUBPARTS (vectype2
)))
4657 tmp
= build_mask_conversion (vinfo
, mask_arg
, vectype1
, stmt_vinfo
);
4659 auto_vec
<tree
, 8> args
;
4660 unsigned int nargs
= gimple_call_num_args (last_stmt
);
4661 args
.safe_grow (nargs
, true);
4662 for (unsigned int i
= 0; i
< nargs
; ++i
)
4663 args
[i
] = ((int) i
== mask_argno
4665 : gimple_call_arg (last_stmt
, i
));
4666 pattern_stmt
= gimple_build_call_internal_vec (ifn
, args
);
4670 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
4671 gimple_call_set_lhs (pattern_stmt
, lhs
);
4673 gimple_call_set_nothrow (pattern_stmt
, true);
4675 pattern_stmt_info
= vinfo
->add_stmt (pattern_stmt
);
4676 if (STMT_VINFO_DATA_REF (stmt_vinfo
))
4677 vinfo
->move_dr (pattern_stmt_info
, stmt_vinfo
);
4679 *type_out
= vectype1
;
4680 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt
);
4682 return pattern_stmt
;
4685 if (!is_gimple_assign (last_stmt
))
4688 gimple
*pattern_stmt
;
4689 lhs
= gimple_assign_lhs (last_stmt
);
4690 rhs1
= gimple_assign_rhs1 (last_stmt
);
4691 rhs_code
= gimple_assign_rhs_code (last_stmt
);
4693 /* Check for cond expression requiring mask conversion. */
4694 if (rhs_code
== COND_EXPR
)
4696 vectype1
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (lhs
));
4698 if (TREE_CODE (rhs1
) == SSA_NAME
)
4700 rhs1_type
= integer_type_for_mask (rhs1
, vinfo
);
4704 else if (COMPARISON_CLASS_P (rhs1
))
4706 /* Check whether we're comparing scalar booleans and (if so)
4707 whether a better mask type exists than the mask associated
4708 with boolean-sized elements. This avoids unnecessary packs
4709 and unpacks if the booleans are set from comparisons of
4710 wider types. E.g. in:
	     int x1, x2, x3, x4, y1, y2;

	     bool b1 = (x1 == x2);
	     bool b2 = (x3 == x4);
	     ... = b1 == b2 ? y1 : y2;

	 it is better for b1 and b2 to use the mask type associated
	 with int elements rather than bool (byte) elements.  */
4720 rhs1_op0
= TREE_OPERAND (rhs1
, 0);
4721 rhs1_op1
= TREE_OPERAND (rhs1
, 1);
4722 if (!rhs1_op0
|| !rhs1_op1
)
4724 rhs1_op0_type
= integer_type_for_mask (rhs1_op0
, vinfo
);
4725 rhs1_op1_type
= integer_type_for_mask (rhs1_op1
, vinfo
);
4728 rhs1_type
= TREE_TYPE (rhs1_op0
);
4729 else if (!rhs1_op1_type
)
4730 rhs1_type
= TREE_TYPE (rhs1_op1
);
4731 else if (TYPE_PRECISION (rhs1_op0_type
)
4732 != TYPE_PRECISION (rhs1_op1_type
))
4734 int tmp0
= (int) TYPE_PRECISION (rhs1_op0_type
)
4735 - (int) TYPE_PRECISION (TREE_TYPE (lhs
));
4736 int tmp1
= (int) TYPE_PRECISION (rhs1_op1_type
)
4737 - (int) TYPE_PRECISION (TREE_TYPE (lhs
));
4738 if ((tmp0
> 0 && tmp1
> 0) || (tmp0
< 0 && tmp1
< 0))
4740 if (abs (tmp0
) > abs (tmp1
))
4741 rhs1_type
= rhs1_op1_type
;
4743 rhs1_type
= rhs1_op0_type
;
4746 rhs1_type
= build_nonstandard_integer_type
4747 (TYPE_PRECISION (TREE_TYPE (lhs
)), 1);
4750 rhs1_type
= rhs1_op0_type
;
4755 vectype2
= get_mask_type_for_scalar_type (vinfo
, rhs1_type
);
4757 if (!vectype1
|| !vectype2
)
4760 /* Continue if a conversion is needed. Also continue if we have
4761 a comparison whose vector type would normally be different from
4762 VECTYPE2 when considered in isolation. In that case we'll
4763 replace the comparison with an SSA name (so that we can record
4764 its vector type) and behave as though the comparison was an SSA
4765 name from the outset. */
4766 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1
),
4767 TYPE_VECTOR_SUBPARTS (vectype2
))
4772 /* If rhs1 is invariant and we can promote it leave the COND_EXPR
4773 in place, we can handle it in vectorizable_condition. This avoids
4774 unnecessary promotion stmts and increased vectorization factor. */
4775 if (COMPARISON_CLASS_P (rhs1
)
4776 && INTEGRAL_TYPE_P (rhs1_type
)
4777 && known_le (TYPE_VECTOR_SUBPARTS (vectype1
),
4778 TYPE_VECTOR_SUBPARTS (vectype2
)))
4780 enum vect_def_type dt
;
4781 if (vect_is_simple_use (TREE_OPERAND (rhs1
, 0), vinfo
, &dt
)
4782 && dt
== vect_external_def
4783 && vect_is_simple_use (TREE_OPERAND (rhs1
, 1), vinfo
, &dt
)
4784 && (dt
== vect_external_def
4785 || dt
== vect_constant_def
))
4787 tree wide_scalar_type
= build_nonstandard_integer_type
4788 (vector_element_bits (vectype1
), TYPE_UNSIGNED (rhs1_type
));
4789 tree vectype3
= get_vectype_for_scalar_type (vinfo
,
4791 if (expand_vec_cond_expr_p (vectype1
, vectype3
, TREE_CODE (rhs1
)))
4796 /* If rhs1 is a comparison we need to move it into a
4797 separate statement. */
4798 if (TREE_CODE (rhs1
) != SSA_NAME
)
4800 tmp
= vect_recog_temp_ssa_var (TREE_TYPE (rhs1
), NULL
);
4802 && TYPE_PRECISION (rhs1_op0_type
) != TYPE_PRECISION (rhs1_type
))
4803 rhs1_op0
= build_mask_conversion (vinfo
, rhs1_op0
,
4804 vectype2
, stmt_vinfo
);
4806 && TYPE_PRECISION (rhs1_op1_type
) != TYPE_PRECISION (rhs1_type
))
4807 rhs1_op1
= build_mask_conversion (vinfo
, rhs1_op1
,
4808 vectype2
, stmt_vinfo
);
4809 pattern_stmt
= gimple_build_assign (tmp
, TREE_CODE (rhs1
),
4810 rhs1_op0
, rhs1_op1
);
4812 append_pattern_def_seq (vinfo
, stmt_vinfo
, pattern_stmt
, vectype2
,
4816 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
4817 TYPE_VECTOR_SUBPARTS (vectype2
)))
4818 tmp
= build_mask_conversion (vinfo
, rhs1
, vectype1
, stmt_vinfo
);
4822 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
4823 pattern_stmt
= gimple_build_assign (lhs
, COND_EXPR
, tmp
,
4824 gimple_assign_rhs2 (last_stmt
),
4825 gimple_assign_rhs3 (last_stmt
));
4827 *type_out
= vectype1
;
4828 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt
);
4830 return pattern_stmt
;
4833 /* Now check for binary boolean operations requiring conversion for
4835 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs
)))
4838 if (rhs_code
!= BIT_IOR_EXPR
4839 && rhs_code
!= BIT_XOR_EXPR
4840 && rhs_code
!= BIT_AND_EXPR
4841 && TREE_CODE_CLASS (rhs_code
) != tcc_comparison
)
4844 rhs2
= gimple_assign_rhs2 (last_stmt
);
4846 rhs1_type
= integer_type_for_mask (rhs1
, vinfo
);
4847 rhs2_type
= integer_type_for_mask (rhs2
, vinfo
);
4849 if (!rhs1_type
|| !rhs2_type
4850 || TYPE_PRECISION (rhs1_type
) == TYPE_PRECISION (rhs2_type
))
4853 if (TYPE_PRECISION (rhs1_type
) < TYPE_PRECISION (rhs2_type
))
4855 vectype1
= get_mask_type_for_scalar_type (vinfo
, rhs1_type
);
4858 rhs2
= build_mask_conversion (vinfo
, rhs2
, vectype1
, stmt_vinfo
);
4862 vectype1
= get_mask_type_for_scalar_type (vinfo
, rhs2_type
);
4865 rhs1
= build_mask_conversion (vinfo
, rhs1
, vectype1
, stmt_vinfo
);
4868 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
4869 pattern_stmt
= gimple_build_assign (lhs
, rhs_code
, rhs1
, rhs2
);
4871 *type_out
= vectype1
;
4872 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt
);
  return pattern_stmt;
}
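
/* Editorial note: an illustrative sketch, not part of the original GCC
   sources.  Mask conversion is typically needed when masks produced by
   comparisons of differently sized elements are combined (names and types
   made up for illustration):

     void
     mixed_masks (int *restrict i4, double *restrict d6,
                  char *restrict c2, char *restrict out, int n)
     {
       for (int x = 0; x < n; x++)
         out[x] = (i4[x] > 0 && d6[x] < 1.0) ? c2[x] : 0;
     }

   The int comparison produces a mask sized for 32-bit elements and the
   double comparison one sized for 64-bit elements, so conversion statements
   are inserted before the masks can be combined and used in the COND_EXPR,
   as in the S3''/S4'' statements of the example in the function comment
   above.  */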
/* STMT_INFO is a load or store.  If the load or store is conditional, return
   the boolean condition under which it occurs, otherwise return null.  */

static tree
vect_get_load_store_mask (stmt_vec_info stmt_info)
{
  if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt))
    {
      gcc_assert (gimple_assign_single_p (def_assign));
      return NULL_TREE;
    }

  if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt))
    {
      internal_fn ifn = gimple_call_internal_fn (def_call);
      int mask_index = internal_fn_mask_index (ifn);
      return gimple_call_arg (def_call, mask_index);
    }

  gcc_unreachable ();
}
/* Return MASK if MASK is suitable for masking an operation on vectors
   of type VECTYPE, otherwise convert it into such a form and return
   the result.  Associate any conversion statements with STMT_INFO's
   pattern.  */

static tree
vect_convert_mask_for_vectype (tree mask, tree vectype,
			       stmt_vec_info stmt_info, vec_info *vinfo)
{
  tree mask_type = integer_type_for_mask (mask, vinfo);
  if (mask_type)
    {
      tree mask_vectype = get_mask_type_for_scalar_type (vinfo, mask_type);
      if (mask_vectype
	  && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
		       TYPE_VECTOR_SUBPARTS (mask_vectype)))
	mask = build_mask_conversion (vinfo, mask, vectype, stmt_info);
    }
  return mask;
}
/* Return the equivalent of:

     fold_convert (TYPE, VALUE)

   with the expectation that the operation will be vectorized.
   If new statements are needed, add them as pattern statements
   to STMT_INFO.  */

static tree
vect_add_conversion_to_pattern (vec_info *vinfo,
				tree type, tree value, stmt_vec_info stmt_info)
{
  if (useless_type_conversion_p (type, TREE_TYPE (value)))
    return value;

  tree new_value = vect_recog_temp_ssa_var (type, NULL);
  gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
  append_pattern_def_seq (vinfo, stmt_info, conversion,
			  get_vectype_for_scalar_type (vinfo, type));
  return new_value;
}
/* Try to convert STMT_INFO into a call to a gather load or scatter store
   internal function.  Return the final statement on success and set
   *TYPE_OUT to the vector type being loaded or stored.

   This function only handles gathers and scatters that were recognized
   as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P).  */

static gimple *
vect_recog_gather_scatter_pattern (vec_info *vinfo,
				   stmt_vec_info stmt_info, tree *type_out)
{
4953 /* Currently we only support this for loop vectorization. */
4954 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
4958 /* Make sure that we're looking at a gather load or scatter store. */
4959 data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
4960 if (!dr
|| !STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
4963 /* Get the boolean that controls whether the load or store happens.
4964 This is null if the operation is unconditional. */
4965 tree mask
= vect_get_load_store_mask (stmt_info
);
4967 /* Make sure that the target supports an appropriate internal
4968 function for the gather/scatter operation. */
4969 gather_scatter_info gs_info
;
4970 if (!vect_check_gather_scatter (stmt_info
, loop_vinfo
, &gs_info
)
4971 || gs_info
.ifn
== IFN_LAST
)
4974 /* Convert the mask to the right form. */
4975 tree gs_vectype
= get_vectype_for_scalar_type (loop_vinfo
,
4976 gs_info
.element_type
);
4978 mask
= vect_convert_mask_for_vectype (mask
, gs_vectype
, stmt_info
,
4980 else if (gs_info
.ifn
== IFN_MASK_SCATTER_STORE
4981 || gs_info
.ifn
== IFN_MASK_GATHER_LOAD
)
4982 mask
= build_int_cst (TREE_TYPE (truth_type_for (gs_vectype
)), -1);
4984 /* Get the invariant base and non-invariant offset, converting the
4985 latter to the same width as the vector elements. */
4986 tree base
= gs_info
.base
;
4987 tree offset_type
= TREE_TYPE (gs_info
.offset_vectype
);
4988 tree offset
= vect_add_conversion_to_pattern (vinfo
, offset_type
,
4989 gs_info
.offset
, stmt_info
);
4991 /* Build the new pattern statement. */
4992 tree scale
= size_int (gs_info
.scale
);
4993 gcall
*pattern_stmt
;
4994 if (DR_IS_READ (dr
))
4996 tree zero
= build_zero_cst (gs_info
.element_type
);
4998 pattern_stmt
= gimple_build_call_internal (gs_info
.ifn
, 5, base
,
4999 offset
, scale
, zero
, mask
);
5001 pattern_stmt
= gimple_build_call_internal (gs_info
.ifn
, 4, base
,
5002 offset
, scale
, zero
);
5003 tree load_lhs
= vect_recog_temp_ssa_var (gs_info
.element_type
, NULL
);
5004 gimple_call_set_lhs (pattern_stmt
, load_lhs
);
5008 tree rhs
= vect_get_store_rhs (stmt_info
);
5010 pattern_stmt
= gimple_build_call_internal (gs_info
.ifn
, 5,
5011 base
, offset
, scale
, rhs
,
5014 pattern_stmt
= gimple_build_call_internal (gs_info
.ifn
, 4,
5015 base
, offset
, scale
, rhs
);
5017 gimple_call_set_nothrow (pattern_stmt
, true);
5019 /* Copy across relevant vectorization info and associate DR with the
5020 new pattern statement instead of the original statement. */
5021 stmt_vec_info pattern_stmt_info
= loop_vinfo
->add_stmt (pattern_stmt
);
5022 loop_vinfo
->move_dr (pattern_stmt_info
, stmt_info
);
5024 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5025 *type_out
= vectype
;
5026 vect_pattern_detected ("gather/scatter pattern", stmt_info
->stmt
);
  return pattern_stmt;
}
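
/* Editorial note: an illustrative sketch, not part of the original GCC
   sources.  A loop with an indexed load such as the following is the kind
   of gather the pattern above rewrites into an internal-function call
   (names and types made up for illustration):

     void
     gather (double *restrict out, double *restrict base,
             int *restrict idx, int n)
     {
       for (int i = 0; i < n; i++)
         out[i] = base[idx[i]];
     }

   The store form, base[idx[i]] = out[i], is handled analogously through the
   scatter-store internal functions, and a conditional access adds the mask
   operand converted above.  */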
/* Return true if TYPE is a non-boolean integer type.  These are the types
   that we want to consider for narrowing.  */

static bool
vect_narrowable_type_p (tree type)
{
  return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
}

/* Return true if the operation given by CODE can be truncated to N bits
   when only N bits of the output are needed.  This is only true if bit N+1
   of the inputs has no effect on the low N bits of the result.  */

static bool
vect_truncatable_operation_p (tree_code code)
{
  switch (code)
    {
    case PLUS_EXPR:
    case MINUS_EXPR:
    case MULT_EXPR:
    case BIT_AND_EXPR:
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
    case COND_EXPR:
      return true;

    default:
      return false;
    }
}
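
/* Editorial note: an illustrative worked example, not part of the original
   GCC sources, of why additions are truncatable but right shifts are not.
   The constants are made up for illustration:

     (0x1f3 + 0x021) & 0xff == ((0x1f3 & 0xff) + (0x021 & 0xff)) & 0xff

   because bit 8 of the inputs cannot affect the low 8 bits of the sum,
   whereas

     (0x1f3 >> 4) & 0xff != ((0x1f3 & 0xff) >> 4) & 0xff

   because high input bits are shifted down into the low result bits;
   shifts therefore get special handling in
   vect_determine_precisions_from_users below.  */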
/* Record that STMT_INFO could be changed from operating on TYPE to
   operating on a type with the precision and sign given by PRECISION
   and SIGN respectively.  PRECISION is an arbitrary bit precision;
   it might not be a whole number of bytes.  */

static void
vect_set_operation_type (stmt_vec_info stmt_info, tree type,
			 unsigned int precision, signop sign)
{
  /* Round the precision up to a whole number of bytes.  */
  precision = vect_element_precision (precision);
  if (precision < TYPE_PRECISION (type)
      && (!stmt_info->operation_precision
	  || stmt_info->operation_precision > precision))
    {
      stmt_info->operation_precision = precision;
      stmt_info->operation_sign = sign;
    }
}
5083 /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
5084 non-boolean inputs, all of which have type TYPE. MIN_INPUT_PRECISION
5085 is an arbitrary bit precision; it might not be a whole number of bytes. */
5088 vect_set_min_input_precision (stmt_vec_info stmt_info
, tree type
,
5089 unsigned int min_input_precision
)
5091 /* This operation in isolation only requires the inputs to have
     MIN_INPUT_PRECISION of precision.  However, that doesn't mean
5093 that MIN_INPUT_PRECISION is a natural precision for the chain
5094 as a whole. E.g. consider something like:
5096 unsigned short *x, *y;
5097 *y = ((*x & 0xf0) >> 4) | (*y << 4);
5099 The right shift can be done on unsigned chars, and only requires the
5100 result of "*x & 0xf0" to be done on unsigned chars. But taking that
5101 approach would mean turning a natural chain of single-vector unsigned
5102 short operations into one that truncates "*x" and then extends
5103 "(*x & 0xf0) >> 4", with two vectors for each unsigned short
5104 operation and one vector for each unsigned char operation.
5105 This would be a significant pessimization.
5107 Instead only propagate the maximum of this precision and the precision
5108 required by the users of the result. This means that we don't pessimize
5109 the case above but continue to optimize things like:
5113 *y = ((*x & 0xf0) >> 4) | (*y << 4);
5115 Here we would truncate two vectors of *x to a single vector of
5116 unsigned chars and use single-vector unsigned char operations for
5117 everything else, rather than doing two unsigned short copies of
5118 "(*x & 0xf0) >> 4" and then truncating the result. */
5119 min_input_precision
= MAX (min_input_precision
,
5120 stmt_info
->min_output_precision
);
5122 if (min_input_precision
< TYPE_PRECISION (type
)
5123 && (!stmt_info
->min_input_precision
5124 || stmt_info
->min_input_precision
> min_input_precision
))
5125 stmt_info
->min_input_precision
= min_input_precision
;
5128 /* Subroutine of vect_determine_min_output_precision. Return true if
5129 we can calculate a reduced number of output bits for STMT_INFO,
5130 whose result is LHS. */
5133 vect_determine_min_output_precision_1 (vec_info
*vinfo
,
5134 stmt_vec_info stmt_info
, tree lhs
)
5136 /* Take the maximum precision required by users of the result. */
5137 unsigned int precision
= 0;
5138 imm_use_iterator iter
;
5140 FOR_EACH_IMM_USE_FAST (use
, iter
, lhs
)
5142 gimple
*use_stmt
= USE_STMT (use
);
5143 if (is_gimple_debug (use_stmt
))
5145 stmt_vec_info use_stmt_info
= vinfo
->lookup_stmt (use_stmt
);
5146 if (!use_stmt_info
|| !use_stmt_info
->min_input_precision
)
5148 /* The input precision recorded for COND_EXPRs applies only to the
5149 "then" and "else" values. */
5150 gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5152 && gimple_assign_rhs_code (assign
) == COND_EXPR
5153 && use
->use
!= gimple_assign_rhs2_ptr (assign
)
5154 && use
->use
!= gimple_assign_rhs3_ptr (assign
))
5156 precision
= MAX (precision
, use_stmt_info
->min_input_precision
);
5159 if (dump_enabled_p ())
5160 dump_printf_loc (MSG_NOTE
, vect_location
,
5161 "only the low %d bits of %T are significant\n",
5163 stmt_info
->min_output_precision
= precision
;
5167 /* Calculate min_output_precision for STMT_INFO. */
5170 vect_determine_min_output_precision (vec_info
*vinfo
, stmt_vec_info stmt_info
)
5172 /* We're only interested in statements with a narrowable result. */
5173 tree lhs
= gimple_get_lhs (stmt_info
->stmt
);
5175 || TREE_CODE (lhs
) != SSA_NAME
5176 || !vect_narrowable_type_p (TREE_TYPE (lhs
)))
5179 if (!vect_determine_min_output_precision_1 (vinfo
, stmt_info
, lhs
))
5180 stmt_info
->min_output_precision
= TYPE_PRECISION (TREE_TYPE (lhs
));
5183 /* Use range information to decide whether STMT (described by STMT_INFO)
5184 could be done in a narrower type. This is effectively a forward
5185 propagation, since it uses context-independent information that applies
5186 to all users of an SSA name. */
5189 vect_determine_precisions_from_range (stmt_vec_info stmt_info
, gassign
*stmt
)
5191 tree lhs
= gimple_assign_lhs (stmt
);
5192 if (!lhs
|| TREE_CODE (lhs
) != SSA_NAME
)
5195 tree type
= TREE_TYPE (lhs
);
5196 if (!vect_narrowable_type_p (type
))
5199 /* First see whether we have any useful range information for the result. */
5200 unsigned int precision
= TYPE_PRECISION (type
);
5201 signop sign
= TYPE_SIGN (type
);
5202 wide_int min_value
, max_value
;
5203 if (!vect_get_range_info (lhs
, &min_value
, &max_value
))
5206 tree_code code
= gimple_assign_rhs_code (stmt
);
5207 unsigned int nops
= gimple_num_ops (stmt
);
5209 if (!vect_truncatable_operation_p (code
))
5210 /* Check that all relevant input operands are compatible, and update
5211 [MIN_VALUE, MAX_VALUE] to include their ranges. */
5212 for (unsigned int i
= 1; i
< nops
; ++i
)
5214 tree op
= gimple_op (stmt
, i
);
5215 if (TREE_CODE (op
) == INTEGER_CST
)
5217 /* Don't require the integer to have RHS_TYPE (which it might
5218 not for things like shift amounts, etc.), but do require it
5220 if (!int_fits_type_p (op
, type
))
5223 min_value
= wi::min (min_value
, wi::to_wide (op
, precision
), sign
);
5224 max_value
= wi::max (max_value
, wi::to_wide (op
, precision
), sign
);
5226 else if (TREE_CODE (op
) == SSA_NAME
)
5228 /* Ignore codes that don't take uniform arguments. */
5229 if (!types_compatible_p (TREE_TYPE (op
), type
))
5232 wide_int op_min_value
, op_max_value
;
5233 if (!vect_get_range_info (op
, &op_min_value
, &op_max_value
))
5236 min_value
= wi::min (min_value
, op_min_value
, sign
);
5237 max_value
= wi::max (max_value
, op_max_value
, sign
);
5243 /* Try to switch signed types for unsigned types if we can.
5244 This is better for two reasons. First, unsigned ops tend
5245 to be cheaper than signed ops. Second, it means that we can
5249 int res = (int) c & 0xff00; // range [0x0000, 0xff00]
5254 unsigned short res_1 = (unsigned short) c & 0xff00;
5255 int res = (int) res_1;
5257 where the intermediate result res_1 has unsigned rather than
5259 if (sign
== SIGNED
&& !wi::neg_p (min_value
))
5262 /* See what precision is required for MIN_VALUE and MAX_VALUE. */
5263 unsigned int precision1
= wi::min_precision (min_value
, sign
);
5264 unsigned int precision2
= wi::min_precision (max_value
, sign
);
5265 unsigned int value_precision
= MAX (precision1
, precision2
);
5266 if (value_precision
>= precision
)
5269 if (dump_enabled_p ())
5270 dump_printf_loc (MSG_NOTE
, vect_location
, "can narrow to %s:%d"
5271 " without loss of precision: %G",
5272 sign
== SIGNED
? "signed" : "unsigned",
5273 value_precision
, (gimple
*) stmt
);
5275 vect_set_operation_type (stmt_info
, type
, value_precision
, sign
);
  vect_set_min_input_precision (stmt_info, type, value_precision);
}
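
/* Editorial note: an illustrative sketch, not part of the original GCC
   sources.  A case where the range information used above allows narrowing
   (names and types made up for illustration):

     void
     narrow (unsigned char *restrict x, unsigned char *restrict y,
             unsigned short *restrict out, int n)
     {
       for (int i = 0; i < n; i++)
         out[i] = (x[i] + y[i]) & 0xff;
     }

   The addition is performed in int after promotion, but its value range is
   [0, 510], which needs far fewer bits than int, so the statement can be
   recorded as performable in a narrower unsigned type and vectorized with
   more elements per vector.  */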
5279 /* Use information about the users of STMT's result to decide whether
5280 STMT (described by STMT_INFO) could be done in a narrower type.
5281 This is effectively a backward propagation. */
5284 vect_determine_precisions_from_users (stmt_vec_info stmt_info
, gassign
*stmt
)
5286 tree_code code
= gimple_assign_rhs_code (stmt
);
5287 unsigned int opno
= (code
== COND_EXPR
? 2 : 1);
5288 tree type
= TREE_TYPE (gimple_op (stmt
, opno
));
5289 if (!vect_narrowable_type_p (type
))
5292 unsigned int precision
= TYPE_PRECISION (type
);
5293 unsigned int operation_precision
, min_input_precision
;
5297 /* Only the bits that contribute to the output matter. Don't change
5298 the precision of the operation itself. */
5299 operation_precision
= precision
;
5300 min_input_precision
= stmt_info
->min_output_precision
;
5306 tree shift
= gimple_assign_rhs2 (stmt
);
5307 if (TREE_CODE (shift
) != INTEGER_CST
5308 || !wi::ltu_p (wi::to_widest (shift
), precision
))
5310 unsigned int const_shift
= TREE_INT_CST_LOW (shift
);
5311 if (code
== LSHIFT_EXPR
)
5313 /* Avoid creating an undefined shift.
5315 ??? We could instead use min_output_precision as-is and
5316 optimize out-of-range shifts to zero. However, only
5317 degenerate testcases shift away all their useful input data,
5318 and it isn't natural to drop input operations in the middle
5319 of vectorization. This sort of thing should really be
5320 handled before vectorization. */
5321 operation_precision
= MAX (stmt_info
->min_output_precision
,
5323 /* We need CONST_SHIFT fewer bits of the input. */
5324 min_input_precision
= (MAX (operation_precision
, const_shift
)
5329 /* We need CONST_SHIFT extra bits to do the operation. */
5330 operation_precision
= (stmt_info
->min_output_precision
5332 min_input_precision
= operation_precision
;
5338 if (vect_truncatable_operation_p (code
))
5340 /* Input bit N has no effect on output bits N-1 and lower. */
5341 operation_precision
= stmt_info
->min_output_precision
;
5342 min_input_precision
= operation_precision
;
5348 if (operation_precision
< precision
)
5350 if (dump_enabled_p ())
5351 dump_printf_loc (MSG_NOTE
, vect_location
, "can narrow to %s:%d"
5352 " without affecting users: %G",
5353 TYPE_UNSIGNED (type
) ? "unsigned" : "signed",
5354 operation_precision
, (gimple
*) stmt
);
5355 vect_set_operation_type (stmt_info
, type
, operation_precision
,
5358 vect_set_min_input_precision (stmt_info
, type
, min_input_precision
);
5361 /* Return true if the statement described by STMT_INFO sets a boolean
5362 SSA_NAME and if we know how to vectorize this kind of statement using
5363 vector mask types. */
5366 possible_vector_mask_operation_p (stmt_vec_info stmt_info
)
5368 tree lhs
= gimple_get_lhs (stmt_info
->stmt
);
5370 || TREE_CODE (lhs
) != SSA_NAME
5371 || !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs
)))
5374 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
5376 tree_code rhs_code
= gimple_assign_rhs_code (assign
);
5388 return TREE_CODE_CLASS (rhs_code
) == tcc_comparison
;
5391 else if (is_a
<gphi
*> (stmt_info
->stmt
))
5396 /* If STMT_INFO sets a boolean SSA_NAME, see whether we should use
5397 a vector mask type instead of a normal vector type. Record the
5398 result in STMT_INFO->mask_precision. */
5401 vect_determine_mask_precision (vec_info
*vinfo
, stmt_vec_info stmt_info
)
5403 if (!possible_vector_mask_operation_p (stmt_info
))
5406 /* If at least one boolean input uses a vector mask type,
5407 pick the mask type with the narrowest elements.
5409 ??? This is the traditional behavior. It should always produce
5410 the smallest number of operations, but isn't necessarily the
5411 optimal choice. For example, if we have:
5417 - the user of a wants it to have a mask type for 16-bit elements (M16)
5419 - c uses a mask type for 8-bit elements (M8)
5421 then picking M8 gives:
5423 - 1 M16->M8 pack for b
5425 - 2 M8->M16 unpacks for the user of a
5427 whereas picking M16 would have given:
5429 - 2 M8->M16 unpacks for c
5432 The number of operations are equal, but M16 would have given
5433 a shorter dependency chain and allowed more ILP. */
5434 unsigned int precision
= ~0U;
5435 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
5437 unsigned int nops
= gimple_num_ops (assign
);
5438 for (unsigned int i
= 1; i
< nops
; ++i
)
5440 tree rhs
= gimple_op (assign
, i
);
5441 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs
)))
5444 stmt_vec_info def_stmt_info
= vinfo
->lookup_def (rhs
);
5446 /* Don't let external or constant operands influence the choice.
5447 We can convert them to whichever vector type we pick. */
5450 if (def_stmt_info
->mask_precision
)
5452 if (precision
> def_stmt_info
->mask_precision
)
5453 precision
= def_stmt_info
->mask_precision
;
5457 /* If the statement compares two values that shouldn't use vector masks,
5458 try comparing the values as normal scalars instead. */
5459 tree_code rhs_code
= gimple_assign_rhs_code (assign
);
5460 if (precision
== ~0U
5461 && TREE_CODE_CLASS (rhs_code
) == tcc_comparison
)
5463 tree rhs1_type
= TREE_TYPE (gimple_assign_rhs1 (assign
));
5465 tree vectype
, mask_type
;
5466 if (is_a
<scalar_mode
> (TYPE_MODE (rhs1_type
), &mode
)
5467 && (vectype
= get_vectype_for_scalar_type (vinfo
, rhs1_type
))
5468 && (mask_type
= get_mask_type_for_scalar_type (vinfo
, rhs1_type
))
5469 && expand_vec_cmp_expr_p (vectype
, mask_type
, rhs_code
))
5470 precision
= GET_MODE_BITSIZE (mode
);
5475 gphi
*phi
= as_a
<gphi
*> (stmt_info
->stmt
);
5476 for (unsigned i
= 0; i
< gimple_phi_num_args (phi
); ++i
)
5478 tree rhs
= gimple_phi_arg_def (phi
, i
);
5480 stmt_vec_info def_stmt_info
= vinfo
->lookup_def (rhs
);
5482 /* Don't let external or constant operands influence the choice.
5483 We can convert them to whichever vector type we pick. */
5486 if (def_stmt_info
->mask_precision
)
5488 if (precision
> def_stmt_info
->mask_precision
)
5489 precision
= def_stmt_info
->mask_precision
;
5494 if (dump_enabled_p ())
5496 if (precision
== ~0U)
5497 dump_printf_loc (MSG_NOTE
, vect_location
,
5498 "using normal nonmask vectors for %G",
5501 dump_printf_loc (MSG_NOTE
, vect_location
,
5502 "using boolean precision %d for %G",
5503 precision
, stmt_info
->stmt
);
5506 stmt_info
->mask_precision
= precision
;
5509 /* Handle vect_determine_precisions for STMT_INFO, given that we
5510 have already done so for the users of its result. */
5513 vect_determine_stmt_precisions (vec_info
*vinfo
, stmt_vec_info stmt_info
)
5515 vect_determine_min_output_precision (vinfo
, stmt_info
);
5516 if (gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
5518 vect_determine_precisions_from_range (stmt_info
, stmt
);
5519 vect_determine_precisions_from_users (stmt_info
, stmt
);
5523 /* Walk backwards through the vectorizable region to determine the
5524 values of these fields:
5526 - min_output_precision
5527 - min_input_precision
5528 - operation_precision
5529 - operation_sign. */
5532 vect_determine_precisions (vec_info
*vinfo
)
5534 DUMP_VECT_SCOPE ("vect_determine_precisions");
5536 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
5538 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
5539 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
5540 unsigned int nbbs
= loop
->num_nodes
;
5542 for (unsigned int i
= 0; i
< nbbs
; i
++)
5544 basic_block bb
= bbs
[i
];
5545 for (auto gsi
= gsi_start_phis (bb
);
5546 !gsi_end_p (gsi
); gsi_next (&gsi
))
5548 stmt_vec_info stmt_info
= vinfo
->lookup_stmt (gsi
.phi ());
5550 vect_determine_mask_precision (vinfo
, stmt_info
);
5552 for (auto si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
5553 if (!is_gimple_debug (gsi_stmt (si
)))
5554 vect_determine_mask_precision
5555 (vinfo
, vinfo
->lookup_stmt (gsi_stmt (si
)));
5557 for (unsigned int i
= 0; i
< nbbs
; i
++)
5559 basic_block bb
= bbs
[nbbs
- i
- 1];
5560 for (gimple_stmt_iterator si
= gsi_last_bb (bb
);
5561 !gsi_end_p (si
); gsi_prev (&si
))
5562 if (!is_gimple_debug (gsi_stmt (si
)))
5563 vect_determine_stmt_precisions
5564 (vinfo
, vinfo
->lookup_stmt (gsi_stmt (si
)));
5565 for (auto gsi
= gsi_start_phis (bb
);
5566 !gsi_end_p (gsi
); gsi_next (&gsi
))
5568 stmt_vec_info stmt_info
= vinfo
->lookup_stmt (gsi
.phi ());
5570 vect_determine_stmt_precisions (vinfo
, stmt_info
);
5576 bb_vec_info bb_vinfo
= as_a
<bb_vec_info
> (vinfo
);
5577 for (unsigned i
= 0; i
< bb_vinfo
->bbs
.length (); ++i
)
5579 basic_block bb
= bb_vinfo
->bbs
[i
];
5580 for (auto gsi
= gsi_start_phis (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
5582 stmt_vec_info stmt_info
= vinfo
->lookup_stmt (gsi
.phi ());
5583 if (stmt_info
&& STMT_VINFO_VECTORIZABLE (stmt_info
))
5584 vect_determine_mask_precision (vinfo
, stmt_info
);
5586 for (auto gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
5588 stmt_vec_info stmt_info
= vinfo
->lookup_stmt (gsi_stmt (gsi
));
5589 if (stmt_info
&& STMT_VINFO_VECTORIZABLE (stmt_info
))
5590 vect_determine_mask_precision (vinfo
, stmt_info
);
5593 for (int i
= bb_vinfo
->bbs
.length () - 1; i
!= -1; --i
)
5595 for (gimple_stmt_iterator gsi
= gsi_last_bb (bb_vinfo
->bbs
[i
]);
5596 !gsi_end_p (gsi
); gsi_prev (&gsi
))
5598 stmt_vec_info stmt_info
= vinfo
->lookup_stmt (gsi_stmt (gsi
));
5599 if (stmt_info
&& STMT_VINFO_VECTORIZABLE (stmt_info
))
5600 vect_determine_stmt_precisions (vinfo
, stmt_info
);
5602 for (auto gsi
= gsi_start_phis (bb_vinfo
->bbs
[i
]);
5603 !gsi_end_p (gsi
); gsi_next (&gsi
))
5605 stmt_vec_info stmt_info
= vinfo
->lookup_stmt (gsi
.phi ());
5606 if (stmt_info
&& STMT_VINFO_VECTORIZABLE (stmt_info
))
5607 vect_determine_stmt_precisions (vinfo
, stmt_info
);
typedef gimple *(*vect_recog_func_ptr) (vec_info *, stmt_vec_info, tree *);

struct vect_recog_func
{
  vect_recog_func_ptr fn;
  const char *name;
};
/* Note that ordering matters - the first pattern matching on a stmt is
   taken which means usually the more complex one needs to precede the
   less complex ones (widen_sum only after dot_prod or sad for example).  */
static vect_recog_func vect_vect_recog_func_ptrs[] = {
  { vect_recog_over_widening_pattern, "over_widening" },
  /* Must come after over_widening, which narrows the shift as much as
     possible beforehand.  */
  { vect_recog_average_pattern, "average" },
  { vect_recog_cond_expr_convert_pattern, "cond_expr_convert" },
  { vect_recog_mulhs_pattern, "mult_high" },
  { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
  { vect_recog_widen_mult_pattern, "widen_mult" },
  { vect_recog_dot_prod_pattern, "dot_prod" },
  { vect_recog_sad_pattern, "sad" },
  { vect_recog_widen_sum_pattern, "widen_sum" },
  { vect_recog_pow_pattern, "pow" },
  { vect_recog_popcount_pattern, "popcount" },
  { vect_recog_widen_shift_pattern, "widen_shift" },
  { vect_recog_rotate_pattern, "rotate" },
  { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
  { vect_recog_divmod_pattern, "divmod" },
  { vect_recog_mult_pattern, "mult" },
  { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
  { vect_recog_bool_pattern, "bool" },
  /* This must come before mask conversion, and includes the parts
     of mask conversion that are needed for gather and scatter
     internal functions.  */
  { vect_recog_gather_scatter_pattern, "gather_scatter" },
  { vect_recog_mask_conversion_pattern, "mask_conversion" },
  { vect_recog_widen_plus_pattern, "widen_plus" },
  { vect_recog_widen_minus_pattern, "widen_minus" },
};

const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);
5656 /* Mark statements that are involved in a pattern. */
5659 vect_mark_pattern_stmts (vec_info
*vinfo
,
5660 stmt_vec_info orig_stmt_info
, gimple
*pattern_stmt
,
5661 tree pattern_vectype
)
5663 stmt_vec_info orig_stmt_info_saved
= orig_stmt_info
;
5664 gimple
*def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info
);
5666 gimple
*orig_pattern_stmt
= NULL
;
5667 if (is_pattern_stmt_p (orig_stmt_info
))
5669 /* We're replacing a statement in an existing pattern definition
5671 orig_pattern_stmt
= orig_stmt_info
->stmt
;
5672 if (dump_enabled_p ())
5673 dump_printf_loc (MSG_NOTE
, vect_location
,
5674 "replacing earlier pattern %G", orig_pattern_stmt
);
5676 /* To keep the book-keeping simple, just swap the lhs of the
5677 old and new statements, so that the old one has a valid but
5679 tree old_lhs
= gimple_get_lhs (orig_pattern_stmt
);
5680 gimple_set_lhs (orig_pattern_stmt
, gimple_get_lhs (pattern_stmt
));
5681 gimple_set_lhs (pattern_stmt
, old_lhs
);
5683 if (dump_enabled_p ())
5684 dump_printf_loc (MSG_NOTE
, vect_location
, "with %G", pattern_stmt
);
5686 /* Switch to the statement that ORIG replaces. */
5687 orig_stmt_info
= STMT_VINFO_RELATED_STMT (orig_stmt_info
);
5689 /* We shouldn't be replacing the main pattern statement. */
5690 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info
)->stmt
5691 != orig_pattern_stmt
);
5695 for (gimple_stmt_iterator si
= gsi_start (def_seq
);
5696 !gsi_end_p (si
); gsi_next (&si
))
5698 if (dump_enabled_p ())
5699 dump_printf_loc (MSG_NOTE
, vect_location
,
5700 "extra pattern stmt: %G", gsi_stmt (si
));
5701 stmt_vec_info pattern_stmt_info
5702 = vect_init_pattern_stmt (vinfo
, gsi_stmt (si
),
5703 orig_stmt_info
, pattern_vectype
);
5704 /* Stmts in the def sequence are not vectorizable cycle or
5705 induction defs, instead they should all be vect_internal_def
5706 feeding the main pattern stmt which retains this def type. */
5707 STMT_VINFO_DEF_TYPE (pattern_stmt_info
) = vect_internal_def
;
  if (orig_pattern_stmt)
    {
      vect_init_pattern_stmt (vinfo, pattern_stmt,
			      orig_stmt_info, pattern_vectype);

      /* Insert all the new pattern statements before the original one.  */
      gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
      gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
					       orig_def_seq);
      gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
      gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);

      /* Remove the pattern statement that this new pattern replaces.  */
      gsi_remove (&gsi, false);
    }
  else
    vect_set_pattern_stmt (vinfo,
			   pattern_stmt, orig_stmt_info, pattern_vectype);
  /* Transfer reduction path info to the pattern.  */
  if (STMT_VINFO_REDUC_IDX (orig_stmt_info_saved) != -1)
    {
      gimple_match_op op;
      if (!gimple_extract_op (orig_stmt_info_saved->stmt, &op))
	gcc_unreachable ();
      tree lookfor = op.ops[STMT_VINFO_REDUC_IDX (orig_stmt_info)];
      /* Search the pattern def sequence and the main pattern stmt.  Note
	 we may have inserted all into a containing pattern def sequence
	 so the following is a bit awkward.  */
      gimple_stmt_iterator si;
      gimple *s;
      if (def_seq)
	{
	  si = gsi_start (def_seq);
	  s = gsi_stmt (si);
	  gsi_next (&si);
	}
      else
	{
	  si = gsi_none ();
	  s = pattern_stmt;
	}
      do
	{
	  bool found = false;
	  if (gimple_extract_op (s, &op))
	    for (unsigned i = 0; i < op.num_ops; ++i)
	      if (op.ops[i] == lookfor)
		{
		  STMT_VINFO_REDUC_IDX (vinfo->lookup_stmt (s)) = i;
		  lookfor = gimple_get_lhs (s);
		  found = true;
		  break;
		}
	  if (s == pattern_stmt)
	    {
	      if (!found && dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "failed to update reduction index.\n");
	      break;
	    }
	  if (gsi_end_p (si))
	    s = pattern_stmt;
	  else
	    {
	      s = gsi_stmt (si);
	      if (s == pattern_stmt)
		/* Found the end inside a bigger pattern def seq.  */
		si = gsi_none ();
	      else
		gsi_next (&si);
	    }
	}
      while (1);
    }
}
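
/* A small worked example of the reduction-index transfer above, for
   illustration only (the SSA names are made up).  If the original
   reduction statement was

     sum_1 = _5 + sum_0;		with STMT_VINFO_REDUC_IDX == 1

   and a recognizer replaced it with the main pattern statement

     patt_7 = DOT_PROD_EXPR <a_2, b_3, sum_0>;

   then the walk finds sum_0 as operand 2 of patt_7 and records
   STMT_VINFO_REDUC_IDX == 2 on the pattern stmt_vec_info, so the
   reduction path continues through the pattern statements.  */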
/* Function vect_pattern_recog_1

   Input:
   PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
	computation pattern.
   STMT_INFO: A stmt from which the pattern search should start.

   If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
   a sequence of statements that has the same functionality and can be
   used to replace STMT_INFO.  It returns the last statement in the sequence
   and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
   PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
   statement, having first checked that the target supports the new operation
   in that type.

   This function also does some bookkeeping, as explained in the documentation
   for vect_pattern_recog.  */
static void
vect_pattern_recog_1 (vec_info *vinfo,
		      vect_recog_func *recog_func, stmt_vec_info stmt_info)
{
  gimple *pattern_stmt;
  loop_vec_info loop_vinfo;
  tree pattern_vectype;

  /* If this statement has already been replaced with pattern statements,
     leave the original statement alone, since the first match wins.
     Instead try to match against the definition statements that feed
     the main pattern statement.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
	   !gsi_end_p (gsi); gsi_next (&gsi))
	vect_pattern_recog_1 (vinfo, recog_func,
			      vinfo->lookup_stmt (gsi_stmt (gsi)));
      return;
    }

  gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
  pattern_stmt = recog_func->fn (vinfo, stmt_info, &pattern_vectype);
  if (!pattern_stmt)
    {
      /* Clear any half-formed pattern definition sequence.  */
      STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
      return;
    }

  loop_vinfo = dyn_cast <loop_vec_info> (vinfo);

  /* Found a vectorizable pattern.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "%s pattern recognized: %G",
		     recog_func->name, pattern_stmt);

  /* Mark the stmts that are involved in the pattern.  */
  vect_mark_pattern_stmts (vinfo, stmt_info, pattern_stmt, pattern_vectype);

  /* Patterns cannot be vectorized using SLP, because they change the order of
     computation.  */
  if (loop_vinfo)
    {
      unsigned ix, ix2;
      stmt_vec_info *elem_ptr;
      VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
			     elem_ptr, *elem_ptr == stmt_info);
    }
}
/* Function vect_pattern_recog

   Input:
   LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
	computation idioms.

   Output - for each computation idiom that is detected we create a new stmt
	that provides the same functionality and that can be vectorized.  We
	also record some information in the struct_stmt_info of the relevant
	stmts, as explained below:

   At the entry to this function we have the following stmts, with the
   following initial value in the STMT_VINFO fields:

	 stmt			    in_pattern_p  related_stmt	vec_stmt
	 S1: a_i = ....			-	  -		-
	 S2: a_2 = ..use(a_i)..		-	  -		-
	 S3: a_1 = ..use(a_2)..		-	  -		-
	 S4: a_0 = ..use(a_1)..		-	  -		-
	 S5: ... = ..use(a_0)..		-	  -		-

   Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
   represented by a single stmt.  We then:
   - create a new stmt S6 equivalent to the pattern (the stmt is not
     inserted into the code)
   - fill in the STMT_VINFO fields as follows:

				    in_pattern_p  related_stmt	vec_stmt
	 S1: a_i = ....			-	  -		-
	 S2: a_2 = ..use(a_i)..		-	  -		-
	 S3: a_1 = ..use(a_2)..		-	  -		-
	 S4: a_0 = ..use(a_1)..		true	  S6		-
	  '---> S6: a_new = ....	-	  S4		-
	 S5: ... = ..use(a_0)..		-	  -		-

   (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
   to each other through the RELATED_STMT field).

   S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
   of S4 because it will replace all its uses.  Stmts {S1,S2,S3} will
   remain irrelevant unless used by stmts other than S4.

   If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
   (because they are marked as irrelevant).  It will vectorize S6, and record
   a pointer to the new vector stmt VS6 from S6 (as usual).
   S4 will be skipped, and S5 will be vectorized as usual:

				    in_pattern_p  related_stmt	vec_stmt
	 S1: a_i = ....			-	  -		-
	 S2: a_2 = ..use(a_i)..		-	  -		-
	 S3: a_1 = ..use(a_2)..		-	  -		-
       > VS6: va_new = ....		-	  -		-
	 S4: a_0 = ..use(a_1)..		true	  S6		VS6
	  '---> S6: a_new = ....	-	  S4		VS6
       > VS5: ... = ..vuse(va_new)..	-	  -		-
	 S5: ... = ..use(a_0)..		-	  -		-

   DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
   elsewhere), and we'll end up with:

	VS6: va_new = ....
	VS5: ... = ..vuse(va_new)..

   In case of more than one pattern statement, e.g., widen-mult with
   intermediate type:

     S1  a_t = ;
     S2  a_T = (TYPE) a_t;
	   '--> S3: a_it = (interm_type) a_t;
     S4  prod_T = a_T * CONST;
	   '--> S5: prod_T' = a_it w* CONST;

   there may be other users of a_T outside the pattern.  In that case S2 will
   be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
   and vectorized.  The vector stmt VS2 will be recorded in S2, and VS3 will
   be recorded in S3.  */
void
vect_pattern_recog (vec_info *vinfo)
{
  class loop *loop;
  basic_block *bbs;
  unsigned int nbbs;
  gimple_stmt_iterator si;
  unsigned int i, j;

  vect_determine_precisions (vinfo);

  DUMP_VECT_SCOPE ("vect_pattern_recog");

  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      bbs = LOOP_VINFO_BBS (loop_vinfo);
      nbbs = loop->num_nodes;

      /* Scan through the loop stmts, applying the pattern recognition
	 functions starting at each stmt visited:  */
      for (i = 0; i < nbbs; i++)
	{
	  basic_block bb = bbs[i];
	  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	    {
	      if (is_gimple_debug (gsi_stmt (si)))
		continue;
	      stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
	      /* Scan over all generic vect_recog_xxx_pattern functions.  */
	      for (j = 0; j < NUM_PATTERNS; j++)
		vect_pattern_recog_1 (vinfo, &vect_vect_recog_func_ptrs[j],
				      stmt_info);
	    }
	}
    }
  else
    {
      bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
      for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
	for (gimple_stmt_iterator gsi = gsi_start_bb (bb_vinfo->bbs[i]);
	     !gsi_end_p (gsi); gsi_next (&gsi))
	  {
	    stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (gsi_stmt (gsi));
	    if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info))
	      continue;

	    /* Scan over all generic vect_recog_xxx_pattern functions.  */
	    for (j = 0; j < NUM_PATTERNS; j++)
	      vect_pattern_recog_1 (vinfo,
				    &vect_vect_recog_func_ptrs[j], stmt_info);
	  }
    }

  /* After this no more add_stmt calls are allowed.  */
  vinfo->stmt_vec_info_ro = true;
}