/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006-2023 Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "gimple-iterator.h"
#include "gimple-fold.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "gimple-fold.h"
#include "gimplify-me.h"
#include "tree-vectorizer.h"
#include "internal-fn.h"
#include "case-cfn-macros.h"
#include "fold-const-call.h"
#include "omp-simd-clone.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "gimple-range.h"
/* TODO: Note the vectorizer still builds COND_EXPRs with GENERIC compares
   in the first operand.  Disentangling this is future work; the
   IL is properly transferred to VEC_COND_EXPRs with separate compares.  */

/* Return true if we have a useful VR_RANGE range for VAR, storing it
   in *MIN_VALUE and *MAX_VALUE if so.  Note the range in the dump files.  */
vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
  get_range_query (cfun)->range_of_expr (vr, var);
  if (vr.undefined_p ())
    vr.set_varying (TREE_TYPE (var));
  value_range_kind vr_type = get_legacy_range (vr, vr_min, vr_max);
  *min_value = wi::to_wide (vr_min);
  *max_value = wi::to_wide (vr_max);
  wide_int nonzero = get_nonzero_bits (var);
  signop sgn = TYPE_SIGN (TREE_TYPE (var));
  if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
					 nonzero, sgn) == VR_RANGE)
      if (dump_enabled_p ())
	  dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
	  dump_printf (MSG_NOTE, " has range [");
	  dump_hex (MSG_NOTE, *min_value);
	  dump_printf (MSG_NOTE, ", ");
	  dump_hex (MSG_NOTE, *max_value);
	  dump_printf (MSG_NOTE, "]\n");
      if (dump_enabled_p ())
	  dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
	  dump_printf (MSG_NOTE, " has no range info\n");
/* Report that we've found an instance of pattern PATTERN in
   statement STMT.  */

vect_pattern_detected (const char *name, gimple *stmt)
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt);
/* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
   return the pattern statement's stmt_vec_info.  Set its vector type to
   VECTYPE if it doesn't have one already.  */

vect_init_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
			stmt_vec_info orig_stmt_info, tree vectype)
  stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
  if (pattern_stmt_info == NULL)
    pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
  gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));

  pattern_stmt_info->pattern_stmt_p = true;
  STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
  STMT_VINFO_DEF_TYPE (pattern_stmt_info)
    = STMT_VINFO_DEF_TYPE (orig_stmt_info);
  STMT_VINFO_TYPE (pattern_stmt_info) = STMT_VINFO_TYPE (orig_stmt_info);
  if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
		  || (VECTOR_BOOLEAN_TYPE_P (vectype)
		      == vect_use_mask_type_p (orig_stmt_info)));
      STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
      pattern_stmt_info->mask_precision = orig_stmt_info->mask_precision;
  return pattern_stmt_info;
/* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
   Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
   have one already.  */

vect_set_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
		       stmt_vec_info orig_stmt_info, tree vectype)
  STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
  STMT_VINFO_RELATED_STMT (orig_stmt_info)
    = vect_init_pattern_stmt (vinfo, pattern_stmt, orig_stmt_info, vectype);
/* Add NEW_STMT to STMT_INFO's pattern definition statements.  If VECTYPE
   is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
   be different from the vector type of the final pattern statement.
   If VECTYPE is a mask type, SCALAR_TYPE_FOR_MASK is the scalar type
   from which it was derived.  */

append_pattern_def_seq (vec_info *vinfo,
			stmt_vec_info stmt_info, gimple *new_stmt,
			tree vectype = NULL_TREE,
			tree scalar_type_for_mask = NULL_TREE)
  gcc_assert (!scalar_type_for_mask
	      == (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype)));
      stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
      STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
      if (scalar_type_for_mask)
	new_stmt_info->mask_precision
	  = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (scalar_type_for_mask));
  gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
/* The caller wants to perform new operations on vect_external variable
   VAR, so that the result of the operations would also be vect_external.
   Return the edge on which the operations can be performed, if one exists.
   Return null if the operations should instead be treated as part of
   the pattern that needs them.  */
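
/* For example, if VAR is invariant in a vectorized loop, a new conversion
   such as

     tmp = (int) var;

   can be emitted on the loop preheader edge that this function returns,
   so that TMP is also external to the loop.  (Illustrative sketch only;
   TMP is a hypothetical temporary, not part of the code below.)  */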
vect_get_external_def_edge (vec_info *vinfo, tree var)
  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
      e = loop_preheader_edge (loop_vinfo->loop);
      if (!SSA_NAME_IS_DEFAULT_DEF (var))
	  basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
	      || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
/* Return true if the target supports a vector version of CODE,
   where CODE is known to map to a direct optab with the given SUBTYPE.
   ITYPE specifies the type of (some of) the scalar inputs and OTYPE
   specifies the type of the scalar result.

   If CODE allows the inputs and outputs to have different types
   (such as for WIDEN_SUM_EXPR), it is the input mode rather
   than the output mode that determines the appropriate target pattern.
   Operand 0 of the target pattern then specifies the mode that the output
   must have.

   When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
   Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
   is nonnull.  */

vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code,
				 tree itype, tree *vecotype_out,
				 tree *vecitype_out = NULL,
				 enum optab_subtype subtype = optab_default)
  tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
  tree vecotype = get_vectype_for_scalar_type (vinfo, otype);
  optab optab = optab_for_tree_code (code, vecitype, subtype);
  insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
  if (icode == CODE_FOR_nothing
      || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))

  *vecotype_out = vecotype;
    *vecitype_out = vecitype;

/* Round bit precision PRECISION up to a full element.  */

vect_element_precision (unsigned int precision)
  precision = 1 << ceil_log2 (precision);
  return MAX (precision, BITS_PER_UNIT);
/* If OP is defined by a statement that's being considered for vectorization,
   return information about that statement, otherwise return NULL.  */

vect_get_internal_def (vec_info *vinfo, tree op)
  stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
      && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
    return def_stmt_info;

/* Check whether NAME, an ssa-name used in STMT_VINFO,
   is a result of a type promotion, such that:
     DEF_STMT: NAME = NOP (name0)
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.  */

type_conversion_p (vec_info *vinfo, tree name, bool check_sign,
		   tree *orig_type, gimple **def_stmt, bool *promotion)
  tree type = TREE_TYPE (name);
  enum vect_def_type dt;
  stmt_vec_info def_stmt_info;
  if (!vect_is_simple_use (name, vinfo, &dt, &def_stmt_info, def_stmt))

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)

  if (!is_gimple_assign (*def_stmt))

  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *orig_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))

  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))

  if (!vect_is_simple_use (oprnd0, vinfo, &dt))
/* Holds information about an input operand after some sign changes
   and type promotions have been peeled away.  */
class vect_unpromoted_value
{
  vect_unpromoted_value ();

  void set_op (tree, vect_def_type, stmt_vec_info = NULL);

  /* The value obtained after peeling away zero or more casts.  */

  /* The type of OP.  */

  /* The definition type of OP.  */

  /* If OP is the result of peeling at least one cast, and if the cast
     of OP itself is a vectorizable statement, CASTER identifies that
     statement, otherwise it is null.  */
  stmt_vec_info caster;

inline vect_unpromoted_value::vect_unpromoted_value ()
    dt (vect_uninitialized_def),

/* Set the operand to OP_IN, its definition type to DT_IN, and the
   statement that casts it to CASTER_IN.  */

vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
			       stmt_vec_info caster_in)
  type = TREE_TYPE (op);

/* If OP is a vectorizable SSA name, strip a sequence of integer conversions
   to reach some vectorizable inner operand OP', continuing as long as it
   is possible to convert OP' back to OP using a possible sign change
   followed by a possible promotion P.  Return this OP', or null if OP is
   not a vectorizable SSA name.  If there is a promotion P, describe its
   input in UNPROM, otherwise describe OP' in UNPROM.  If SINGLE_USE_P
   is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
   have more than one user.

   A successful return means that it is possible to go from OP' to OP
   via UNPROM.  The cast from OP' to UNPROM is at most a sign change,
   whereas the cast from UNPROM to OP might be a promotion, a sign
   change, or both.

     signed short *ptr = ...;
     signed short C = *ptr;
     unsigned short B = (unsigned short) C;   // sign change
     signed int A = (signed int) B;           // unsigned promotion
     ...possible other uses of A...
     unsigned int OP = (unsigned int) A;      // sign change

   In this case it's possible to go directly from C to OP using:

     OP = (unsigned int) (unsigned short) C;
	  +------------+ +--------------+
	     promotion      sign change

   so OP' would be C.  The input to the promotion is B, so UNPROM
   would describe B.  */
vect_look_through_possible_promotion (vec_info *vinfo, tree op,
				      vect_unpromoted_value *unprom,
				      bool *single_use_p = NULL)
  tree op_type = TREE_TYPE (op);
  if (!INTEGRAL_TYPE_P (op_type))

  tree res = NULL_TREE;
  unsigned int orig_precision = TYPE_PRECISION (op_type);
  unsigned int min_precision = orig_precision;
  stmt_vec_info caster = NULL;
  while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
      /* See whether OP is simple enough to vectorize.  */
      stmt_vec_info def_stmt_info;
      if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))

      /* If OP is the input of a demotion, skip over it to see whether
	 OP is itself the result of a promotion.  If so, the combined
	 effect of the promotion and the demotion might fit the required
	 pattern, otherwise neither operation fits.

	 This copes with cases such as the result of an arithmetic
	 operation being truncated before being stored, and where that
	 arithmetic operation has been recognized as an over-widened one.  */
      if (TYPE_PRECISION (op_type) <= min_precision)
	  /* Use OP as the UNPROM described above if we haven't yet
	     found a promotion, or if using the new input preserves the
	     sign of the previous promotion.  */
	      || TYPE_PRECISION (unprom->type) == orig_precision
	      || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type))
	      unprom->set_op (op, dt, caster);
	      min_precision = TYPE_PRECISION (op_type);
	  /* Stop if we've already seen a promotion and if this
	     conversion does more than change the sign.  */
	  else if (TYPE_PRECISION (op_type)
		   != TYPE_PRECISION (unprom->type))

	  /* The sequence now extends to OP.  */

      /* See whether OP is defined by a cast.  Record it as CASTER if
	 the cast is potentially vectorizable.  */
      caster = def_stmt_info;

      /* Ignore pattern statements, since we don't link uses for them.  */
	  && !STMT_VINFO_RELATED_STMT (caster)
	  && !has_single_use (res))
	*single_use_p = false;

      gassign *assign = dyn_cast <gassign *> (def_stmt);
      if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))

      /* Continue with the input to the cast.  */
      op = gimple_assign_rhs1 (def_stmt);
      op_type = TREE_TYPE (op);
/* OP is an integer operand to an operation that returns TYPE, and we
   want to treat the operation as a widening one.  So far we can treat
   it as widening from *COMMON_TYPE.

   Return true if OP is suitable for such a widening operation,
   either widening from *COMMON_TYPE or from some supertype of it.
   Update *COMMON_TYPE to the supertype in the latter case.

   SHIFT_P is true if OP is a shift amount.  */

vect_joust_widened_integer (tree type, bool shift_p, tree op,
  /* Calculate the minimum precision required by OP, without changing
     the sign of either operand.  */
  unsigned int precision;
      if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
      precision = TREE_INT_CST_LOW (op);
      precision = wi::min_precision (wi::to_widest (op),
				     TYPE_SIGN (*common_type));
      if (precision * 2 > TYPE_PRECISION (type))

  /* If OP requires a wider type, switch to that type.  The checks
     above ensure that this is still narrower than the result.  */
  precision = vect_element_precision (precision);
  if (TYPE_PRECISION (*common_type) < precision)
    *common_type = build_nonstandard_integer_type
      (precision, TYPE_UNSIGNED (*common_type));

/* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
   is narrower than type, storing the supertype in *COMMON_TYPE if so.  */
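
/* For instance, if *COMMON_TYPE is unsigned char and NEW_TYPE is
   signed short, then signed short can hold every unsigned char value,
   so *COMMON_TYPE becomes signed short and the function succeeds as
   long as that supertype is still narrower than TYPE.  (Illustration
   only, based on the checks performed below.)  */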
vect_joust_widened_type (tree type, tree new_type, tree *common_type)
  if (types_compatible_p (*common_type, new_type))

  /* See if *COMMON_TYPE can hold all values of NEW_TYPE.  */
  if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
      && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))

  /* See if NEW_TYPE can hold all values of *COMMON_TYPE.  */
  if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
      && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
      *common_type = new_type;

  /* We have mismatched signs, with the signed type being
     no wider than the unsigned type.  In this case we need
     a wider signed type.  */
  unsigned int precision = MAX (TYPE_PRECISION (*common_type),
				TYPE_PRECISION (new_type));
  if (precision * 2 > TYPE_PRECISION (type))

  *common_type = build_nonstandard_integer_type (precision, false);
/* Check whether STMT_INFO can be viewed as a tree of integer operations
   in which each node either performs CODE or WIDENED_CODE, and where
   each leaf operand is narrower than the result of STMT_INFO.  MAX_NOPS
   specifies the maximum number of leaf operands.  SHIFT_P says whether
   CODE and WIDENED_CODE are some sort of shift.

   If STMT_INFO is such a tree, return the number of leaf operands
   and describe them in UNPROM[0] onwards.  Also set *COMMON_TYPE
   to a type that (a) is narrower than the result of STMT_INFO and
   (b) can hold all leaf operand values.

   If SUBTYPE is nonnull, allow the operands to differ in sign but not
   in precision; SUBTYPE is updated to reflect the combination found.

   Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
   exists.  */
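
/* As an illustration (hypothetical source, not from the code below):
   with CODE PLUS_EXPR, a statement computing

     int res = (int) a + (int) b + (int) c + (int) d;

   where a, b, c and d are unsigned chars forms such a tree with four
   leaf operands, and *COMMON_TYPE would be unsigned char.  */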
vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code,
		      code_helper widened_code, bool shift_p,
		      unsigned int max_nops,
		      vect_unpromoted_value *unprom, tree *common_type,
		      enum optab_subtype *subtype = NULL)
  /* Check for an integer operation with the right code.  */
  gimple *stmt = stmt_info->stmt;
  if (!(is_gimple_assign (stmt) || is_gimple_call (stmt)))

  code_helper rhs_code;
  if (is_gimple_assign (stmt))
    rhs_code = gimple_assign_rhs_code (stmt);
  else if (is_gimple_call (stmt))
    rhs_code = gimple_call_combined_fn (stmt);

      && rhs_code != widened_code)

  tree lhs = gimple_get_lhs (stmt);
  tree type = TREE_TYPE (lhs);
  if (!INTEGRAL_TYPE_P (type))

  /* Assume that both operands will be leaf operands.  */

  /* Check the operands.  */
  unsigned int next_op = 0;
  for (unsigned int i = 0; i < 2; ++i)
      vect_unpromoted_value *this_unprom = &unprom[next_op];
      unsigned int nops = 1;
      tree op = gimple_arg (stmt, i);
      if (i == 1 && TREE_CODE (op) == INTEGER_CST)
	  /* We already have a common type from earlier operands.
	     Update it to account for OP.  */
	  this_unprom->set_op (op, vect_constant_def);
	  if (!vect_joust_widened_integer (type, shift_p, op, common_type))

	  /* Only allow shifts by constants.  */
	  if (shift_p && i == 1)

	  if (rhs_code != code)
	      /* If rhs_code is widened_code, don't look through further
		 possible promotions, there is a promotion already embedded
		 in the WIDEN_*_EXPR.  */
	      if (TREE_CODE (op) != SSA_NAME
		  || !INTEGRAL_TYPE_P (TREE_TYPE (op)))

	      stmt_vec_info def_stmt_info;
	      if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info,
	      this_unprom->set_op (op, dt, NULL);
	  else if (!vect_look_through_possible_promotion (vinfo, op,

	  if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
	      /* The operand isn't widened.  If STMT_INFO has the code
		 for an unwidened operation, recursively check whether
		 this operand is a node of the tree.  */
		  || this_unprom->dt != vect_internal_def)

	      /* Give back the leaf slot allocated above now that we're
		 not treating this as a leaf operand.  */

	      /* Recursively process the definition of the operand.  */
	      stmt_vec_info def_stmt_info
		= vinfo->lookup_def (this_unprom->op);
	      nops = vect_widened_op_tree (vinfo, def_stmt_info, code,
					   widened_code, shift_p, max_nops,
					   this_unprom, common_type,

	      /* Make sure that the operand is narrower than the result.  */
	      if (TYPE_PRECISION (this_unprom->type) * 2
		  > TYPE_PRECISION (type))

	      /* Update COMMON_TYPE for the new operand.  */
		*common_type = this_unprom->type;
	      else if (!vect_joust_widened_type (type, this_unprom->type,

		  /* See if we can sign extend the smaller type.  */
		  if (TYPE_PRECISION (this_unprom->type)
		      > TYPE_PRECISION (*common_type))
		    *common_type = this_unprom->type;
		  *subtype = optab_vector_mixed_sign;
/* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
   is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var.  */

vect_recog_temp_ssa_var (tree type, gimple *stmt = NULL)
  return make_temp_ssa_name (type, stmt, "patt");
/* STMT2_INFO describes a type conversion that could be split into STMT1
   followed by a version of STMT2_INFO that takes NEW_RHS as its first
   input.  Try to do this using pattern statements, returning true on
   success.  */

vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
		      gimple *stmt1, tree vectype)
  if (is_pattern_stmt_p (stmt2_info))
      /* STMT2_INFO is part of a pattern.  Get the statement to which
	 the pattern is attached.  */
      stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
      vect_init_pattern_stmt (vinfo, stmt1, orig_stmt2_info, vectype);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "Splitting pattern statement: %G", stmt2_info->stmt);

      /* Since STMT2_INFO is a pattern statement, we can change it
	 in-situ without worrying about changing the code for the
      gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);

      if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
	  dump_printf_loc (MSG_NOTE, vect_location, "and: %G",

      gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
      if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
	/* STMT2_INFO is the actual pattern statement.  Add STMT1
	   to the end of the definition sequence.  */
	gimple_seq_add_stmt_without_update (def_seq, stmt1);
	  /* STMT2_INFO belongs to the definition sequence.  Insert STMT1
	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
	  gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);

      /* STMT2_INFO doesn't yet have a pattern.  Try to create a
	 two-statement pattern now.  */
      gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
      tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
      tree lhs_vectype = get_vectype_for_scalar_type (vinfo, lhs_type);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "Splitting statement: %G", stmt2_info->stmt);

      /* Add STMT1 as a singleton pattern definition sequence.  */
      gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
      vect_init_pattern_stmt (vinfo, stmt1, stmt2_info, vectype);
      gimple_seq_add_stmt_without_update (def_seq, stmt1);

      /* Build the second of the two pattern statements.  */
      tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
      gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
      vect_set_pattern_stmt (vinfo, new_stmt2, stmt2_info, lhs_vectype);

      if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "into pattern statements: %G", stmt1);
	  dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
			   (gimple *) new_stmt2);
/* Look for the following pattern

   ABS_STMT should point to a statement of code ABS_EXPR or ABSU_EXPR.
   HALF_TYPE and UNPROM will be set should the statement be found to
   be a widened operation.
   DIFF_STMT will be set to the MINUS_EXPR
   statement that precedes the ABS_STMT unless vect_widened_op_tree
   succeeds.  */

vect_recog_absolute_difference (vec_info *vinfo, gassign *abs_stmt,
				vect_unpromoted_value unprom[2],
  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  enum tree_code code = gimple_assign_rhs_code (abs_stmt);
  if (code != ABS_EXPR && code != ABSU_EXPR)

  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
  tree abs_type = TREE_TYPE (abs_oprnd);

  if (!ANY_INTEGRAL_TYPE_P (abs_type)
      || TYPE_OVERFLOW_WRAPS (abs_type)
      || TYPE_UNSIGNED (abs_type))

  /* Peel off conversions from the ABS input.  This can involve sign
     changes (e.g. from an unsigned subtraction to a signed ABS input)
     or signed promotion, but it can't include unsigned promotion.
     (Note that ABS of an unsigned promotion should have been folded
     away before now anyway.)  */
  vect_unpromoted_value unprom_diff;
  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,

  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
      && TYPE_UNSIGNED (unprom_diff.type))

  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
  if (!diff_stmt_vinfo)

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  if (vect_widened_op_tree (vinfo, diff_stmt_vinfo,
			    MINUS_EXPR, IFN_VEC_WIDEN_MINUS,
			    false, 2, unprom, half_type))

  /* Failed to find a widen operation so we check for a regular MINUS_EXPR.  */
  gassign *diff = dyn_cast <gassign *> (STMT_VINFO_STMT (diff_stmt_vinfo));
  if (diff_stmt && diff
      && gimple_assign_rhs_code (diff) == MINUS_EXPR
      && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (abs_oprnd)))
      *half_type = NULL_TREE;
/* Convert UNPROM to TYPE and return the result, adding new statements
   to STMT_INFO's pattern definition statements if no better way is
   available.  VECTYPE is the vector form of TYPE.

   If SUBTYPE then convert the type based on the subtype.  */

vect_convert_input (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
		    vect_unpromoted_value *unprom, tree vectype,
		    enum optab_subtype subtype = optab_default)
  /* Update the type if the signs differ.  */
  if (subtype == optab_vector_mixed_sign)
      gcc_assert (!TYPE_UNSIGNED (type));
      if (TYPE_UNSIGNED (TREE_TYPE (unprom->op)))
	  type = unsigned_type_for (type);
	  vectype = unsigned_type_for (vectype);

  /* Check for a no-op conversion.  */
  if (types_compatible_p (type, TREE_TYPE (unprom->op)))

  /* Allow the caller to create constant vect_unpromoted_values.  */
  if (TREE_CODE (unprom->op) == INTEGER_CST)
    return wide_int_to_tree (type, wi::to_widest (unprom->op));

  tree input = unprom->op;
      tree lhs = gimple_get_lhs (unprom->caster->stmt);
      tree lhs_type = TREE_TYPE (lhs);

      /* If the result of the existing cast is the right width, use it
	 instead of the source of the cast.  */
      if (TYPE_PRECISION (lhs_type) == TYPE_PRECISION (type))
      /* If the precision we want is between the source and result
	 precisions of the existing cast, try splitting the cast into
	 two and tapping into a mid-way point.  */
      else if (TYPE_PRECISION (lhs_type) > TYPE_PRECISION (type)
	       && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type))
	  /* In order to preserve the semantics of the original cast,
	     give the mid-way point the same signedness as the input value.

	     It would be possible to use a signed type here instead if
	     TYPE is signed and UNPROM->TYPE is unsigned, but that would
	     make the sign of the midtype sensitive to the order in
	     which we process the statements, since the signedness of
	     TYPE is the signedness required by just one of possibly
	     many users.  Also, unsigned promotions are usually as cheap
	     as or cheaper than signed ones, so it's better to keep an
	     unsigned promotion.  */
	  tree midtype = build_nonstandard_integer_type
	    (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type));
	  tree vec_midtype = get_vectype_for_scalar_type (vinfo, midtype);
	      input = vect_recog_temp_ssa_var (midtype, NULL);
	      gassign *new_stmt = gimple_build_assign (input, NOP_EXPR,
	      if (!vect_split_statement (vinfo, unprom->caster, input, new_stmt,
		append_pattern_def_seq (vinfo, stmt_info,
					new_stmt, vec_midtype);

  /* See if we can reuse an existing result.  */
  if (types_compatible_p (type, TREE_TYPE (input)))

  /* We need a new conversion statement.  */
  tree new_op = vect_recog_temp_ssa_var (type, NULL);
  gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, input);

  /* If OP is an external value, see if we can insert the new statement
     on an incoming edge.  */
  if (input == unprom->op && unprom->dt == vect_external_def)
    if (edge e = vect_get_external_def_edge (vinfo, input))
	basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
	gcc_assert (!new_bb);

  /* As a (common) last resort, add the statement to the pattern itself.  */
  append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype);
/* Invoke vect_convert_input for N elements of UNPROM and store the
   result in the corresponding elements of RESULT.

   If SUBTYPE then convert the type based on the subtype.  */

vect_convert_inputs (vec_info *vinfo, stmt_vec_info stmt_info, unsigned int n,
		     tree *result, tree type, vect_unpromoted_value *unprom,
		     tree vectype, enum optab_subtype subtype = optab_default)
  for (unsigned int i = 0; i < n; ++i)
      for (j = 0; j < i; ++j)
	if (unprom[j].op == unprom[i].op)
	result[i] = result[j];
	result[i] = vect_convert_input (vinfo, stmt_info,
					type, &unprom[i], vectype, subtype);
/* The caller has created a (possibly empty) sequence of pattern definition
   statements followed by a single statement PATTERN_STMT.  Cast the result
   of this final statement to TYPE.  If a new statement is needed, add
   PATTERN_STMT to the end of STMT_INFO's pattern definition statements
   and return the new statement, otherwise return PATTERN_STMT as-is.
   VECITYPE is the vector form of PATTERN_STMT's result type.  */

vect_convert_output (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
		     gimple *pattern_stmt, tree vecitype)
  tree lhs = gimple_get_lhs (pattern_stmt);
  if (!types_compatible_p (type, TREE_TYPE (lhs)))
      append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vecitype);
      tree cast_var = vect_recog_temp_ssa_var (type, NULL);
      pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
  return pattern_stmt;

/* Return true if STMT_VINFO describes a reduction for which reassociation
   is allowed.  If STMT_INFO is part of a group, assume that it's part of
   a reduction chain and optimistically assume that all statements
   except the last allow reassociation.
   Also require it to have code CODE and to be a reduction
   in the outermost loop.  When returning true, store the operands in
   *OP0_OUT and *OP1_OUT.  */
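
/* For instance, a loop statement of the form

     sum_1 = a[i] + sum_0;

   is a candidate: the operands returned would be a[i] and the reduction
   value sum_0, swapped if the reduction operand happens to come first.
   (Illustrative sketch only.)  */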
vect_reassociating_reduction_p (vec_info *vinfo,
				stmt_vec_info stmt_info, tree_code code,
				tree *op0_out, tree *op1_out)
  loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);

  gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
  if (!assign || gimple_assign_rhs_code (assign) != code)

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  class loop *loop = LOOP_VINFO_LOOP (loop_info);
  if (loop && nested_in_vect_loop_p (loop, stmt_info))

  if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
      if (needs_fold_left_reduction_p (TREE_TYPE (gimple_assign_lhs (assign)),
  else if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) == NULL)

  *op0_out = gimple_assign_rhs1 (assign);
  *op1_out = gimple_assign_rhs2 (assign);
  if (commutative_tree_code (code) && STMT_VINFO_REDUC_IDX (stmt_info) == 0)
    std::swap (*op0_out, *op1_out);
/* match.pd function to match
   (cond (cmp@3 a b) (convert@1 c) (convert@2 d))
   1) @1, @2, c, d, a, b are all integral type.
   2) There's single_use for both @1 and @2.
   3) a, c have same precision.
   4) c and @1 have different precision.
   5) c, d are the same type or they can differ in sign when convert is
   truncation.

   record a and c and d and @3.  */

extern bool gimple_cond_expr_convert_p (tree, tree *, tree (*)(tree));

/* Function vect_recog_cond_expr_convert

   Try to find the following pattern:

     TYPE_E op_true = (TYPE_E) A;
     TYPE_E op_false = (TYPE_E) B;

     E = C cmp D ? op_true : op_false;

   where
     TYPE_PRECISION (TYPE_E) != TYPE_PRECISION (TYPE_CD);
     TYPE_PRECISION (TYPE_AB) == TYPE_PRECISION (TYPE_CD);
     single_use of op_true and op_false.
     TYPE_AB could differ in sign when (TYPE_E) A is a truncation.

   * STMT_VINFO: The stmt from which the pattern search begins.
   here it starts with E = C cmp D ? op_true : op_false;

     TYPE1 E' = C cmp D ? A : B;
     TYPE3 E = (TYPE3) E';

   There may be an extra nop_convert for A or B to handle different
   signedness.

   * TYPE_OUT: The vector type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
   E' = C cmp D ? A : B; is recorded in pattern definition statements.  */
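
/* A hypothetical source-level form of the pattern (illustration only):

     int a, b, c, d;
     long e = c < d ? (long) a : (long) b;

   Here TYPE_AB and TYPE_CD are int while TYPE_E is long, so the widening
   conversion can be delayed until after the narrower COND_EXPR.  */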
vect_recog_cond_expr_convert_pattern (vec_info *vinfo,
				      stmt_vec_info stmt_vinfo, tree *type_out)
  gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
  tree lhs, match[4], temp, type, new_lhs, op2;
  gimple *pattern_stmt;

  lhs = gimple_assign_lhs (last_stmt);

  /* Find E = C cmp D ? (TYPE3) A : (TYPE3) B;
     TYPE_PRECISION (A) == TYPE_PRECISION (C).  */
  if (!gimple_cond_expr_convert_p (lhs, &match[0], NULL))

  vect_pattern_detected ("vect_recog_cond_expr_convert_pattern", last_stmt);

  type = TREE_TYPE (match[1]);
  if (TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (match[2])))
      op2 = vect_recog_temp_ssa_var (type, NULL);
      gimple *nop_stmt = gimple_build_assign (op2, NOP_EXPR, match[2]);
      append_pattern_def_seq (vinfo, stmt_vinfo, nop_stmt,
			      get_vectype_for_scalar_type (vinfo, type));

  temp = vect_recog_temp_ssa_var (type, NULL);
  cond_stmt = gimple_build_assign (temp, build3 (COND_EXPR, type, match[3],
  append_pattern_def_seq (vinfo, stmt_vinfo, cond_stmt,
			  get_vectype_for_scalar_type (vinfo, type));
  new_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
  pattern_stmt = gimple_build_assign (new_lhs, NOP_EXPR, temp);
  *type_out = STMT_VINFO_VECTYPE (stmt_vinfo);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "created pattern stmt: %G", pattern_stmt);
  return pattern_stmt;
/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     sum_0 = phi <init, sum_1>
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type1a' and 'type1b',
   the sign of 'TYPE1' must be one of 'type1a' or 'type1b' but the sign of
   'type1a' and 'type1b' can differ.

   * STMT_VINFO: The stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
	 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
	 vectorized without preserving all the intermediate results.  It
	 produces only N/2 (widened) results (by summing up pairs of
	 intermediate results) rather than all N results.  Therefore, we
	 cannot allow this pattern when we want to get all the results and in
	 the correct order (as is the case when this computation is in an
	 inner-loop nested in an outer-loop that is being vectorized).  */
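
/* A typical C source form of the idiom (illustration only):

     signed char x[N], y[N];
     int sum = 0;
     for (i = 0; i < N; i++)
       sum += x[i] * y[i];

   which is rewritten into a DOT_PROD_EXPR reduction when the target
   provides a suitable dot-product optab.  */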
vect_recog_dot_prod_pattern (vec_info *vinfo,
			     stmt_vec_info stmt_vinfo, tree *type_out)
  tree oprnd0, oprnd1;
  gimple *last_stmt = stmt_vinfo->stmt;
  tree type, half_type;
  gimple *pattern_stmt;

  /* Look for the following pattern
	  DDPROD = (TYPE2) DPROD;
	  sum_1 = DDPROD + sum_0;
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type but the sign
       between X, Y and DPROD can differ.
     - sum is the same size as DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,

  type = TREE_TYPE (gimple_get_lhs (last_stmt));

  vect_unpromoted_value unprom_mult;
  oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr  */

  stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  vect_unpromoted_value unprom0[2];
  enum optab_subtype subtype = optab_vector;
  if (!vect_widened_op_tree (vinfo, mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
			     false, 2, unprom0, &half_type, &subtype))

  /* If there are two widening operations, make sure they agree on the sign
     of the extension.  The result of an optab_vector_mixed_sign operation
     is signed; otherwise, the result has the same sign as the operands.  */
  if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
      && (subtype == optab_vector_mixed_sign
	  ? TYPE_UNSIGNED (unprom_mult.type)
	  : TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type)))

  vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);

  /* If the inputs have mixed signs, canonicalize on using the signed
     input type for analysis.  This also helps when emulating mixed-sign
     operations using signed operations.  */
  if (subtype == optab_vector_mixed_sign)
    half_type = signed_type_for (half_type);

  if (!vect_supportable_direct_optab_p (vinfo, type, DOT_PROD_EXPR, half_type,
					type_out, &half_vectype, subtype))
      /* We can emulate a mixed-sign dot-product using a sequence of
	 signed dot-products; see vect_emulate_mixed_dot_prod for details.  */
      if (subtype != optab_vector_mixed_sign
	  || !vect_supportable_direct_optab_p (vinfo, signed_type_for (type),
					       DOT_PROD_EXPR, half_type,
					       type_out, &half_vectype,

      *type_out = signed_or_unsigned_type_for (TYPE_UNSIGNED (type),

  /* Get the inputs in the appropriate types.  */
  vect_convert_inputs (vinfo, stmt_vinfo, 2, mult_oprnd, half_type,
		       unprom0, half_vectype, subtype);

  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
				      mult_oprnd[0], mult_oprnd[1], oprnd1);

  return pattern_stmt;
/* Function vect_recog_sad_pattern

   Try to find the following Sum of Absolute Difference (SAD) pattern:

     signed TYPE1 diff, abs_diff;
     sum_0 = phi <init, sum_1>
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  diff = x_T - y_T;
     S6  abs_diff = ABS_EXPR <diff>;
     [S7  abs_diff = (TYPE2) abs_diff;  #optional]
     S8  sum_1 = abs_diff + sum_0;

   where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
   same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   * STMT_VINFO: The stmt from which the pattern search begins.  In the
   example, when this function is called with S8, the pattern
   {S3,S4,S5,S6,S7,S8} will be detected.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
	 SAD_EXPR <x_t, y_t, sum_0>  */
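
/* A typical C source form of the idiom (illustration only):

     unsigned char x[N], y[N];
     int sum = 0;
     for (i = 0; i < N; i++)
       {
	 int diff = x[i] - y[i];
	 sum += diff < 0 ? -diff : diff;
       }

   which is rewritten into a SAD_EXPR reduction when the target supports
   a sum-of-absolute-differences operation on the narrow inputs.  */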
vect_recog_sad_pattern (vec_info *vinfo,
			stmt_vec_info stmt_vinfo, tree *type_out)
  gimple *last_stmt = stmt_vinfo->stmt;

  /* Look for the following pattern
	  DAD = ABS_EXPR <DDIFF>;
	  DDPROD = (TYPE2) DPROD;
	  sum_1 = DAD + sum_0;
     - DX is at least double the size of X
     - DY is at least double the size of Y
     - DX, DY, DDIFF, DAD all have the same type
     - sum is the same size as DAD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD w+ sum_0;    #widen summation
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD + sum_0;     #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  tree plus_oprnd0, plus_oprnd1;
  if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
				       &plus_oprnd0, &plus_oprnd1))

  tree sum_type = TREE_TYPE (gimple_get_lhs (last_stmt));

  /* Any non-truncating sequence of conversions is OK here, since
     with a successful match, the result of the ABS(U) is known to fit
     within the nonnegative range of the result type.  (It cannot be the
     negative of the minimum signed value due to the range of the widening
  vect_unpromoted_value unprom_abs;
  plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that plus_oprnd1 is the reduction variable (defined by a loop-header
     phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
     Then check that plus_oprnd0 is defined by an abs_expr.  */

  stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
  if (!abs_stmt_vinfo)

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
  vect_unpromoted_value unprom[2];

      gcall *abd_stmt = dyn_cast <gcall *> (abs_stmt_vinfo->stmt);
	  || !gimple_call_internal_p (abd_stmt)
	  || gimple_call_num_args (abd_stmt) != 2)

      tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
      tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);

      if (gimple_call_internal_fn (abd_stmt) == IFN_ABD)
	  if (!vect_look_through_possible_promotion (vinfo, abd_oprnd0,
	      || !vect_look_through_possible_promotion (vinfo, abd_oprnd1,
      else if (gimple_call_internal_fn (abd_stmt) == IFN_VEC_WIDEN_ABD)
	  unprom[0].op = abd_oprnd0;
	  unprom[0].type = TREE_TYPE (abd_oprnd0);
	  unprom[1].op = abd_oprnd1;
	  unprom[1].type = TREE_TYPE (abd_oprnd1);

      half_type = unprom[0].type;
  else if (!vect_recog_absolute_difference (vinfo, abs_stmt, &half_type,

  vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);

  if (!vect_supportable_direct_optab_p (vinfo, sum_type, SAD_EXPR, half_type,
					type_out, &half_vectype))

  /* Get the inputs to the SAD_EXPR in the appropriate types.  */
  vect_convert_inputs (vinfo, stmt_vinfo, 2, sad_oprnd, half_type,
		       unprom, half_vectype);

  tree var = vect_recog_temp_ssa_var (sum_type, NULL);
  gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
					      sad_oprnd[1], plus_oprnd1);

  return pattern_stmt;
/* Function vect_recog_abd_pattern

   Try to find the following ABsolute Difference (ABD) or
   widening ABD (WIDEN_ABD) pattern:

     TYPE3 x_cast = (TYPE3) x;		  // widening or no-op
     TYPE3 y_cast = (TYPE3) y;		  // widening or no-op
     TYPE3 diff = x_cast - y_cast;
     TYPE4 diff_cast = (TYPE4) diff;	  // widening or no-op
     TYPE5 abs = ABS(U)_EXPR <diff_cast>;

   WIDEN_ABD exists to optimize the case where TYPE4 is at least
   twice as wide as TYPE3.

   * STMT_VINFO: The stmt from which the pattern search begins

   * TYPE_OUT: The type of the output of this pattern

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern, principally:
	out = IFN_ABD (x, y)
	out = IFN_WIDEN_ABD (x, y)  */
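
/* A C-level sketch of the unwidened form (illustration only):

     unsigned char x, y;
     int diff = (int) x - (int) y;
     unsigned char out = diff < 0 ? -diff : diff;

   which can be computed directly as IFN_ABD (x, y) on the narrow type;
   when the absolute value is instead stored into a type at least twice
   as wide as x and y, IFN_WIDEN_ABD can be used.  */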
vect_recog_abd_pattern (vec_info *vinfo,
			stmt_vec_info stmt_vinfo, tree *type_out)
  gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));

  tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));

  vect_unpromoted_value unprom[2];
  if (!vect_recog_absolute_difference (vinfo, last_stmt, &half_type,
				       unprom, &diff_stmt))

  tree abd_in_type, abd_out_type;
      abd_in_type = half_type;
      abd_out_type = abd_in_type;
      unprom[0].op = gimple_assign_rhs1 (diff_stmt);
      unprom[1].op = gimple_assign_rhs2 (diff_stmt);
      abd_in_type = signed_type_for (out_type);
      abd_out_type = abd_in_type;

  tree vectype_in = get_vectype_for_scalar_type (vinfo, abd_in_type);

  internal_fn ifn = IFN_ABD;
  tree vectype_out = vectype_in;

  if (TYPE_PRECISION (out_type) >= TYPE_PRECISION (abd_in_type) * 2
      && stmt_vinfo->min_output_precision >= TYPE_PRECISION (abd_in_type) * 2)
	= build_nonstandard_integer_type (TYPE_PRECISION (abd_in_type) * 2,
					  TYPE_UNSIGNED (abd_in_type));
      tree mid_vectype = get_vectype_for_scalar_type (vinfo, mid_type);

      code_helper dummy_code;
      auto_vec<tree> dummy_vec;
	  && supportable_widening_operation (vinfo, IFN_VEC_WIDEN_ABD,
					     stmt_vinfo, mid_vectype,
					     &dummy_code, &dummy_code,
					     &dummy_int, &dummy_vec))
	  ifn = IFN_VEC_WIDEN_ABD;
	  abd_out_type = mid_type;
	  vectype_out = mid_vectype;

      && !direct_internal_fn_supported_p (ifn, vectype_in,
					  OPTIMIZE_FOR_SPEED))

  vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);

  vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
		       abd_in_type, unprom, vectype_in);

  *type_out = get_vectype_for_scalar_type (vinfo, out_type);

  tree abd_result = vect_recog_temp_ssa_var (abd_out_type, NULL);
  gcall *abd_stmt = gimple_build_call_internal (ifn, 2,
						abd_oprnds[0], abd_oprnds[1]);
  gimple_call_set_lhs (abd_stmt, abd_result);
  gimple_set_location (abd_stmt, gimple_location (last_stmt));

  gimple *stmt = abd_stmt;
  if (TYPE_PRECISION (abd_in_type) == TYPE_PRECISION (abd_out_type)
      && TYPE_PRECISION (abd_out_type) < TYPE_PRECISION (out_type)
      && !TYPE_UNSIGNED (abd_out_type))
      tree unsign = unsigned_type_for (abd_out_type);
      tree unsign_vectype = get_vectype_for_scalar_type (vinfo, unsign);
      stmt = vect_convert_output (vinfo, stmt_vinfo, unsign, stmt,

  return vect_convert_output (vinfo, stmt_vinfo, out_type, stmt, vectype_out);
/* Recognize an operation that performs ORIG_CODE on widened inputs,
   so that it can be treated as though it had the form:

      HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
      HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
    | RES_TYPE a_extend = (RES_TYPE) a_cast;  // promotion from HALF_TYPE
    | RES_TYPE b_extend = (RES_TYPE) b_cast;  // promotion from HALF_TYPE
    | RES_TYPE res = a_extend ORIG_CODE b_extend;

   Try to replace the pattern with:

      HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
      HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
    | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
    | RES_TYPE res = (EXT_TYPE) ext;  // possible no-op

   where EXT_TYPE is wider than HALF_TYPE but has the same signedness.

   SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts.  NAME is the
   name of the pattern being matched, for dump purposes.  */
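
/* For example (illustration only), with ORIG_CODE MULT_EXPR and WIDE_CODE
   WIDEN_MULT_EXPR:

     short a, b;
     int res = (int) a * (int) b;

   becomes a single WIDEN_MULT_EXPR on the short inputs, with the result
   converted to int only if that conversion is not already a no-op.  */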
vect_recog_widen_op_pattern (vec_info *vinfo,
			     stmt_vec_info last_stmt_info, tree *type_out,
			     tree_code orig_code, code_helper wide_code,
			     bool shift_p, const char *name)
  gimple *last_stmt = last_stmt_info->stmt;

  vect_unpromoted_value unprom[2];
  if (!vect_widened_op_tree (vinfo, last_stmt_info, orig_code, orig_code,
			     shift_p, 2, unprom, &half_type))

  /* Pattern detected.  */
  vect_pattern_detected (name, last_stmt);

  tree type = TREE_TYPE (gimple_get_lhs (last_stmt));
  if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
      || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
    itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
					    TYPE_UNSIGNED (half_type));

  /* Check target support  */
  tree vectype = get_vectype_for_scalar_type (vinfo, half_type);
  tree vecitype = get_vectype_for_scalar_type (vinfo, itype);

  tree vecctype = vecitype;
  if (orig_code == MINUS_EXPR
      && TYPE_UNSIGNED (itype)
      && TYPE_PRECISION (type) > TYPE_PRECISION (itype))
      /* Subtraction is special, even if half_type is unsigned and no matter
	 whether type is signed or unsigned, if type is wider than itype,
	 we need to sign-extend from the widening operation result to the
	 Consider half_type unsigned char, operand 1 0xfe, operand 2 0xff,
	 itype unsigned short and type either int or unsigned int.
	 Widened (unsigned short) 0xfe - (unsigned short) 0xff is
	 (unsigned short) 0xffff, but for type int we want the result -1
	 and for type unsigned int 0xffffffff rather than 0xffff.  */
      ctype = build_nonstandard_integer_type (TYPE_PRECISION (itype), 0);
      vecctype = get_vectype_for_scalar_type (vinfo, ctype);

  code_helper dummy_code;
  auto_vec<tree> dummy_vec;
      || !supportable_widening_operation (vinfo, wide_code, last_stmt_info,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))

  *type_out = get_vectype_for_scalar_type (vinfo, type);

  vect_convert_inputs (vinfo, last_stmt_info,
		       2, oprnd, half_type, unprom, vectype);

  tree var = vect_recog_temp_ssa_var (itype, NULL);
  gimple *pattern_stmt = vect_gimple_build (var, wide_code, oprnd[0], oprnd[1]);

  if (vecctype != vecitype)
    pattern_stmt = vect_convert_output (vinfo, last_stmt_info, ctype,
					pattern_stmt, vecitype);

  return vect_convert_output (vinfo, last_stmt_info,
			      type, pattern_stmt, vecctype);

/* Try to detect multiplication on widened inputs, converting MULT_EXPR
   to WIDEN_MULT_EXPR.  See vect_recog_widen_op_pattern for details.  */

vect_recog_widen_mult_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
  return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
				      MULT_EXPR, WIDEN_MULT_EXPR, false,
				      "vect_recog_widen_mult_pattern");

/* Try to detect addition on widened inputs, converting PLUS_EXPR
   to IFN_VEC_WIDEN_PLUS.  See vect_recog_widen_op_pattern for details.  */

vect_recog_widen_plus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
  return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
				      PLUS_EXPR, IFN_VEC_WIDEN_PLUS,
				      false, "vect_recog_widen_plus_pattern");

/* Try to detect subtraction on widened inputs, converting MINUS_EXPR
   to IFN_VEC_WIDEN_MINUS.  See vect_recog_widen_op_pattern for details.  */

vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
  return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
				      MINUS_EXPR, IFN_VEC_WIDEN_MINUS,
				      false, "vect_recog_widen_minus_pattern");
/* Try to detect abd on widened inputs, converting IFN_ABD
   to IFN_VEC_WIDEN_ABD.  */

vect_recog_widen_abd_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
  gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
  if (!last_stmt || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (last_stmt)))

  tree last_rhs = gimple_assign_rhs1 (last_stmt);

  tree in_type = TREE_TYPE (last_rhs);
  tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
  if (!INTEGRAL_TYPE_P (in_type)
      || !INTEGRAL_TYPE_P (out_type)
      || TYPE_PRECISION (in_type) * 2 != TYPE_PRECISION (out_type)
      || !TYPE_UNSIGNED (in_type))

  vect_unpromoted_value unprom;
  tree op = vect_look_through_possible_promotion (vinfo, last_rhs, &unprom);
  if (!op || TYPE_PRECISION (TREE_TYPE (op)) != TYPE_PRECISION (in_type))

  stmt_vec_info abd_pattern_vinfo = vect_get_internal_def (vinfo, op);
  if (!abd_pattern_vinfo)

  abd_pattern_vinfo = vect_stmt_to_vectorize (abd_pattern_vinfo);
  gcall *abd_stmt = dyn_cast <gcall *> (STMT_VINFO_STMT (abd_pattern_vinfo));
      || !gimple_call_internal_p (abd_stmt)
      || gimple_call_internal_fn (abd_stmt) != IFN_ABD)

  tree vectype_in = get_vectype_for_scalar_type (vinfo, in_type);
  tree vectype_out = get_vectype_for_scalar_type (vinfo, out_type);

  code_helper dummy_code;
  auto_vec<tree> dummy_vec;
  if (!supportable_widening_operation (vinfo, IFN_VEC_WIDEN_ABD, stmt_vinfo,
				       vectype_out, vectype_in,
				       &dummy_code, &dummy_code,
				       &dummy_int, &dummy_vec))

  vect_pattern_detected ("vect_recog_widen_abd_pattern", last_stmt);

  *type_out = vectype_out;

  tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
  tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
  tree widen_abd_result = vect_recog_temp_ssa_var (out_type, NULL);
  gcall *widen_abd_stmt = gimple_build_call_internal (IFN_VEC_WIDEN_ABD, 2,
						      abd_oprnd0, abd_oprnd1);
  gimple_call_set_lhs (widen_abd_stmt, widen_abd_result);
  gimple_set_location (widen_abd_stmt, gimple_location (last_stmt));
  return widen_abd_stmt;
/* Function vect_recog_ctz_ffs_pattern

   Try to find the following pattern:

     B = __builtin_ctz{,l,ll} (A);

     B = __builtin_ffs{,l,ll} (A);

   * STMT_VINFO: The stmt from which the pattern search begins.
   here it starts with B = __builtin_* (A);

   * TYPE_OUT: The vector type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern, using clz or popcount builtins.  */
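
/* Worked example of the identities used below (illustration only):
   for the 8-bit value X = 0b01011000,

     X - 1        = 0b01010111
     ~X           = 0b10100111
     (X - 1) & ~X = 0b00000111

   so .POPCOUNT ((X - 1) & ~X) = 3 = .CTZ (X), and
   PREC - .CLZ ((X - 1) & ~X) = 8 - 5 = 3 as well.  */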
vect_recog_ctz_ffs_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
  gimple *call_stmt = stmt_vinfo->stmt;
  gimple *pattern_stmt;
  tree rhs_oprnd, rhs_type, lhs_oprnd, lhs_type, vec_type, vec_rhs_type;
  internal_fn ifn = IFN_LAST, ifnnew = IFN_LAST;
  bool defined_at_zero = true, defined_at_zero_new = false;
  int val = 0, val_new = 0, val_cmp = 0;
  int sub = 0, add = 0;

  if (!is_gimple_call (call_stmt))

  if (gimple_call_num_args (call_stmt) != 1
      && gimple_call_num_args (call_stmt) != 2)

  rhs_oprnd = gimple_call_arg (call_stmt, 0);
  rhs_type = TREE_TYPE (rhs_oprnd);
  lhs_oprnd = gimple_call_lhs (call_stmt);
  lhs_type = TREE_TYPE (lhs_oprnd);
  if (!INTEGRAL_TYPE_P (lhs_type)
      || !INTEGRAL_TYPE_P (rhs_type)
      || !type_has_mode_precision_p (rhs_type)
      || TREE_CODE (rhs_oprnd) != SSA_NAME)

  switch (gimple_call_combined_fn (call_stmt))
      if (!gimple_call_internal_p (call_stmt)
	  || gimple_call_num_args (call_stmt) != 2)
	defined_at_zero = false;
	val = tree_to_shwi (gimple_call_arg (call_stmt, 1));

  prec = TYPE_PRECISION (rhs_type);
  loc = gimple_location (call_stmt);

  vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);

  vec_rhs_type = get_vectype_for_scalar_type (vinfo, rhs_type);

  /* Do it only if the backend doesn't have ctz<vector_mode>2 or
     ffs<vector_mode>2 pattern but does have clz<vector_mode>2 or
     popcount<vector_mode>2.  */
      || direct_internal_fn_supported_p (ifn, vec_rhs_type,
					 OPTIMIZE_FOR_SPEED))

      && direct_internal_fn_supported_p (IFN_CTZ, vec_rhs_type,
					 OPTIMIZE_FOR_SPEED))
	= CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (rhs_type),
  else if (direct_internal_fn_supported_p (IFN_CLZ, vec_rhs_type,
					   OPTIMIZE_FOR_SPEED))
	= CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (rhs_type),
  if ((ifnnew == IFN_LAST
       || (defined_at_zero && !defined_at_zero_new))
      && direct_internal_fn_supported_p (IFN_POPCOUNT, vec_rhs_type,
					 OPTIMIZE_FOR_SPEED))
      ifnnew = IFN_POPCOUNT;
      defined_at_zero_new = true;
  if (ifnnew == IFN_LAST)

  vect_pattern_detected ("vec_recog_ctz_ffs_pattern", call_stmt);

  if ((ifnnew == IFN_CLZ
       && defined_at_zero_new
      || (ifnnew == IFN_POPCOUNT && ifn == IFN_CTZ))
      /* .CTZ (X) = PREC - .CLZ ((X - 1) & ~X)
	 .CTZ (X) = .POPCOUNT ((X - 1) & ~X).  */
      if (ifnnew == IFN_CLZ)
      if (!TYPE_UNSIGNED (rhs_type))
	  rhs_type = unsigned_type_for (rhs_type);
	  vec_rhs_type = get_vectype_for_scalar_type (vinfo, rhs_type);
	  new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
	  pattern_stmt = gimple_build_assign (new_var, NOP_EXPR, rhs_oprnd);
	  append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt,
	  rhs_oprnd = new_var;

      tree m1 = vect_recog_temp_ssa_var (rhs_type, NULL);
      pattern_stmt = gimple_build_assign (m1, PLUS_EXPR, rhs_oprnd,
					  build_int_cst (rhs_type, -1));
      gimple_set_location (pattern_stmt, loc);
      append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);

      new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
      pattern_stmt = gimple_build_assign (new_var, BIT_NOT_EXPR, rhs_oprnd);
      gimple_set_location (pattern_stmt, loc);
      append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
      rhs_oprnd = new_var;

      new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
      pattern_stmt = gimple_build_assign (new_var, BIT_AND_EXPR,
      gimple_set_location (pattern_stmt, loc);
      append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
      rhs_oprnd = new_var;
  else if (ifnnew == IFN_CLZ)
      /* .CTZ (X) = (PREC - 1) - .CLZ (X & -X)
	 .FFS (X) = PREC - .CLZ (X & -X).  */
      sub = prec - (ifn == IFN_CTZ);
      val_cmp = sub - val_new;

      tree neg = vect_recog_temp_ssa_var (rhs_type, NULL);
      pattern_stmt = gimple_build_assign (neg, NEGATE_EXPR, rhs_oprnd);
      gimple_set_location (pattern_stmt, loc);
      append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);

      new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
      pattern_stmt = gimple_build_assign (new_var, BIT_AND_EXPR,
      gimple_set_location (pattern_stmt, loc);
      append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
      rhs_oprnd = new_var;
  else if (ifnnew == IFN_POPCOUNT)
      /* .CTZ (X) = PREC - .POPCOUNT (X | -X)
	 .FFS (X) = (PREC + 1) - .POPCOUNT (X | -X).  */
      sub = prec + (ifn == IFN_FFS);

      tree neg = vect_recog_temp_ssa_var (rhs_type, NULL);
      pattern_stmt = gimple_build_assign (neg, NEGATE_EXPR, rhs_oprnd);
      gimple_set_location (pattern_stmt, loc);
      append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);

      new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
      pattern_stmt = gimple_build_assign (new_var, BIT_IOR_EXPR,
      gimple_set_location (pattern_stmt, loc);
      append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
      rhs_oprnd = new_var;
  else if (ifnnew == IFN_CTZ)
      /* .FFS (X) = .CTZ (X) + 1.  */

  /* Create B = .IFNNEW (A).  */
  new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
  if ((ifnnew == IFN_CLZ || ifnnew == IFN_CTZ) && defined_at_zero_new)
      = gimple_build_call_internal (ifnnew, 2, rhs_oprnd,
				    build_int_cst (integer_type_node,
    pattern_stmt = gimple_build_call_internal (ifnnew, 1, rhs_oprnd);
  gimple_call_set_lhs (pattern_stmt, new_var);
  gimple_set_location (pattern_stmt, loc);
  *type_out = vec_type;

      append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
      tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
      pattern_stmt = gimple_build_assign (ret_var, MINUS_EXPR,
					  build_int_cst (lhs_type, sub),
      gimple_set_location (pattern_stmt, loc);

      append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
      tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
      pattern_stmt = gimple_build_assign (ret_var, PLUS_EXPR, new_var,
					  build_int_cst (lhs_type
, add
));
2030 gimple_set_location (pattern_stmt
, loc
);
2035 && (!defined_at_zero_new
|| val
!= val_cmp
))
2037 append_pattern_def_seq (vinfo
, stmt_vinfo
, pattern_stmt
, vec_type
);
2038 tree ret_var
= vect_recog_temp_ssa_var (lhs_type
, NULL
);
2039 rhs_oprnd
= gimple_call_arg (call_stmt
, 0);
2040 rhs_type
= TREE_TYPE (rhs_oprnd
);
2041 tree cmp
= build2_loc (loc
, NE_EXPR
, boolean_type_node
,
2042 rhs_oprnd
, build_zero_cst (rhs_type
));
2043 pattern_stmt
= gimple_build_assign (ret_var
, COND_EXPR
, cmp
,
2045 build_int_cst (lhs_type
, val
));
2048 if (dump_enabled_p ())
2049 dump_printf_loc (MSG_NOTE
, vect_location
,
2050 "created pattern stmt: %G", pattern_stmt
);
2052 return pattern_stmt
;
/* Function vect_recog_popcount_clz_ctz_ffs_pattern

   Try to find the following pattern:

   temp_in = (UTYPE2) A;

   temp_out = __builtin_popcount{,l,ll} (temp_in);
   B = (TYPE1) temp_out;

   TYPE2 may or may not be equal to TYPE3.
   i.e. TYPE2 is equal to TYPE3 for __builtin_popcount
   i.e. TYPE2 is not equal to TYPE3 for __builtin_popcountll

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.
   here it starts with B = (TYPE1) temp_out;

   Output:

   * TYPE_OUT: The vector type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
   B = .POPCOUNT (A);

   Similarly for clz, ctz and ffs.  */
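/* Illustrative sketch, not part of the original source: the scalar shape
   this pattern matches is a popcount-family call whose operand was zero
   extended from a narrower type, with the result cast back down, e.g.

     short
     popcount_of_ushort (unsigned short a)
     {
       return (short) __builtin_popcountll ((unsigned long long) a);
     }

   which the recognizer can replace by a single same-width .POPCOUNT on the
   unpromoted operand.  The helper name is made up for illustration.  */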
2089 vect_recog_popcount_clz_ctz_ffs_pattern (vec_info
*vinfo
,
2090 stmt_vec_info stmt_vinfo
,
2093 gassign
*last_stmt
= dyn_cast
<gassign
*> (stmt_vinfo
->stmt
);
2094 gimple
*call_stmt
, *pattern_stmt
;
2095 tree rhs_oprnd
, rhs_origin
, lhs_oprnd
, lhs_type
, vec_type
, new_var
;
2096 internal_fn ifn
= IFN_LAST
;
2099 /* Find B = (TYPE1) temp_out. */
2102 tree_code code
= gimple_assign_rhs_code (last_stmt
);
2103 if (!CONVERT_EXPR_CODE_P (code
))
2106 lhs_oprnd
= gimple_assign_lhs (last_stmt
);
2107 lhs_type
= TREE_TYPE (lhs_oprnd
);
2108 if (!INTEGRAL_TYPE_P (lhs_type
))
2111 rhs_oprnd
= gimple_assign_rhs1 (last_stmt
);
2112 if (TREE_CODE (rhs_oprnd
) != SSA_NAME
2113 || !has_single_use (rhs_oprnd
))
2115 call_stmt
= SSA_NAME_DEF_STMT (rhs_oprnd
);
2117 /* Find temp_out = __builtin_popcount{,l,ll} (temp_in); */
2118 if (!is_gimple_call (call_stmt
))
2120 switch (gimple_call_combined_fn (call_stmt
))
2128 /* Punt if call result is unsigned and defined value at zero
2129 is negative, as the negative value doesn't extend correctly. */
2130 if (TYPE_UNSIGNED (TREE_TYPE (rhs_oprnd
))
2131 && gimple_call_internal_p (call_stmt
)
2132 && CLZ_DEFINED_VALUE_AT_ZERO
2133 (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd
)), val
) == 2
2139 /* Punt if call result is unsigned and defined value at zero
2140 is negative, as the negative value doesn't extend correctly. */
2141 if (TYPE_UNSIGNED (TREE_TYPE (rhs_oprnd
))
2142 && gimple_call_internal_p (call_stmt
)
2143 && CTZ_DEFINED_VALUE_AT_ZERO
2144 (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd
)), val
) == 2
2155 if (gimple_call_num_args (call_stmt
) != 1
2156 && gimple_call_num_args (call_stmt
) != 2)
2159 rhs_oprnd
= gimple_call_arg (call_stmt
, 0);
2160 vect_unpromoted_value unprom_diff
;
2162 = vect_look_through_possible_promotion (vinfo
, rhs_oprnd
, &unprom_diff
);
2167 /* Input and output of .POPCOUNT should be same-precision integer. */
2168 if (TYPE_PRECISION (unprom_diff
.type
) != TYPE_PRECISION (lhs_type
))
2171 /* Also A should be unsigned or same precision as temp_in, otherwise
2172 different builtins/internal functions have different behaviors. */
2173 if (TYPE_PRECISION (unprom_diff
.type
)
2174 != TYPE_PRECISION (TREE_TYPE (rhs_oprnd
)))
2178 /* For popcount require zero extension, which doesn't add any
2179 further bits to the count. */
2180 if (!TYPE_UNSIGNED (unprom_diff
.type
))
2184 /* clzll (x) == clz (x) + 32 for unsigned x != 0, so ok
2185 if it is undefined at zero or if it matches also for the
2186 defined value there. */
2187 if (!TYPE_UNSIGNED (unprom_diff
.type
))
2189 if (!type_has_mode_precision_p (lhs_type
)
2190 || !type_has_mode_precision_p (TREE_TYPE (rhs_oprnd
)))
2192 addend
= (TYPE_PRECISION (TREE_TYPE (rhs_oprnd
))
2193 - TYPE_PRECISION (lhs_type
));
2194 if (gimple_call_internal_p (call_stmt
)
2195 && gimple_call_num_args (call_stmt
) == 2)
2198 val1
= tree_to_shwi (gimple_call_arg (call_stmt
, 1));
2200 = CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type
),
2202 if (d2
!= 2 || val1
!= val2
+ addend
)
2207 /* ctzll (x) == ctz (x) for unsigned or signed x != 0, so ok
2208 if it is undefined at zero or if it matches also for the
2209 defined value there. */
2210 if (gimple_call_internal_p (call_stmt
)
2211 && gimple_call_num_args (call_stmt
) == 2)
2214 val1
= tree_to_shwi (gimple_call_arg (call_stmt
, 1));
2216 = CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type
),
2218 if (d2
!= 2 || val1
!= val2
)
2223 /* ffsll (x) == ffs (x) for unsigned or signed x. */
2229 vec_type
= get_vectype_for_scalar_type (vinfo
, lhs_type
);
2230 /* Do it only if the backend has popcount<vector_mode>2 etc. pattern. */
2235 = direct_internal_fn_supported_p (ifn
, vec_type
, OPTIMIZE_FOR_SPEED
);
2243 /* vect_recog_ctz_ffs_pattern can implement ffs using ctz. */
2244 if (direct_internal_fn_supported_p (IFN_CTZ
, vec_type
,
2245 OPTIMIZE_FOR_SPEED
))
2249 /* vect_recog_ctz_ffs_pattern can implement ffs or ctz using
2251 if (direct_internal_fn_supported_p (IFN_CLZ
, vec_type
,
2252 OPTIMIZE_FOR_SPEED
))
2254 if (direct_internal_fn_supported_p (IFN_POPCOUNT
, vec_type
,
2255 OPTIMIZE_FOR_SPEED
))
2262 vect_pattern_detected ("vec_recog_popcount_clz_ctz_ffs_pattern",
2265 /* Create B = .POPCOUNT (A). */
2266 new_var
= vect_recog_temp_ssa_var (lhs_type
, NULL
);
2267 tree arg2
= NULL_TREE
;
2270 && CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type
),
2272 arg2
= build_int_cst (integer_type_node
, val
);
2273 else if (ifn
== IFN_CTZ
2274 && CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type
),
2276 arg2
= build_int_cst (integer_type_node
, val
);
2278 pattern_stmt
= gimple_build_call_internal (ifn
, 2, unprom_diff
.op
, arg2
);
2280 pattern_stmt
= gimple_build_call_internal (ifn
, 1, unprom_diff
.op
);
2281 gimple_call_set_lhs (pattern_stmt
, new_var
);
2282 gimple_set_location (pattern_stmt
, gimple_location (last_stmt
));
2283 *type_out
= vec_type
;
2285 if (dump_enabled_p ())
2286 dump_printf_loc (MSG_NOTE
, vect_location
,
2287 "created pattern stmt: %G", pattern_stmt
);
2291 gcc_assert (supported
);
2292 append_pattern_def_seq (vinfo
, stmt_vinfo
, pattern_stmt
, vec_type
);
2293 tree ret_var
= vect_recog_temp_ssa_var (lhs_type
, NULL
);
2294 pattern_stmt
= gimple_build_assign (ret_var
, PLUS_EXPR
, new_var
,
2295 build_int_cst (lhs_type
, addend
));
2297 else if (!supported
)
2299 stmt_vec_info new_stmt_info
= vinfo
->add_stmt (pattern_stmt
);
2300 STMT_VINFO_VECTYPE (new_stmt_info
) = vec_type
;
2302 = vect_recog_ctz_ffs_pattern (vinfo
, new_stmt_info
, type_out
);
2303 if (pattern_stmt
== NULL
)
2305 if (gimple_seq seq
= STMT_VINFO_PATTERN_DEF_SEQ (new_stmt_info
))
2307 gimple_seq
*pseq
= &STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
);
2308 gimple_seq_add_seq_without_update (pseq
, seq
);
2311 return pattern_stmt
;
/* Function vect_recog_pow_pattern

   Try to find the following pattern:

     x = POW (y, N);

   with POW being one of pow, powf, powi, powif and N being
   either 2 or 0.5.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        x = x * x
   or
        x = sqrt (x)  */
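/* Illustrative sketch, not part of the original source: the two shapes the
   recognizer rewrites correspond to

     double a = pow (y, 2.0);	// becomes y * y
     double b = pow (y, 0.5);	// becomes .SQRT (y) when the target has a
				// vector sqrt

   so only constant exponents of 2 and 0.5 (plus the exp (log (C) * x)
   special case handled inline below) are of interest here.  */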
2339 vect_recog_pow_pattern (vec_info
*vinfo
,
2340 stmt_vec_info stmt_vinfo
, tree
*type_out
)
2342 gimple
*last_stmt
= stmt_vinfo
->stmt
;
2347 if (!is_gimple_call (last_stmt
) || gimple_call_lhs (last_stmt
) == NULL
)
2350 switch (gimple_call_combined_fn (last_stmt
))
2360 base
= gimple_call_arg (last_stmt
, 0);
2361 exp
= gimple_call_arg (last_stmt
, 1);
2362 if (TREE_CODE (exp
) != REAL_CST
2363 && TREE_CODE (exp
) != INTEGER_CST
)
2365 if (flag_unsafe_math_optimizations
2366 && TREE_CODE (base
) == REAL_CST
2367 && gimple_call_builtin_p (last_stmt
, BUILT_IN_NORMAL
))
2369 combined_fn log_cfn
;
2370 built_in_function exp_bfn
;
2371 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt
)))
2374 log_cfn
= CFN_BUILT_IN_LOG
;
2375 exp_bfn
= BUILT_IN_EXP
;
2378 log_cfn
= CFN_BUILT_IN_LOGF
;
2379 exp_bfn
= BUILT_IN_EXPF
;
2382 log_cfn
= CFN_BUILT_IN_LOGL
;
2383 exp_bfn
= BUILT_IN_EXPL
;
2388 tree logc
= fold_const_call (log_cfn
, TREE_TYPE (base
), base
);
2389 tree exp_decl
= builtin_decl_implicit (exp_bfn
);
2390 /* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd
2391 does that, but if C is a power of 2, we want to use
2392 exp2 (log2 (C) * x) in the non-vectorized version, but for
2393 vectorization we don't have vectorized exp2. */
2395 && TREE_CODE (logc
) == REAL_CST
2397 && lookup_attribute ("omp declare simd",
2398 DECL_ATTRIBUTES (exp_decl
)))
2400 cgraph_node
*node
= cgraph_node::get_create (exp_decl
);
2401 if (node
->simd_clones
== NULL
)
2403 if (targetm
.simd_clone
.compute_vecsize_and_simdlen
== NULL
2404 || node
->definition
)
2406 expand_simd_clones (node
);
2407 if (node
->simd_clones
== NULL
)
2410 *type_out
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (base
));
2413 tree def
= vect_recog_temp_ssa_var (TREE_TYPE (base
), NULL
);
2414 gimple
*g
= gimple_build_assign (def
, MULT_EXPR
, exp
, logc
);
2415 append_pattern_def_seq (vinfo
, stmt_vinfo
, g
);
2416 tree res
= vect_recog_temp_ssa_var (TREE_TYPE (base
), NULL
);
2417 g
= gimple_build_call (exp_decl
, 1, def
);
2418 gimple_call_set_lhs (g
, res
);
2426 /* We now have a pow or powi builtin function call with a constant
2429 /* Catch squaring. */
2430 if ((tree_fits_shwi_p (exp
)
2431 && tree_to_shwi (exp
) == 2)
2432 || (TREE_CODE (exp
) == REAL_CST
2433 && real_equal (&TREE_REAL_CST (exp
), &dconst2
)))
2435 if (!vect_supportable_direct_optab_p (vinfo
, TREE_TYPE (base
), MULT_EXPR
,
2436 TREE_TYPE (base
), type_out
))
2439 var
= vect_recog_temp_ssa_var (TREE_TYPE (base
), NULL
);
2440 stmt
= gimple_build_assign (var
, MULT_EXPR
, base
, base
);
2444 /* Catch square root. */
2445 if (TREE_CODE (exp
) == REAL_CST
2446 && real_equal (&TREE_REAL_CST (exp
), &dconsthalf
))
2448 *type_out
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (base
));
2450 && direct_internal_fn_supported_p (IFN_SQRT
, *type_out
,
2451 OPTIMIZE_FOR_SPEED
))
2453 gcall
*stmt
= gimple_build_call_internal (IFN_SQRT
, 1, base
);
2454 var
= vect_recog_temp_ssa_var (TREE_TYPE (base
), stmt
);
2455 gimple_call_set_lhs (stmt
, var
);
2456 gimple_call_set_nothrow (stmt
, true);
/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

   type x_t;
   TYPE x_T, sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e. we're
   summing elements of type 'type' into an accumulator of type 'TYPE'.  This
   is a special case of a reduction computation.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.  In the
   example, when this function is called with S3, the pattern {S2,S3} will be
   detected.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
   WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
   vectorized without preserving all the intermediate results.  It
   produces only N/2 (widened) results (by summing up pairs of
   intermediate results) rather than all N results.  Therefore, we
   cannot allow this pattern when we want to get all the results and in
   the correct order (as is the case when this computation is in an
   inner-loop nested in an outer-loop that is being vectorized).  */
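/* Illustrative sketch, not part of the original source: a typical source
   loop this pattern comes from is

     int
     sum_bytes (const unsigned char *p, int n)
     {
       int sum = 0;
       for (int i = 0; i < n; i++)
	 sum += p[i];		// S2/S3: widen, then accumulate
       return sum;
     }

   where the char-to-int widening and the add are collapsed into a single
   WIDEN_SUM reduction operation.  The helper name is made up.  */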
static gimple *
vect_recog_widen_sum_pattern (vec_info *vinfo,
			      stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  tree oprnd0, oprnd1;
  tree type, var;
  gimple *pattern_stmt;

  /* Look for the following pattern
          DX = (TYPE) X;
          sum_1 = DX + sum_0;
     In which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
				       &oprnd0, &oprnd1)
      || TREE_CODE (oprnd0) != SSA_NAME
      || !vinfo->lookup_def (oprnd0))
    return NULL;

  type = TREE_TYPE (gimple_get_lhs (last_stmt));

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a cast from type 'type' to type
     'TYPE'.  */

  vect_unpromoted_value unprom0;
  if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
      || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
    return NULL;

  vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);

  if (!vect_supportable_direct_optab_p (vinfo, type, WIDEN_SUM_EXPR,
					unprom0.type, type_out))
    return NULL;

  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);

  return pattern_stmt;
}
/* Function vect_recog_bitfield_ref_pattern

   Try to find the following pattern:

   bf_value = BIT_FIELD_REF (container, bitsize, bitpos);
   result = (type_out) bf_value;

   or

   if (BIT_FIELD_REF (container, bitsize, bitpos) `cmp` <constant>)

   where type_out is a non-bitfield type, that is to say, its precision matches
   2^(TYPE_SIZE(type_out) - (TYPE_UNSIGNED (type_out) ? 1 : 2)).

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.
   here it starts with:
   result = (type_out) bf_value;

   or

   if (BIT_FIELD_REF (container, bitsize, bitpos) `cmp` <constant>)

   Output:

   * TYPE_OUT: The vector type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  If the precision of type_out is bigger
   than the precision of the type of _1 we perform the widening before the
   shifting, since the new precision will be large enough to shift the value
   and moving widening operations up the statement chain enables the
   generation of widening loads.  If we are widening and the operation after
   the pattern is an addition then we mask first and shift later, to enable
   the generation of shifting adds.  In the case of narrowing we will always
   mask first, shift last and then perform a narrowing operation.  This will
   enable the generation of narrowing shifts.

   Widening with mask first, shift later:
   container = (type_out) container;
   masked = container & (((1 << bitsize) - 1) << bitpos);
   result = masked >> bitpos;

   Widening with shift first, mask last:
   container = (type_out) container;
   shifted = container >> bitpos;
   result = shifted & ((1 << bitsize) - 1);

   Narrowing:
   masked = container & (((1 << bitsize) - 1) << bitpos);
   result = masked >> bitpos;
   result = (type_out) result;

   If the bitfield is signed and it's wider than type_out, we need to
   keep the result sign-extended:
   container = (type) container;
   masked = container << (prec - bitsize - bitpos);
   result = (type_out) (masked >> (prec - bitsize));

   Here type is the signed variant of the wider of type_out and the type
   of the container.

   The shifting is always optional depending on whether bitpos != 0.

   When the original bitfield was inside a gcond then a new gcond is also
   generated with the newly created `result` as the operand to the
   comparison.  */
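/* Illustrative sketch, not part of the original source: a typical input is a
   read from a bitfield member that earlier gimple lowering has already
   turned into a BIT_FIELD_REF of the underlying container, e.g.

     struct s { unsigned int a : 5; unsigned int b : 11; };

     unsigned int
     get_b (struct s x)
     {
       return x.b;	// conceptually BIT_FIELD_REF <x, 11, 5> widened to int
     }

   which the pattern re-expresses as a full-container load followed by the
   mask-and-shift sequences documented above.  The struct layout shown is a
   little-endian assumption.  */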
2624 vect_recog_bitfield_ref_pattern (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2627 gimple
*bf_stmt
= NULL
;
2628 tree lhs
= NULL_TREE
;
2629 tree ret_type
= NULL_TREE
;
2630 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
2631 if (gcond
*cond_stmt
= dyn_cast
<gcond
*> (stmt
))
2633 tree op
= gimple_cond_lhs (cond_stmt
);
2634 if (TREE_CODE (op
) != SSA_NAME
)
2636 bf_stmt
= dyn_cast
<gassign
*> (SSA_NAME_DEF_STMT (op
));
2637 if (TREE_CODE (gimple_cond_rhs (cond_stmt
)) != INTEGER_CST
)
2640 else if (is_gimple_assign (stmt
)
2641 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt
))
2642 && TREE_CODE (gimple_assign_rhs1 (stmt
)) == SSA_NAME
)
2644 gimple
*second_stmt
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt
));
2645 bf_stmt
= dyn_cast
<gassign
*> (second_stmt
);
2646 lhs
= gimple_assign_lhs (stmt
);
2647 ret_type
= TREE_TYPE (lhs
);
2651 || gimple_assign_rhs_code (bf_stmt
) != BIT_FIELD_REF
)
2654 tree bf_ref
= gimple_assign_rhs1 (bf_stmt
);
2655 tree container
= TREE_OPERAND (bf_ref
, 0);
2656 ret_type
= ret_type
? ret_type
: TREE_TYPE (container
);
2658 if (!bit_field_offset (bf_ref
).is_constant ()
2659 || !bit_field_size (bf_ref
).is_constant ()
2660 || !tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (container
))))
2663 if (!INTEGRAL_TYPE_P (TREE_TYPE (bf_ref
))
2664 || !INTEGRAL_TYPE_P (TREE_TYPE (container
))
2665 || TYPE_MODE (TREE_TYPE (container
)) == E_BLKmode
)
2668 gimple
*use_stmt
, *pattern_stmt
;
2669 use_operand_p use_p
;
2670 bool shift_first
= true;
2671 tree container_type
= TREE_TYPE (container
);
2672 tree vectype
= get_vectype_for_scalar_type (vinfo
, container_type
);
2674 /* Calculate shift_n before the adjustments for widening loads, otherwise
2675 the container may change and we have to consider offset change for
2676 widening loads on big endianness. The shift_n calculated here can be
2677 independent of widening. */
2678 unsigned HOST_WIDE_INT shift_n
= bit_field_offset (bf_ref
).to_constant ();
2679 unsigned HOST_WIDE_INT mask_width
= bit_field_size (bf_ref
).to_constant ();
2680 unsigned HOST_WIDE_INT prec
= tree_to_uhwi (TYPE_SIZE (container_type
));
2681 if (BYTES_BIG_ENDIAN
)
2682 shift_n
= prec
- shift_n
- mask_width
;
2684 bool ref_sext
= (!TYPE_UNSIGNED (TREE_TYPE (bf_ref
)) &&
2685 TYPE_PRECISION (ret_type
) > mask_width
);
2686 bool load_widen
= (TYPE_PRECISION (TREE_TYPE (container
)) <
2687 TYPE_PRECISION (ret_type
));
2689 /* We move the conversion earlier if the loaded type is smaller than the
2690 return type to enable the use of widening loads. And if we need a
2691 sign extension, we need to convert the loaded value early to a signed
2693 if (ref_sext
|| load_widen
)
2695 tree type
= load_widen
? ret_type
: container_type
;
2697 type
= gimple_signed_type (type
);
2698 pattern_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (type
),
2699 NOP_EXPR
, container
);
2700 container
= gimple_get_lhs (pattern_stmt
);
2701 container_type
= TREE_TYPE (container
);
2702 prec
= tree_to_uhwi (TYPE_SIZE (container_type
));
2703 vectype
= get_vectype_for_scalar_type (vinfo
, container_type
);
2704 append_pattern_def_seq (vinfo
, stmt_info
, pattern_stmt
, vectype
);
2706 else if (!useless_type_conversion_p (TREE_TYPE (container
), ret_type
))
2707 /* If we are doing the conversion last then also delay the shift as we may
2708 be able to combine the shift and conversion in certain cases. */
2709 shift_first
= false;
2711 /* If the only use of the result of this BIT_FIELD_REF + CONVERT is a
2712 PLUS_EXPR then do the shift last as some targets can combine the shift and
2713 add into a single instruction. */
2714 if (lhs
&& single_imm_use (lhs
, &use_p
, &use_stmt
))
2716 if (gimple_code (use_stmt
) == GIMPLE_ASSIGN
2717 && gimple_assign_rhs_code (use_stmt
) == PLUS_EXPR
)
2718 shift_first
= false;
2721 /* If we don't have to shift we only generate the mask, so just fix the
2722 code-path to shift_first. */
2727 if (shift_first
&& !ref_sext
)
2729 tree shifted
= container
;
2733 = gimple_build_assign (vect_recog_temp_ssa_var (container_type
),
2734 RSHIFT_EXPR
, container
,
2735 build_int_cst (sizetype
, shift_n
));
2736 shifted
= gimple_assign_lhs (pattern_stmt
);
2737 append_pattern_def_seq (vinfo
, stmt_info
, pattern_stmt
, vectype
);
2740 tree mask
= wide_int_to_tree (container_type
,
2741 wi::mask (mask_width
, false, prec
));
2744 = gimple_build_assign (vect_recog_temp_ssa_var (container_type
),
2745 BIT_AND_EXPR
, shifted
, mask
);
2746 result
= gimple_assign_lhs (pattern_stmt
);
2750 tree temp
= vect_recog_temp_ssa_var (container_type
);
2753 tree mask
= wide_int_to_tree (container_type
,
2754 wi::shifted_mask (shift_n
,
2757 pattern_stmt
= gimple_build_assign (temp
, BIT_AND_EXPR
,
2762 HOST_WIDE_INT shl
= prec
- shift_n
- mask_width
;
2764 pattern_stmt
= gimple_build_assign (temp
, LSHIFT_EXPR
,
2766 build_int_cst (sizetype
,
2770 tree masked
= gimple_assign_lhs (pattern_stmt
);
2771 append_pattern_def_seq (vinfo
, stmt_info
, pattern_stmt
, vectype
);
2773 = gimple_build_assign (vect_recog_temp_ssa_var (container_type
),
2774 RSHIFT_EXPR
, masked
,
2775 build_int_cst (sizetype
, shift_n
));
2776 result
= gimple_assign_lhs (pattern_stmt
);
2779 if (!useless_type_conversion_p (TREE_TYPE (result
), ret_type
))
2781 append_pattern_def_seq (vinfo
, stmt_info
, pattern_stmt
, vectype
);
2783 = gimple_build_assign (vect_recog_temp_ssa_var (ret_type
),
2789 append_pattern_def_seq (vinfo
, stmt_info
, pattern_stmt
, vectype
);
2790 gcond
*cond_stmt
= dyn_cast
<gcond
*> (stmt_info
->stmt
);
2791 tree cond_cst
= gimple_cond_rhs (cond_stmt
);
2793 = gimple_build_cond (gimple_cond_code (cond_stmt
),
2794 gimple_get_lhs (pattern_stmt
),
2795 fold_convert (ret_type
, cond_cst
),
2796 gimple_cond_true_label (cond_stmt
),
2797 gimple_cond_false_label (cond_stmt
));
2800 *type_out
= STMT_VINFO_VECTYPE (stmt_info
);
2801 vect_pattern_detected ("bitfield_ref pattern", stmt_info
->stmt
);
2803 return pattern_stmt
;
/* Function vect_recog_bit_insert_pattern

   Try to find the following pattern:

   written = BIT_INSERT_EXPR (container, value, bitpos);

   Input:

   * STMT_VINFO: The stmt we want to replace.

   Output:

   * TYPE_OUT: The vector type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
   value = (container_type) value;	    // Make sure we're of a
					    // compatible type.
   shifted = value << bitpos;		    // Shift value into place.
   masked = shifted & (mask << bitpos);	    // Mask off the non-relevant
					    // bits in the 'to-write value'.
   cleared = container & ~(mask << bitpos); // Clearing the bits we want to
					    // write to from the value we
					    // want to write to.
   written = cleared | masked;		    // Write bits.

   where mask = ((1 << TYPE_PRECISION (value)) - 1), a mask to keep the number
   of bits corresponding to the real size of the bitfield value we are writing
   to.  The shifting is always optional depending on whether bitpos != 0.  */
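/* Illustrative sketch, not part of the original source: the equivalent
   scalar read-modify-write for storing an 11-bit value at bit position 5
   of a 32-bit container is

     unsigned int
     insert11at5 (unsigned int container, unsigned int value)
     {
       unsigned int mask = (1u << 11) - 1;
       unsigned int masked = (value << 5) & (mask << 5);
       unsigned int cleared = container & ~(mask << 5);
       return cleared | masked;
     }

   which mirrors the pattern statements built below.  The helper name is
   made up for illustration.  */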
2839 vect_recog_bit_insert_pattern (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2842 gassign
*bf_stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
2843 if (!bf_stmt
|| gimple_assign_rhs_code (bf_stmt
) != BIT_INSERT_EXPR
)
2846 tree container
= gimple_assign_rhs1 (bf_stmt
);
2847 tree value
= gimple_assign_rhs2 (bf_stmt
);
2848 tree shift
= gimple_assign_rhs3 (bf_stmt
);
2850 tree bf_type
= TREE_TYPE (value
);
2851 tree container_type
= TREE_TYPE (container
);
2853 if (!INTEGRAL_TYPE_P (container_type
)
2854 || !tree_fits_uhwi_p (TYPE_SIZE (container_type
)))
2857 gimple
*pattern_stmt
;
2859 vect_unpromoted_value unprom
;
2860 unprom
.set_op (value
, vect_internal_def
);
2861 value
= vect_convert_input (vinfo
, stmt_info
, container_type
, &unprom
,
2862 get_vectype_for_scalar_type (vinfo
,
2865 unsigned HOST_WIDE_INT mask_width
= TYPE_PRECISION (bf_type
);
2866 unsigned HOST_WIDE_INT prec
= tree_to_uhwi (TYPE_SIZE (container_type
));
2867 unsigned HOST_WIDE_INT shift_n
= tree_to_uhwi (shift
);
2868 if (BYTES_BIG_ENDIAN
)
2870 shift_n
= prec
- shift_n
- mask_width
;
2871 shift
= build_int_cst (TREE_TYPE (shift
), shift_n
);
2874 if (!useless_type_conversion_p (TREE_TYPE (value
), container_type
))
2877 gimple_build_assign (vect_recog_temp_ssa_var (container_type
),
2879 append_pattern_def_seq (vinfo
, stmt_info
, pattern_stmt
);
2880 value
= gimple_get_lhs (pattern_stmt
);
2883 /* Shift VALUE into place. */
2884 tree shifted
= value
;
2887 gimple_seq stmts
= NULL
;
2889 = gimple_build (&stmts
, LSHIFT_EXPR
, container_type
, value
, shift
);
2890 if (!gimple_seq_empty_p (stmts
))
2891 append_pattern_def_seq (vinfo
, stmt_info
,
2892 gimple_seq_first_stmt (stmts
));
2896 = wide_int_to_tree (container_type
,
2897 wi::shifted_mask (shift_n
, mask_width
, false, prec
));
2899 /* Clear bits we don't want to write back from SHIFTED. */
2900 gimple_seq stmts
= NULL
;
2901 tree masked
= gimple_build (&stmts
, BIT_AND_EXPR
, container_type
, shifted
,
2903 if (!gimple_seq_empty_p (stmts
))
2905 pattern_stmt
= gimple_seq_first_stmt (stmts
);
2906 append_pattern_def_seq (vinfo
, stmt_info
, pattern_stmt
);
2909 /* Mask off the bits in the container that we are to write to. */
2910 mask_t
= wide_int_to_tree (container_type
,
2911 wi::shifted_mask (shift_n
, mask_width
, true, prec
));
2912 tree cleared
= vect_recog_temp_ssa_var (container_type
);
2913 pattern_stmt
= gimple_build_assign (cleared
, BIT_AND_EXPR
, container
, mask_t
);
2914 append_pattern_def_seq (vinfo
, stmt_info
, pattern_stmt
);
2916 /* Write MASKED into CLEARED. */
2918 = gimple_build_assign (vect_recog_temp_ssa_var (container_type
),
2919 BIT_IOR_EXPR
, cleared
, masked
);
2921 *type_out
= STMT_VINFO_VECTYPE (stmt_info
);
2922 vect_pattern_detected ("bit_insert pattern", stmt_info
->stmt
);
2924 return pattern_stmt
;
/* Recognize cases in which an operation is performed in one type WTYPE
   but could be done more efficiently in a narrower type NTYPE.  For example,
   if we have:

     ATYPE a;  // narrower than NTYPE
     BTYPE b;  // narrower than NTYPE
     WTYPE aw = (WTYPE) a;
     WTYPE bw = (WTYPE) b;
     WTYPE res = aw + bw;  // only uses of aw and bw

   then it would be more efficient to do:

     NTYPE an = (NTYPE) a;
     NTYPE bn = (NTYPE) b;
     NTYPE resn = an + bn;
     WTYPE res = (WTYPE) resn;

   Other situations include things like:

     ATYPE a;  // NTYPE or narrower
     WTYPE aw = (WTYPE) a;
     WTYPE res = aw + b;

   when only "(NTYPE) res" is significant.  In that case it's more efficient
   to truncate "b" and do the operation on NTYPE instead:

     NTYPE an = (NTYPE) a;
     NTYPE bn = (NTYPE) b;  // truncation
     NTYPE resn = an + bn;
     WTYPE res = (WTYPE) resn;

   All users of "res" should then use "resn" instead, making the final
   statement dead (not marked as relevant).  The final statement is still
   needed to maintain the type correctness of the IR.

   vect_determine_precisions has already determined the minimum
   precision of the operation and the minimum precision required
   by users of the result.  */
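/* Illustrative sketch, not part of the original source: a concrete C shape
   the narrowing applies to is

     void
     add_bytes (unsigned char *r, const unsigned char *a,
		const unsigned char *b, int n)
     {
       for (int i = 0; i < n; i++)
	 r[i] = a[i] + b[i];	// promoted to int, only low 8 bits stored
     }

   where the front end promotes the addition to int but only the low byte of
   the result is used, so the operation can be done in a byte vector.  */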
2968 vect_recog_over_widening_pattern (vec_info
*vinfo
,
2969 stmt_vec_info last_stmt_info
, tree
*type_out
)
2971 gassign
*last_stmt
= dyn_cast
<gassign
*> (last_stmt_info
->stmt
);
2975 /* See whether we have found that this operation can be done on a
2976 narrower type without changing its semantics. */
2977 unsigned int new_precision
= last_stmt_info
->operation_precision
;
2981 tree lhs
= gimple_assign_lhs (last_stmt
);
2982 tree type
= TREE_TYPE (lhs
);
2983 tree_code code
= gimple_assign_rhs_code (last_stmt
);
2985 /* Punt for reductions where we don't handle the type conversions. */
2986 if (STMT_VINFO_DEF_TYPE (last_stmt_info
) == vect_reduction_def
)
2989 /* Keep the first operand of a COND_EXPR as-is: only the other two
2990 operands are interesting. */
2991 unsigned int first_op
= (code
== COND_EXPR
? 2 : 1);
2993 /* Check the operands. */
2994 unsigned int nops
= gimple_num_ops (last_stmt
) - first_op
;
2995 auto_vec
<vect_unpromoted_value
, 3> unprom (nops
);
2996 unprom
.quick_grow_cleared (nops
);
2997 unsigned int min_precision
= 0;
2998 bool single_use_p
= false;
2999 for (unsigned int i
= 0; i
< nops
; ++i
)
3001 tree op
= gimple_op (last_stmt
, first_op
+ i
);
3002 if (TREE_CODE (op
) == INTEGER_CST
)
3003 unprom
[i
].set_op (op
, vect_constant_def
);
3004 else if (TREE_CODE (op
) == SSA_NAME
)
3006 bool op_single_use_p
= true;
3007 if (!vect_look_through_possible_promotion (vinfo
, op
, &unprom
[i
],
3012 (1) N bits of the result are needed;
3013 (2) all inputs are widened from M<N bits; and
3014 (3) one operand OP is a single-use SSA name
3016 we can shift the M->N widening from OP to the output
3017 without changing the number or type of extensions involved.
3018 This then reduces the number of copies of STMT_INFO.
3020 If instead of (3) more than one operand is a single-use SSA name,
3021 shifting the extension to the output is even more of a win.
3025 (1) N bits of the result are needed;
3026 (2) one operand OP2 is widened from M2<N bits;
3027 (3) another operand OP1 is widened from M1<M2 bits; and
3028 (4) both OP1 and OP2 are single-use
3030 the choice is between:
3032 (a) truncating OP2 to M1, doing the operation on M1,
3033 and then widening the result to N
3035 (b) widening OP1 to M2, doing the operation on M2, and then
3036 widening the result to N
3038 Both shift the M2->N widening of the inputs to the output.
3039 (a) additionally shifts the M1->M2 widening to the output;
3040 it requires fewer copies of STMT_INFO but requires an extra
3043 Which is better will depend on the complexity and cost of
3044 STMT_INFO, which is hard to predict at this stage. However,
3045 a clear tie-breaker in favor of (b) is the fact that the
3046 truncation in (a) increases the length of the operation chain.
3048 If instead of (4) only one of OP1 or OP2 is single-use,
3049 (b) is still a win over doing the operation in N bits:
3050 it still shifts the M2->N widening on the single-use operand
3051 to the output and reduces the number of STMT_INFO copies.
3053 If neither operand is single-use then operating on fewer than
3054 N bits might lead to more extensions overall. Whether it does
3055 or not depends on global information about the vectorization
3056 region, and whether that's a good trade-off would again
3057 depend on the complexity and cost of the statements involved,
3058 as well as things like register pressure that are not normally
3059 modelled at this stage. We therefore ignore these cases
3060 and just optimize the clear single-use wins above.
3062 Thus we take the maximum precision of the unpromoted operands
3063 and record whether any operand is single-use. */
3064 if (unprom
[i
].dt
== vect_internal_def
)
3066 min_precision
= MAX (min_precision
,
3067 TYPE_PRECISION (unprom
[i
].type
));
3068 single_use_p
|= op_single_use_p
;
3075 /* Although the operation could be done in operation_precision, we have
3076 to balance that against introducing extra truncations or extensions.
3077 Calculate the minimum precision that can be handled efficiently.
3079 The loop above determined that the operation could be handled
3080 efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
3081 extension from the inputs to the output without introducing more
3082 instructions, and would reduce the number of instructions required
3083 for STMT_INFO itself.
3085 vect_determine_precisions has also determined that the result only
3086 needs min_output_precision bits. Truncating by a factor of N times
3087 requires a tree of N - 1 instructions, so if TYPE is N times wider
3088 than min_output_precision, doing the operation in TYPE and truncating
3089 the result requires N + (N - 1) = 2N - 1 instructions per output vector.
3092 - truncating the input to a unary operation and doing the operation
3093 in the new type requires at most N - 1 + 1 = N instructions per
3096 - doing the same for a binary operation requires at most
3097 (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
3099 Both unary and binary operations require fewer instructions than
3100 this if the operands were extended from a suitable truncated form.
3101 Thus there is usually nothing to lose by doing operations in
3102 min_output_precision bits, but there can be something to gain. */
3104 min_precision
= last_stmt_info
->min_output_precision
;
3106 min_precision
= MIN (min_precision
, last_stmt_info
->min_output_precision
);
3108 /* Apply the minimum efficient precision we just calculated. */
3109 if (new_precision
< min_precision
)
3110 new_precision
= min_precision
;
3111 new_precision
= vect_element_precision (new_precision
);
3112 if (new_precision
>= TYPE_PRECISION (type
))
3115 vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt
);
3117 *type_out
= get_vectype_for_scalar_type (vinfo
, type
);
3121 /* We've found a viable pattern. Get the new type of the operation. */
3122 bool unsigned_p
= (last_stmt_info
->operation_sign
== UNSIGNED
);
3123 tree new_type
= build_nonstandard_integer_type (new_precision
, unsigned_p
);
3125 /* If we're truncating an operation, we need to make sure that we
3126 don't introduce new undefined overflow. The codes tested here are
3127 a subset of those accepted by vect_truncatable_operation_p. */
3128 tree op_type
= new_type
;
3129 if (TYPE_OVERFLOW_UNDEFINED (new_type
)
3130 && (code
== PLUS_EXPR
|| code
== MINUS_EXPR
|| code
== MULT_EXPR
))
3131 op_type
= build_nonstandard_integer_type (new_precision
, true);
3133 /* We specifically don't check here whether the target supports the
3134 new operation, since it might be something that a later pattern
3135 wants to rewrite anyway. If targets have a minimum element size
3136 for some optabs, we should pattern-match smaller ops to larger ops
3137 where beneficial. */
3138 tree new_vectype
= get_vectype_for_scalar_type (vinfo
, new_type
);
3139 tree op_vectype
= get_vectype_for_scalar_type (vinfo
, op_type
);
3140 if (!new_vectype
|| !op_vectype
)
3143 if (dump_enabled_p ())
3144 dump_printf_loc (MSG_NOTE
, vect_location
, "demoting %T to %T\n",
3147 /* Calculate the rhs operands for an operation on OP_TYPE. */
3149 for (unsigned int i
= 1; i
< first_op
; ++i
)
3150 ops
[i
- 1] = gimple_op (last_stmt
, i
);
3151 /* For right shifts limit the shift operand. */
3152 vect_convert_inputs (vinfo
, last_stmt_info
, nops
, &ops
[first_op
- 1],
3153 op_type
, &unprom
[0], op_vectype
);
3155 /* Limit shift operands. */
3156 if (code
== RSHIFT_EXPR
)
3158 wide_int min_value
, max_value
;
3159 if (TREE_CODE (ops
[1]) == INTEGER_CST
)
3160 ops
[1] = wide_int_to_tree (op_type
,
3161 wi::umin (wi::to_wide (ops
[1]),
3162 new_precision
- 1));
3163 else if (!vect_get_range_info (ops
[1], &min_value
, &max_value
)
3164 || wi::ge_p (max_value
, new_precision
, TYPE_SIGN (op_type
)))
3166 /* ??? Note the following bad for SLP as that only supports
3167 same argument widened shifts and it un-CSEs same arguments. */
3168 tree new_var
= vect_recog_temp_ssa_var (op_type
, NULL
);
3169 gimple
*pattern_stmt
3170 = gimple_build_assign (new_var
, MIN_EXPR
, ops
[1],
3171 build_int_cst (op_type
, new_precision
- 1));
3172 gimple_set_location (pattern_stmt
, gimple_location (last_stmt
));
3173 if (ops
[1] == unprom
[1].op
&& unprom
[1].dt
== vect_external_def
)
3175 if (edge e
= vect_get_external_def_edge (vinfo
, ops
[1]))
3178 = gsi_insert_on_edge_immediate (e
, pattern_stmt
);
3179 gcc_assert (!new_bb
);
3185 append_pattern_def_seq (vinfo
, last_stmt_info
, pattern_stmt
,
3191 /* Use the operation to produce a result of type OP_TYPE. */
3192 tree new_var
= vect_recog_temp_ssa_var (op_type
, NULL
);
3193 gimple
*pattern_stmt
= gimple_build_assign (new_var
, code
,
3194 ops
[0], ops
[1], ops
[2]);
3195 gimple_set_location (pattern_stmt
, gimple_location (last_stmt
));
3197 if (dump_enabled_p ())
3198 dump_printf_loc (MSG_NOTE
, vect_location
,
3199 "created pattern stmt: %G", pattern_stmt
);
3201 /* Convert back to the original signedness, if OP_TYPE is different
3203 if (op_type
!= new_type
)
3204 pattern_stmt
= vect_convert_output (vinfo
, last_stmt_info
, new_type
,
3205 pattern_stmt
, op_vectype
);
3207 /* Promote the result to the original type. */
3208 pattern_stmt
= vect_convert_output (vinfo
, last_stmt_info
, type
,
3209 pattern_stmt
, new_vectype
);
3211 return pattern_stmt
;
/* Recognize the following patterns:

     ATYPE a;  // narrower than TYPE
     BTYPE b;  // narrower than TYPE

   1) Multiply high with scaling
     TYPE res = ((TYPE) a * (TYPE) b) >> c;
     Here, c is bitsize (TYPE) / 2 - 1.

   2) ... or also with rounding
     TYPE res = ((((TYPE) a * (TYPE) b) >> d) + 1) >> 1;
     Here, d is bitsize (TYPE) / 2 - 2.

   3) Normal multiply high
     TYPE res = ((TYPE) a * (TYPE) b) >> e;
     Here, e is bitsize (TYPE) / 2.

   where only the bottom half of res is used.  */
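/* Illustrative sketch, not part of the original source: for 16-bit inputs
   widened to 32 bits, the three shapes above look like

     short mulh   (short a, short b) { return ((int) a * b) >> 16; }  // 3)
     short mulhs  (short a, short b) { return ((int) a * b) >> 15; }  // 1)
     short mulhrs (short a, short b)
     { return ((((int) a * b) >> 14) + 1) >> 1; }		       // 2)

   and each maps to one of the IFN_MULH, IFN_MULHS or IFN_MULHRS internal
   functions when the target provides it.  The helper names are made up.  */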
3234 vect_recog_mulhs_pattern (vec_info
*vinfo
,
3235 stmt_vec_info last_stmt_info
, tree
*type_out
)
3237 /* Check for a right shift. */
3238 gassign
*last_stmt
= dyn_cast
<gassign
*> (last_stmt_info
->stmt
);
3240 || gimple_assign_rhs_code (last_stmt
) != RSHIFT_EXPR
)
3243 /* Check that the shift result is wider than the users of the
3244 result need (i.e. that narrowing would be a natural choice). */
3245 tree lhs_type
= TREE_TYPE (gimple_assign_lhs (last_stmt
));
3246 unsigned int target_precision
3247 = vect_element_precision (last_stmt_info
->min_output_precision
);
3248 if (!INTEGRAL_TYPE_P (lhs_type
)
3249 || target_precision
>= TYPE_PRECISION (lhs_type
))
3252 /* Look through any change in sign on the outer shift input. */
3253 vect_unpromoted_value unprom_rshift_input
;
3254 tree rshift_input
= vect_look_through_possible_promotion
3255 (vinfo
, gimple_assign_rhs1 (last_stmt
), &unprom_rshift_input
);
3257 || TYPE_PRECISION (TREE_TYPE (rshift_input
))
3258 != TYPE_PRECISION (lhs_type
))
3261 /* Get the definition of the shift input. */
3262 stmt_vec_info rshift_input_stmt_info
3263 = vect_get_internal_def (vinfo
, rshift_input
);
3264 if (!rshift_input_stmt_info
)
3266 gassign
*rshift_input_stmt
3267 = dyn_cast
<gassign
*> (rshift_input_stmt_info
->stmt
);
3268 if (!rshift_input_stmt
)
3271 stmt_vec_info mulh_stmt_info
;
3273 bool rounding_p
= false;
3275 /* Check for the presence of the rounding term. */
3276 if (gimple_assign_rhs_code (rshift_input_stmt
) == PLUS_EXPR
)
3278 /* Check that the outer shift was by 1. */
3279 if (!integer_onep (gimple_assign_rhs2 (last_stmt
)))
3282 /* Check that the second operand of the PLUS_EXPR is 1. */
3283 if (!integer_onep (gimple_assign_rhs2 (rshift_input_stmt
)))
3286 /* Look through any change in sign on the addition input. */
3287 vect_unpromoted_value unprom_plus_input
;
3288 tree plus_input
= vect_look_through_possible_promotion
3289 (vinfo
, gimple_assign_rhs1 (rshift_input_stmt
), &unprom_plus_input
);
3291 || TYPE_PRECISION (TREE_TYPE (plus_input
))
3292 != TYPE_PRECISION (TREE_TYPE (rshift_input
)))
3295 /* Get the definition of the multiply-high-scale part. */
3296 stmt_vec_info plus_input_stmt_info
3297 = vect_get_internal_def (vinfo
, plus_input
);
3298 if (!plus_input_stmt_info
)
3300 gassign
*plus_input_stmt
3301 = dyn_cast
<gassign
*> (plus_input_stmt_info
->stmt
);
3302 if (!plus_input_stmt
3303 || gimple_assign_rhs_code (plus_input_stmt
) != RSHIFT_EXPR
)
3306 /* Look through any change in sign on the scaling input. */
3307 vect_unpromoted_value unprom_scale_input
;
3308 tree scale_input
= vect_look_through_possible_promotion
3309 (vinfo
, gimple_assign_rhs1 (plus_input_stmt
), &unprom_scale_input
);
3311 || TYPE_PRECISION (TREE_TYPE (scale_input
))
3312 != TYPE_PRECISION (TREE_TYPE (plus_input
)))
3315 /* Get the definition of the multiply-high part. */
3316 mulh_stmt_info
= vect_get_internal_def (vinfo
, scale_input
);
3317 if (!mulh_stmt_info
)
3320 /* Get the scaling term. */
3321 scale_term
= gimple_assign_rhs2 (plus_input_stmt
);
3326 mulh_stmt_info
= rshift_input_stmt_info
;
3327 scale_term
= gimple_assign_rhs2 (last_stmt
);
3330 /* Check that the scaling factor is constant. */
3331 if (TREE_CODE (scale_term
) != INTEGER_CST
)
3334 /* Check whether the scaling input term can be seen as two widened
3335 inputs multiplied together. */
3336 vect_unpromoted_value unprom_mult
[2];
3339 = vect_widened_op_tree (vinfo
, mulh_stmt_info
, MULT_EXPR
, WIDEN_MULT_EXPR
,
3340 false, 2, unprom_mult
, &new_type
);
3344 /* Adjust output precision. */
3345 if (TYPE_PRECISION (new_type
) < target_precision
)
3346 new_type
= build_nonstandard_integer_type
3347 (target_precision
, TYPE_UNSIGNED (new_type
));
3349 unsigned mult_precision
= TYPE_PRECISION (new_type
);
3351 /* Check that the scaling factor is expected. Instead of
3352 target_precision, we should use the one that we actually
3353 use for internal function. */
3356 /* Check pattern 2). */
3357 if (wi::to_widest (scale_term
) + mult_precision
+ 2
3358 != TYPE_PRECISION (lhs_type
))
3365 /* Check for pattern 1). */
3366 if (wi::to_widest (scale_term
) + mult_precision
+ 1
3367 == TYPE_PRECISION (lhs_type
))
3369 /* Check for pattern 3). */
3370 else if (wi::to_widest (scale_term
) + mult_precision
3371 == TYPE_PRECISION (lhs_type
))
3377 vect_pattern_detected ("vect_recog_mulhs_pattern", last_stmt
);
3379 /* Check for target support. */
3380 tree new_vectype
= get_vectype_for_scalar_type (vinfo
, new_type
);
3382 || !direct_internal_fn_supported_p
3383 (ifn
, new_vectype
, OPTIMIZE_FOR_SPEED
))
3386 /* The IR requires a valid vector type for the cast result, even though
3387 it's likely to be discarded. */
3388 *type_out
= get_vectype_for_scalar_type (vinfo
, lhs_type
);
3392 /* Generate the IFN_MULHRS call. */
3393 tree new_var
= vect_recog_temp_ssa_var (new_type
, NULL
);
3395 vect_convert_inputs (vinfo
, last_stmt_info
, 2, new_ops
, new_type
,
3396 unprom_mult
, new_vectype
);
3398 = gimple_build_call_internal (ifn
, 2, new_ops
[0], new_ops
[1]);
3399 gimple_call_set_lhs (mulhrs_stmt
, new_var
);
3400 gimple_set_location (mulhrs_stmt
, gimple_location (last_stmt
));
3402 if (dump_enabled_p ())
3403 dump_printf_loc (MSG_NOTE
, vect_location
,
3404 "created pattern stmt: %G", (gimple
*) mulhrs_stmt
);
3406 return vect_convert_output (vinfo
, last_stmt_info
, lhs_type
,
3407 mulhrs_stmt
, new_vectype
);
/* Recognize the patterns:

     ATYPE a;  // narrower than TYPE
     BTYPE b;  // narrower than TYPE
     (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
  or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;

   where only the bottom half of avg is used.  Try to transform them into:

     (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
  or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);

   followed by:

     TYPE avg = (TYPE) avg';

   where NTYPE is no wider than half of TYPE.  Since only the bottom half
   of avg is used, all or part of the cast of avg' should become redundant.

   If there is no target support available, generate code to distribute rshift
   over plus and add a carry.  */
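/* Illustrative sketch, not part of the original source: the classic source
   form is a rounding average of two byte arrays,

     void
     avg_u8 (unsigned char *r, const unsigned char *a,
	     const unsigned char *b, int n)
     {
       for (int i = 0; i < n; i++)
	 r[i] = (a[i] + b[i] + 1) >> 1;	  // shape (2) -> .AVG_CEIL
     }

   where the int-typed addition cannot overflow the widened type, so the
   whole computation can be carried out on byte vectors.  */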
3433 vect_recog_average_pattern (vec_info
*vinfo
,
3434 stmt_vec_info last_stmt_info
, tree
*type_out
)
3436 /* Check for a shift right by one bit. */
3437 gassign
*last_stmt
= dyn_cast
<gassign
*> (last_stmt_info
->stmt
);
3439 || gimple_assign_rhs_code (last_stmt
) != RSHIFT_EXPR
3440 || !integer_onep (gimple_assign_rhs2 (last_stmt
)))
3443 /* Check that the shift result is wider than the users of the
3444 result need (i.e. that narrowing would be a natural choice). */
3445 tree lhs
= gimple_assign_lhs (last_stmt
);
3446 tree type
= TREE_TYPE (lhs
);
3447 unsigned int target_precision
3448 = vect_element_precision (last_stmt_info
->min_output_precision
);
3449 if (!INTEGRAL_TYPE_P (type
) || target_precision
>= TYPE_PRECISION (type
))
3452 /* Look through any change in sign on the shift input. */
3453 tree rshift_rhs
= gimple_assign_rhs1 (last_stmt
);
3454 vect_unpromoted_value unprom_plus
;
3455 rshift_rhs
= vect_look_through_possible_promotion (vinfo
, rshift_rhs
,
3458 || TYPE_PRECISION (TREE_TYPE (rshift_rhs
)) != TYPE_PRECISION (type
))
3461 /* Get the definition of the shift input. */
3462 stmt_vec_info plus_stmt_info
= vect_get_internal_def (vinfo
, rshift_rhs
);
3463 if (!plus_stmt_info
)
3466 /* Check whether the shift input can be seen as a tree of additions on
3467 2 or 3 widened inputs.
3469 Note that the pattern should be a win even if the result of one or
3470 more additions is reused elsewhere: if the pattern matches, we'd be
3471 replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s. */
3472 internal_fn ifn
= IFN_AVG_FLOOR
;
3473 vect_unpromoted_value unprom
[3];
3475 unsigned int nops
= vect_widened_op_tree (vinfo
, plus_stmt_info
, PLUS_EXPR
,
3476 IFN_VEC_WIDEN_PLUS
, false, 3,
3482 /* Check that one operand is 1. */
3484 for (i
= 0; i
< 3; ++i
)
3485 if (integer_onep (unprom
[i
].op
))
3489 /* Throw away the 1 operand and keep the other two. */
3491 unprom
[i
] = unprom
[2];
3495 vect_pattern_detected ("vect_recog_average_pattern", last_stmt
);
3499 (a) the operation can be viewed as:
3501 TYPE widened0 = (TYPE) UNPROM[0];
3502 TYPE widened1 = (TYPE) UNPROM[1];
3503 TYPE tmp1 = widened0 + widened1 {+ 1};
3504 TYPE tmp2 = tmp1 >> 1; // LAST_STMT_INFO
3506 (b) the first two statements are equivalent to:
3508 TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
3509 TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
3511 (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
3514 (d) all the operations can be performed correctly at twice the width of
3515 NEW_TYPE, due to the nature of the average operation; and
3517 (e) users of the result of the right shift need only TARGET_PRECISION
3518 bits, where TARGET_PRECISION is no more than half of TYPE's
3521 Under these circumstances, the only situation in which NEW_TYPE
3522 could be narrower than TARGET_PRECISION is if widened0, widened1
3523 and an addition result are all used more than once. Thus we can
3524 treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
3525 as "free", whereas widening the result of the average instruction
3526 from NEW_TYPE to TARGET_PRECISION would be a new operation. It's
3527 therefore better not to go narrower than TARGET_PRECISION. */
3528 if (TYPE_PRECISION (new_type
) < target_precision
)
3529 new_type
= build_nonstandard_integer_type (target_precision
,
3530 TYPE_UNSIGNED (new_type
));
3532 /* Check for target support. */
3533 tree new_vectype
= get_vectype_for_scalar_type (vinfo
, new_type
);
3537 bool fallback_p
= false;
3539 if (direct_internal_fn_supported_p (ifn
, new_vectype
, OPTIMIZE_FOR_SPEED
))
3541 else if (TYPE_UNSIGNED (new_type
)
3542 && optab_for_tree_code (RSHIFT_EXPR
, new_vectype
, optab_scalar
)
3543 && optab_for_tree_code (PLUS_EXPR
, new_vectype
, optab_default
)
3544 && optab_for_tree_code (BIT_IOR_EXPR
, new_vectype
, optab_default
)
3545 && optab_for_tree_code (BIT_AND_EXPR
, new_vectype
, optab_default
))
3550 /* The IR requires a valid vector type for the cast result, even though
3551 it's likely to be discarded. */
3552 *type_out
= get_vectype_for_scalar_type (vinfo
, type
);
3556 tree new_var
= vect_recog_temp_ssa_var (new_type
, NULL
);
3558 vect_convert_inputs (vinfo
, last_stmt_info
, 2, new_ops
, new_type
,
3559 unprom
, new_vectype
);
3563 /* As a fallback, generate code for following sequence:
3565 shifted_op0 = new_ops[0] >> 1;
3566 shifted_op1 = new_ops[1] >> 1;
3567 sum_of_shifted = shifted_op0 + shifted_op1;
3568 unmasked_carry = new_ops[0] and/or new_ops[1];
3569 carry = unmasked_carry & 1;
3570 new_var = sum_of_shifted + carry;
3573 tree one_cst
= build_one_cst (new_type
);
3576 tree shifted_op0
= vect_recog_temp_ssa_var (new_type
, NULL
);
3577 g
= gimple_build_assign (shifted_op0
, RSHIFT_EXPR
, new_ops
[0], one_cst
);
3578 append_pattern_def_seq (vinfo
, last_stmt_info
, g
, new_vectype
);
3580 tree shifted_op1
= vect_recog_temp_ssa_var (new_type
, NULL
);
3581 g
= gimple_build_assign (shifted_op1
, RSHIFT_EXPR
, new_ops
[1], one_cst
);
3582 append_pattern_def_seq (vinfo
, last_stmt_info
, g
, new_vectype
);
3584 tree sum_of_shifted
= vect_recog_temp_ssa_var (new_type
, NULL
);
3585 g
= gimple_build_assign (sum_of_shifted
, PLUS_EXPR
,
3586 shifted_op0
, shifted_op1
);
3587 append_pattern_def_seq (vinfo
, last_stmt_info
, g
, new_vectype
);
3589 tree unmasked_carry
= vect_recog_temp_ssa_var (new_type
, NULL
);
3590 tree_code c
= (ifn
== IFN_AVG_CEIL
) ? BIT_IOR_EXPR
: BIT_AND_EXPR
;
3591 g
= gimple_build_assign (unmasked_carry
, c
, new_ops
[0], new_ops
[1]);
3592 append_pattern_def_seq (vinfo
, last_stmt_info
, g
, new_vectype
);
3594 tree carry
= vect_recog_temp_ssa_var (new_type
, NULL
);
3595 g
= gimple_build_assign (carry
, BIT_AND_EXPR
, unmasked_carry
, one_cst
);
3596 append_pattern_def_seq (vinfo
, last_stmt_info
, g
, new_vectype
);
3598 g
= gimple_build_assign (new_var
, PLUS_EXPR
, sum_of_shifted
, carry
);
3599 return vect_convert_output (vinfo
, last_stmt_info
, type
, g
, new_vectype
);
3602 /* Generate the IFN_AVG* call. */
3603 gcall
*average_stmt
= gimple_build_call_internal (ifn
, 2, new_ops
[0],
3605 gimple_call_set_lhs (average_stmt
, new_var
);
3606 gimple_set_location (average_stmt
, gimple_location (last_stmt
));
3608 if (dump_enabled_p ())
3609 dump_printf_loc (MSG_NOTE
, vect_location
,
3610 "created pattern stmt: %G", (gimple
*) average_stmt
);
3612 return vect_convert_output (vinfo
, last_stmt_info
,
3613 type
, average_stmt
, new_vectype
);
/* Recognize cases in which the input to a cast is wider than its
   output, and the input is fed by a widening operation.  Fold this
   by removing the unnecessary intermediate widening.  E.g.:

     unsigned char a;
     unsigned int b = (unsigned int) a;
     unsigned short c = (unsigned short) b;

   -->

     unsigned short c = (unsigned short) a;

   Although this is rare in input IR, it is an expected side-effect
   of the over-widening pattern above.

   This is beneficial also for integer-to-float conversions, if the
   widened integer has more bits than the float, and if the unwidened
   input doesn't.  */
static gimple *
vect_recog_cast_forwprop_pattern (vec_info *vinfo,
				  stmt_vec_info last_stmt_info, tree *type_out)
{
  /* Check for a cast, including an integer-to-float conversion.  */
  gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
  if (!last_stmt)
    return NULL;
  tree_code code = gimple_assign_rhs_code (last_stmt);
  if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
    return NULL;

  /* Make sure that the rhs is a scalar with a natural bitsize.  */
  tree lhs = gimple_assign_lhs (last_stmt);
  if (!lhs)
    return NULL;
  tree lhs_type = TREE_TYPE (lhs);
  scalar_mode lhs_mode;
  if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
      || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
    return NULL;

  /* Check for a narrowing operation (from a vector point of view).  */
  tree rhs = gimple_assign_rhs1 (last_stmt);
  tree rhs_type = TREE_TYPE (rhs);
  if (!INTEGRAL_TYPE_P (rhs_type)
      || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
      || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
    return NULL;

  /* Try to find an unpromoted input.  */
  vect_unpromoted_value unprom;
  if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
      || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
    return NULL;

  /* If the bits above RHS_TYPE matter, make sure that they're the
     same when extending from UNPROM as they are when extending from RHS.  */
  if (!INTEGRAL_TYPE_P (lhs_type)
      && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
    return NULL;

  /* We can get the same result by casting UNPROM directly, to avoid
     the unnecessary widening and narrowing.  */
  vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);

  *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
  if (!*type_out)
    return NULL;

  tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
  gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
  gimple_set_location (pattern_stmt, gimple_location (last_stmt));
  return pattern_stmt;
}
/* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
   to WIDEN_LSHIFT_EXPR.  See vect_recog_widen_op_pattern for details.  */

static gimple *
vect_recog_widen_shift_pattern (vec_info *vinfo,
				stmt_vec_info last_stmt_info, tree *type_out)
{
  return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
				      LSHIFT_EXPR, WIDEN_LSHIFT_EXPR, true,
				      "vect_recog_widen_shift_pattern");
}
/* Detect a rotate pattern that wouldn't be otherwise vectorized:

   type a_t, b_t, c_t;

   S0 a_t = b_t r<< c_t;

  Input/Output:

  * STMT_VINFO: The stmt from which the pattern search begins,
    i.e. the shift/rotate stmt.  The original stmt (S0) is replaced
    by a sequence:

    S1 d_t = -c_t;
    S2 e_t = d_t & (B - 1);
    S3 f_t = b_t << c_t;
    S4 g_t = b_t >> e_t;
    S0 a_t = f_t | g_t;

    where B is element bitsize of type.

   Output:

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the rotate
    S0 stmt.  */
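/* Illustrative sketch, not part of the original source: the scalar
   replacement sequence for a 32-bit left rotate is

     unsigned int
     rotl32 (unsigned int b, unsigned int c)
     {
       unsigned int e = -c & 31;	// S1/S2
       return (b << c) | (b >> e);	// S3/S4/S0
     }

   which is well defined for c in [0, 31]; the pattern statements built
   below are the vector analogue of this expansion.  The helper name is
   made up for illustration.  */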
3732 vect_recog_rotate_pattern (vec_info
*vinfo
,
3733 stmt_vec_info stmt_vinfo
, tree
*type_out
)
3735 gimple
*last_stmt
= stmt_vinfo
->stmt
;
3736 tree oprnd0
, oprnd1
, lhs
, var
, var1
, var2
, vectype
, type
, stype
, def
, def2
;
3737 gimple
*pattern_stmt
, *def_stmt
;
3738 enum tree_code rhs_code
;
3739 enum vect_def_type dt
;
3740 optab optab1
, optab2
;
3741 edge ext_def
= NULL
;
3742 bool bswap16_p
= false;
3744 if (is_gimple_assign (last_stmt
))
3746 rhs_code
= gimple_assign_rhs_code (last_stmt
);
3756 lhs
= gimple_assign_lhs (last_stmt
);
3757 oprnd0
= gimple_assign_rhs1 (last_stmt
);
3758 type
= TREE_TYPE (oprnd0
);
3759 oprnd1
= gimple_assign_rhs2 (last_stmt
);
3761 else if (gimple_call_builtin_p (last_stmt
, BUILT_IN_BSWAP16
))
3763 /* __builtin_bswap16 (x) is another form of x r>> 8.
3764 The vectorizer has bswap support, but only if the argument isn't
3766 lhs
= gimple_call_lhs (last_stmt
);
3767 oprnd0
= gimple_call_arg (last_stmt
, 0);
3768 type
= TREE_TYPE (oprnd0
);
3770 || TYPE_PRECISION (TREE_TYPE (lhs
)) != 16
3771 || TYPE_PRECISION (type
) <= 16
3772 || TREE_CODE (oprnd0
) != SSA_NAME
3773 || BITS_PER_UNIT
!= 8)
3776 stmt_vec_info def_stmt_info
;
3777 if (!vect_is_simple_use (oprnd0
, vinfo
, &dt
, &def_stmt_info
, &def_stmt
))
3780 if (dt
!= vect_internal_def
)
3783 if (gimple_assign_cast_p (def_stmt
))
3785 def
= gimple_assign_rhs1 (def_stmt
);
3786 if (INTEGRAL_TYPE_P (TREE_TYPE (def
))
3787 && TYPE_PRECISION (TREE_TYPE (def
)) == 16)
3791 type
= TREE_TYPE (lhs
);
3792 vectype
= get_vectype_for_scalar_type (vinfo
, type
);
3793 if (vectype
== NULL_TREE
)
3796 if (tree char_vectype
= get_same_sized_vectype (char_type_node
, vectype
))
3798 /* The encoding uses one stepped pattern for each byte in the
3800 vec_perm_builder
elts (TYPE_VECTOR_SUBPARTS (char_vectype
), 2, 3);
3801 for (unsigned i
= 0; i
< 3; ++i
)
3802 for (unsigned j
= 0; j
< 2; ++j
)
3803 elts
.quick_push ((i
+ 1) * 2 - j
- 1);
3805 vec_perm_indices
indices (elts
, 1,
3806 TYPE_VECTOR_SUBPARTS (char_vectype
));
3807 machine_mode vmode
= TYPE_MODE (char_vectype
);
3808 if (can_vec_perm_const_p (vmode
, vmode
, indices
))
3810 /* vectorizable_bswap can handle the __builtin_bswap16 if we
3811 undo the argument promotion. */
3812 if (!useless_type_conversion_p (type
, TREE_TYPE (oprnd0
)))
3814 def
= vect_recog_temp_ssa_var (type
, NULL
);
3815 def_stmt
= gimple_build_assign (def
, NOP_EXPR
, oprnd0
);
3816 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
3820 /* Pattern detected. */
3821 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt
);
3823 *type_out
= vectype
;
3825 /* Pattern supported. Create a stmt to be used to replace the
3826 pattern, with the unpromoted argument. */
3827 var
= vect_recog_temp_ssa_var (type
, NULL
);
3828 pattern_stmt
= gimple_build_call (gimple_call_fndecl (last_stmt
),
3830 gimple_call_set_lhs (pattern_stmt
, var
);
3831 gimple_call_set_fntype (as_a
<gcall
*> (pattern_stmt
),
3832 gimple_call_fntype (last_stmt
));
3833 return pattern_stmt
;
3837 oprnd1
= build_int_cst (integer_type_node
, 8);
3838 rhs_code
= LROTATE_EXPR
;
3844 if (TREE_CODE (oprnd0
) != SSA_NAME
3845 || !INTEGRAL_TYPE_P (type
)
3846 || TYPE_PRECISION (TREE_TYPE (lhs
)) != TYPE_PRECISION (type
))
3849 stmt_vec_info def_stmt_info
;
3850 if (!vect_is_simple_use (oprnd1
, vinfo
, &dt
, &def_stmt_info
, &def_stmt
))
3853 if (dt
!= vect_internal_def
3854 && dt
!= vect_constant_def
3855 && dt
!= vect_external_def
)
3858 vectype
= get_vectype_for_scalar_type (vinfo
, type
);
3859 if (vectype
== NULL_TREE
)
3862 /* If vector/vector or vector/scalar rotate is supported by the target,
3863 don't do anything here. */
3864 optab1
= optab_for_tree_code (rhs_code
, vectype
, optab_vector
);
3866 && optab_handler (optab1
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
3871 if (!useless_type_conversion_p (type
, TREE_TYPE (oprnd0
)))
3873 def
= vect_recog_temp_ssa_var (type
, NULL
);
3874 def_stmt
= gimple_build_assign (def
, NOP_EXPR
, oprnd0
);
3875 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
3879 /* Pattern detected. */
3880 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt
);
3882 *type_out
= vectype
;
3884 /* Pattern supported. Create a stmt to be used to replace the
3886 var
= vect_recog_temp_ssa_var (type
, NULL
);
3887 pattern_stmt
= gimple_build_assign (var
, LROTATE_EXPR
, oprnd0
,
3889 return pattern_stmt
;
3894 if (is_a
<bb_vec_info
> (vinfo
) || dt
!= vect_internal_def
)
3896 optab2
= optab_for_tree_code (rhs_code
, vectype
, optab_scalar
);
3898 && optab_handler (optab2
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
3902 tree utype
= unsigned_type_for (type
);
3903 tree uvectype
= get_vectype_for_scalar_type (vinfo
, utype
);
3907 /* If vector/vector or vector/scalar shifts aren't supported by the target,
3908 don't do anything here either. */
3909 optab1
= optab_for_tree_code (LSHIFT_EXPR
, uvectype
, optab_vector
);
3910 optab2
= optab_for_tree_code (RSHIFT_EXPR
, uvectype
, optab_vector
);
3912 || optab_handler (optab1
, TYPE_MODE (uvectype
)) == CODE_FOR_nothing
3914 || optab_handler (optab2
, TYPE_MODE (uvectype
)) == CODE_FOR_nothing
)
3916 if (! is_a
<bb_vec_info
> (vinfo
) && dt
== vect_internal_def
)
3918 optab1
= optab_for_tree_code (LSHIFT_EXPR
, uvectype
, optab_scalar
);
3919 optab2
= optab_for_tree_code (RSHIFT_EXPR
, uvectype
, optab_scalar
);
3921 || optab_handler (optab1
, TYPE_MODE (uvectype
)) == CODE_FOR_nothing
3923 || optab_handler (optab2
, TYPE_MODE (uvectype
)) == CODE_FOR_nothing
)
3927 *type_out
= vectype
;
3929 if (!useless_type_conversion_p (utype
, TREE_TYPE (oprnd0
)))
3931 def
= vect_recog_temp_ssa_var (utype
, NULL
);
3932 def_stmt
= gimple_build_assign (def
, NOP_EXPR
, oprnd0
);
3933 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
, uvectype
);
3937 if (dt
== vect_external_def
&& TREE_CODE (oprnd1
) == SSA_NAME
)
3938 ext_def
= vect_get_external_def_edge (vinfo
, oprnd1
);
3941 scalar_int_mode mode
= SCALAR_INT_TYPE_MODE (utype
);
3942 if (dt
!= vect_internal_def
|| TYPE_MODE (TREE_TYPE (oprnd1
)) == mode
)
3944 else if (def_stmt
&& gimple_assign_cast_p (def_stmt
))
3946 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
3947 if (TYPE_MODE (TREE_TYPE (rhs1
)) == mode
3948 && TYPE_PRECISION (TREE_TYPE (rhs1
))
3949 == TYPE_PRECISION (type
))
3953 if (def
== NULL_TREE
)
3955 def
= vect_recog_temp_ssa_var (utype
, NULL
);
3956 def_stmt
= gimple_build_assign (def
, NOP_EXPR
, oprnd1
);
3957 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
, uvectype
);
3959 stype
= TREE_TYPE (def
);
3961 if (TREE_CODE (def
) == INTEGER_CST
)
3963 if (!tree_fits_uhwi_p (def
)
3964 || tree_to_uhwi (def
) >= GET_MODE_PRECISION (mode
)
3965 || integer_zerop (def
))
3967 def2
= build_int_cst (stype
,
3968 GET_MODE_PRECISION (mode
) - tree_to_uhwi (def
));
3972 tree vecstype
= get_vectype_for_scalar_type (vinfo
, stype
);
3974 if (vecstype
== NULL_TREE
)
3976 def2
= vect_recog_temp_ssa_var (stype
, NULL
);
3977 def_stmt
= gimple_build_assign (def2
, NEGATE_EXPR
, def
);
3981 = gsi_insert_on_edge_immediate (ext_def
, def_stmt
);
3982 gcc_assert (!new_bb
);
3985 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
, vecstype
);
3987 def2
= vect_recog_temp_ssa_var (stype
, NULL
);
3988 tree mask
= build_int_cst (stype
, GET_MODE_PRECISION (mode
) - 1);
3989 def_stmt
= gimple_build_assign (def2
, BIT_AND_EXPR
,
3990 gimple_assign_lhs (def_stmt
), mask
);
3994 = gsi_insert_on_edge_immediate (ext_def
, def_stmt
);
3995 gcc_assert (!new_bb
);
3998 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
, vecstype
);
4001 var1
= vect_recog_temp_ssa_var (utype
, NULL
);
4002 def_stmt
= gimple_build_assign (var1
, rhs_code
== LROTATE_EXPR
4003 ? LSHIFT_EXPR
: RSHIFT_EXPR
,
4005 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
, uvectype
);
4007 var2
= vect_recog_temp_ssa_var (utype
, NULL
);
4008 def_stmt
= gimple_build_assign (var2
, rhs_code
== LROTATE_EXPR
4009 ? RSHIFT_EXPR
: LSHIFT_EXPR
,
4011 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
, uvectype
);
4013 /* Pattern detected. */
4014 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt
);
4016 /* Pattern supported. Create a stmt to be used to replace the pattern. */
4017 var
= vect_recog_temp_ssa_var (utype
, NULL
);
4018 pattern_stmt
= gimple_build_assign (var
, BIT_IOR_EXPR
, var1
, var2
);
4020 if (!useless_type_conversion_p (type
, utype
))
4022 append_pattern_def_seq (vinfo
, stmt_vinfo
, pattern_stmt
, uvectype
);
4023 tree result
= vect_recog_temp_ssa_var (type
, NULL
);
4024 pattern_stmt
= gimple_build_assign (result
, NOP_EXPR
, var
);
4026 return pattern_stmt
;
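/* Illustrative sketch, not part of the vectorizer: the scalar equivalent of
   the shift-based lowering built above for a rotate when the target has no
   vector rotate but does have vector shifts.  The 32-bit element width and
   the function name are assumptions for the example; it requires c < 32.  */

static unsigned int
rotate_lowering_example (unsigned int b, unsigned int c)
{
  const unsigned int B = 32;            /* element bitsize of the type  */
  unsigned int d = (unsigned int) -c;   /* d_t = -c_t  */
  unsigned int e = d & (B - 1);         /* S2 e_t = d_t & (B - 1)  */
  unsigned int f = b << c;              /* S3 f_t = b_t << c_t  */
  unsigned int g = b >> e;              /* S4 g_t = b_t >> e_t  */
  return f | g;                         /* a_t = f_t | g_t  */
}
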
/* Detect a vector by vector shift pattern that wouldn't be otherwise
   vectorized:

   S3 res_T = b_T op a_t;

   where type 'TYPE' is a type with different size than 'type',
   and op is <<, >> or rotate.

   Also detect cases:

   TYPE b_T, c_T, res_T;

   S1 a_t = (type) c_T;
   S3 res_T = b_T op a_t;

   Input/Output:

   * STMT_VINFO: The stmt from which the pattern search begins,
     i.e. the shift/rotate stmt.  The original stmt (S3) is replaced
     with a shift/rotate which has same type on both operands, in the
     second case just b_T op c_T, in the first case with added cast
     from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the shift/rotate
     S3 stmt.  */

static gimple *
vect_recog_vector_vector_shift_pattern (vec_info *vinfo,
                                        stmt_vec_info stmt_vinfo,
                                        tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  tree oprnd0, oprnd1, lhs, var;
  gimple *pattern_stmt;
  enum tree_code rhs_code;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      break;
    default:
      return NULL;
    }

  lhs = gimple_assign_lhs (last_stmt);
  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TREE_CODE (oprnd1) != SSA_NAME
      || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
      || !INTEGRAL_TYPE_P (TREE_TYPE (oprnd0))
      || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
      || TYPE_PRECISION (TREE_TYPE (lhs))
         != TYPE_PRECISION (TREE_TYPE (oprnd0)))
    return NULL;

  stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
  if (!def_vinfo)
    return NULL;

  *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (oprnd0));
  if (*type_out == NULL_TREE)
    return NULL;

  tree def = NULL_TREE;
  gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
  if (def_stmt && gimple_assign_cast_p (def_stmt))
    {
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
          && TYPE_PRECISION (TREE_TYPE (rhs1))
             == TYPE_PRECISION (TREE_TYPE (oprnd0)))
        {
          if (TYPE_PRECISION (TREE_TYPE (oprnd1))
              >= TYPE_PRECISION (TREE_TYPE (rhs1)))
            def = rhs1;
          else
            {
              tree mask
                = build_low_bits_mask (TREE_TYPE (rhs1),
                                       TYPE_PRECISION (TREE_TYPE (oprnd1)));
              def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
              def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
              tree vecstype = get_vectype_for_scalar_type (vinfo,
                                                           TREE_TYPE (rhs1));
              append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
            }
        }
    }

  if (def == NULL_TREE)
    {
      def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
      def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
    }

  /* Pattern detected.  */
  vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
  pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);

  return pattern_stmt;
}

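/* Illustrative sketch, not part of the vectorizer: the kind of statement the
   vector/vector shift pattern rewrites.  The shift amount 'a' has a type of
   different size than the shifted value 'b'; the pattern adds a cast so that
   both vector operands end up with the same element size.  Names and types
   are assumptions for the example.  */

static long long
vector_vector_shift_example (long long b, int a)
{
  /* res = b >> a with mismatched operand types is rewritten as
     res = b >> (long long) a, with the cast recorded in the pattern
     definition sequence.  */
  return b >> (long long) a;
}
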
/* Return true iff the target has a vector optab implementing the operation
   CODE on type VECTYPE.  */

static bool
target_has_vecop_for_code (tree_code code, tree vectype)
{
  optab voptab = optab_for_tree_code (code, vectype, optab_vector);
  return voptab
         && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
}

/* Verify that the target has optabs of VECTYPE to perform all the steps
   needed by the multiplication-by-immediate synthesis algorithm described by
   ALG and VAR.  If SYNTH_SHIFT_P is true ensure that vector addition is
   present.  Return true iff the target supports all the steps.  */

static bool
target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
                                tree vectype, bool synth_shift_p)
{
  if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
    return false;

  bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
  bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);

  if (var == negate_variant
      && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
    return false;

  /* If we must synthesize shifts with additions make sure that vector
     addition is available.  */
  if ((var == add_variant || synth_shift_p) && !supports_vplus)
    return false;

  for (int i = 1; i < alg->ops; i++)
    {
      switch (alg->op[i])
        {
        case alg_shift:
          break;
        case alg_add_t_m2:
        case alg_add_t2_m:
        case alg_add_factor:
          if (!supports_vplus)
            return false;
          break;
        case alg_sub_t_m2:
        case alg_sub_t2_m:
        case alg_sub_factor:
          if (!supports_vminus)
            return false;
          break;
        case alg_unknown:
        case alg_m:
        case alg_zero:
        case alg_impossible:
          return false;
        default:
          gcc_unreachable ();
        }
    }

  return true;
}

/* Synthesize a left shift of OP by AMNT bits using a series of additions and
   putting the final result in DEST.  Append all statements but the last into
   VINFO.  Return the last statement.  */

static gimple *
synth_lshift_by_additions (vec_info *vinfo,
                           tree dest, tree op, HOST_WIDE_INT amnt,
                           stmt_vec_info stmt_info)
{
  HOST_WIDE_INT i;
  tree itype = TREE_TYPE (op);
  tree prev_res = op;
  gcc_assert (amnt >= 0);
  for (i = 0; i < amnt; i++)
    {
      tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
                                    : dest;
      gimple *stmt
        = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
      prev_res = tmp_var;
      if (i < amnt - 1)
        append_pattern_def_seq (vinfo, stmt_info, stmt);
      else
        return stmt;
    }

  gcc_unreachable ();
  return NULL;
}

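/* Illustrative sketch, not part of the vectorizer: a left shift by AMNT
   synthesized as AMNT successive additions (doublings), which is the scalar
   equivalent of what synth_lshift_by_additions emits when vector shifts are
   unavailable.  The function name is an assumption for the example.  */

static unsigned int
lshift_by_additions_example (unsigned int x, unsigned int amnt)
{
  unsigned int res = x;
  for (unsigned int i = 0; i < amnt; i++)
    res = res + res;    /* Each addition doubles, i.e. shifts left by one.  */
  return res;
}
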
/* Helper for vect_synth_mult_by_constant.  Apply a binary operation
   CODE to operands OP1 and OP2, creating a new temporary SSA var in
   the process if necessary.  Append the resulting assignment statements
   to the sequence in STMT_VINFO.  Return the SSA variable that holds the
   result of the binary operation.  If SYNTH_SHIFT_P is true synthesize
   left shifts using additions.  */

static tree
apply_binop_and_append_stmt (vec_info *vinfo,
                             tree_code code, tree op1, tree op2,
                             stmt_vec_info stmt_vinfo, bool synth_shift_p)
{
  if (integer_zerop (op2)
      && (code == LSHIFT_EXPR
          || code == PLUS_EXPR))
    {
      gcc_assert (TREE_CODE (op1) == SSA_NAME);
      return op1;
    }

  gimple *stmt;
  tree itype = TREE_TYPE (op1);
  tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);

  if (code == LSHIFT_EXPR
      && synth_shift_p)
    {
      stmt = synth_lshift_by_additions (vinfo, tmp_var, op1,
                                        TREE_INT_CST_LOW (op2), stmt_vinfo);
      append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
      return tmp_var;
    }

  stmt = gimple_build_assign (tmp_var, code, op1, op2);
  append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
  return tmp_var;
}

/* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
   and simple arithmetic operations to be vectorized.  Record the statements
   produced in STMT_VINFO and return the last statement in the sequence or
   NULL if it's not possible to synthesize such a multiplication.
   This function mirrors the behavior of expand_mult_const in expmed.cc but
   works on tree-ssa form.  */

static gimple *
vect_synth_mult_by_constant (vec_info *vinfo, tree op, tree val,
                             stmt_vec_info stmt_vinfo)
{
  tree itype = TREE_TYPE (op);
  machine_mode mode = TYPE_MODE (itype);
  struct algorithm alg;
  mult_variant variant;
  if (!tree_fits_shwi_p (val))
    return NULL;

  /* Multiplication synthesis by shifts, adds and subs can introduce
     signed overflow where the original operation didn't.  Perform the
     operations on an unsigned type and cast back to avoid this.
     In the future we may want to relax this for synthesis algorithms
     that we can prove do not cause unexpected overflow.  */
  bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);

  tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
  tree vectype = get_vectype_for_scalar_type (vinfo, multtype);
  if (!vectype)
    return NULL;

  /* Targets that don't support vector shifts but support vector additions
     can synthesize shifts that way.  */
  bool synth_shift_p = !vect_supportable_shift (vinfo, LSHIFT_EXPR, multtype);

  HOST_WIDE_INT hwval = tree_to_shwi (val);
  /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
     The vectorizer's benefit analysis will decide whether it's beneficial
     to do this.  */
  bool possible = choose_mult_variant (VECTOR_MODE_P (TYPE_MODE (vectype))
                                       ? TYPE_MODE (vectype) : mode,
                                       hwval, &alg, &variant, MAX_COST);
  if (!possible)
    return NULL;

  if (!target_supports_mult_synth_alg (&alg, variant, vectype, synth_shift_p))
    return NULL;

  tree accumulator;

  /* Clear out the sequence of statements so we can populate it below.  */
  gimple *stmt = NULL;

  if (cast_to_unsigned_p)
    {
      tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
      stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
      append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
      op = tmp_op;
    }

  if (alg.op[0] == alg_zero)
    accumulator = build_int_cst (multtype, 0);
  else
    accumulator = op;

  bool needs_fixup = (variant == negate_variant)
                      || (variant == add_variant);

  for (int i = 1; i < alg.ops; i++)
    {
      tree shft_log = build_int_cst (multtype, alg.log[i]);
      tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
      tree tmp_var = NULL_TREE;

      switch (alg.op[i])
        {
        case alg_shift:
          if (synth_shift_p)
            stmt
              = synth_lshift_by_additions (vinfo, accum_tmp, accumulator,
                                           alg.log[i], stmt_vinfo);
          else
            stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
                                        shft_log);
          break;
        case alg_add_t_m2:
          tmp_var
            = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op, shft_log,
                                           stmt_vinfo, synth_shift_p);
          stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
                                      tmp_var);
          break;
        case alg_sub_t_m2:
          tmp_var = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op,
                                                 shft_log, stmt_vinfo,
                                                 synth_shift_p);
          /* In some algorithms the first step involves zeroing the
             accumulator.  If subtracting from such an accumulator
             just emit the negation directly.  */
          if (integer_zerop (accumulator))
            stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
          else
            stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
                                        tmp_var);
          break;
        case alg_add_t2_m:
          tmp_var
            = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
                                           shft_log, stmt_vinfo, synth_shift_p);
          stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
          break;
        case alg_sub_t2_m:
          tmp_var
            = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
                                           shft_log, stmt_vinfo, synth_shift_p);
          stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
          break;
        case alg_add_factor:
          tmp_var
            = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
                                           shft_log, stmt_vinfo, synth_shift_p);
          stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
                                      tmp_var);
          break;
        case alg_sub_factor:
          tmp_var
            = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
                                           shft_log, stmt_vinfo, synth_shift_p);
          stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
                                      accumulator);
          break;
        default:
          gcc_unreachable ();
        }
      /* We don't want to append the last stmt in the sequence to stmt_vinfo
         but rather return it directly.  */
      if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
        append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
      accumulator = accum_tmp;
    }
  if (variant == negate_variant)
    {
      tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
      stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
      accumulator = accum_tmp;
      if (cast_to_unsigned_p)
        append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
    }
  else if (variant == add_variant)
    {
      tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
      stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
      accumulator = accum_tmp;
      if (cast_to_unsigned_p)
        append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
    }
  /* Move back to a signed if needed.  */
  if (cast_to_unsigned_p)
    {
      tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
      stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
    }

  return stmt;
}

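/* Illustrative sketch, not part of the vectorizer: one shift-and-add
   decomposition of the kind vect_synth_mult_by_constant builds from
   choose_mult_variant's algorithm.  For example, x * 9 can be computed as
   (x << 3) + x; the arithmetic is done in an unsigned type so that any
   intermediate overflow wraps instead of being undefined.  The function name
   and the constant 9 are assumptions for the example.  */

static int
mult_by_constant_example (int x)
{
  unsigned int ux = (unsigned int) x;   /* cast_to_unsigned_p step  */
  unsigned int acc = (ux << 3) + ux;    /* shift-and-add step for 9 = 8 + 1  */
  return (int) acc;                     /* move back to the signed type  */
}
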
/* Detect multiplication by constant and convert it into a sequence of
   shifts and additions, subtractions, negations.  We reuse the
   choose_mult_variant algorithms from expmed.cc.

   Input/Output:

   * STMT_VINFO: The stmt from which the pattern search begins,
     i.e. the multiplication stmt.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace
     the multiplication.  */

static gimple *
vect_recog_mult_pattern (vec_info *vinfo,
                         stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  tree oprnd0, oprnd1, vectype, itype;
  gimple *pattern_stmt;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  itype = TREE_TYPE (oprnd0);

  if (TREE_CODE (oprnd0) != SSA_NAME
      || TREE_CODE (oprnd1) != INTEGER_CST
      || !INTEGRAL_TYPE_P (itype)
      || !type_has_mode_precision_p (itype))
    return NULL;

  vectype = get_vectype_for_scalar_type (vinfo, itype);
  if (vectype == NULL_TREE)
    return NULL;

  /* If the target can handle vectorized multiplication natively,
     don't attempt to optimize this.  */
  optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
  if (mul_optab != unknown_optab)
    {
      machine_mode vec_mode = TYPE_MODE (vectype);
      int icode = (int) optab_handler (mul_optab, vec_mode);
      if (icode != CODE_FOR_nothing)
        return NULL;
    }

  pattern_stmt = vect_synth_mult_by_constant (vinfo,
                                              oprnd0, oprnd1, stmt_vinfo);
  if (!pattern_stmt)
    return NULL;

  /* Pattern detected.  */
  vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);

  *type_out = vectype;

  return pattern_stmt;
}

/* Detect a signed division by a constant that wouldn't be
   otherwise vectorized:

   S1 a_t = b_t / N;

   where type 'type' is an integral type and N is a constant.

   Similarly handle modulo by a constant:

   S4 a_t = b_t % N;

   Input/Output:

   * STMT_VINFO: The stmt from which the pattern search begins,
     i.e. the division stmt.  S1 is replaced by if N is a power
     of two constant and type is signed:
   S3  y_t = b_t < 0 ? N - 1 : 0;
   S2  x_t = b_t + y_t;
   S1' a_t = x_t >> log2 (N);

   S4 is replaced if N is a power of two constant and
   type is signed by (where *_T temporaries have unsigned type):
   S9  y_T = b_t < 0 ? -1U : 0U;
   S8  z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
   S7  z_t = (type) z_T;
   S6  w_t = b_t + z_t;
   S5  x_t = w_t & (N - 1);
   S4' a_t = x_t - z_t;

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the division
     S1 or modulo S4 stmt.  */

static gimple *
vect_recog_divmod_pattern (vec_info *vinfo,
                           stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  tree oprnd0, oprnd1, vectype, itype, cond;
  gimple *pattern_stmt, *def_stmt;
  enum tree_code rhs_code;
  optab optab;
  tree q, cst;
  int dummy_int, prec;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case TRUNC_DIV_EXPR:
    case EXACT_DIV_EXPR:
    case TRUNC_MOD_EXPR:
      break;
    default:
      return NULL;
    }

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  itype = TREE_TYPE (oprnd0);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TREE_CODE (oprnd1) != INTEGER_CST
      || TREE_CODE (itype) != INTEGER_TYPE
      || !type_has_mode_precision_p (itype))
    return NULL;

  scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
  vectype = get_vectype_for_scalar_type (vinfo, itype);
  if (vectype == NULL_TREE)
    return NULL;

  if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
    {
      /* If the target can handle vectorized division or modulo natively,
         don't attempt to optimize this, since native division is likely
         to give smaller code.  */
      optab = optab_for_tree_code (rhs_code, vectype, optab_default);
      if (optab != unknown_optab)
        {
          machine_mode vec_mode = TYPE_MODE (vectype);
          int icode = (int) optab_handler (optab, vec_mode);
          if (icode != CODE_FOR_nothing)
            return NULL;
        }
    }

  prec = TYPE_PRECISION (itype);
  if (integer_pow2p (oprnd1))
    {
      if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
        return NULL;

      /* Pattern detected.  */
      vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);

      *type_out = vectype;

      /* Check if the target supports this internal function.  */
      internal_fn ifn = IFN_DIV_POW2;
      if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
        {
          tree shift = build_int_cst (itype, tree_log2 (oprnd1));

          tree var_div = vect_recog_temp_ssa_var (itype, NULL);
          gimple *div_stmt = gimple_build_call_internal (ifn, 2, oprnd0, shift);
          gimple_call_set_lhs (div_stmt, var_div);

          if (rhs_code == TRUNC_MOD_EXPR)
            {
              append_pattern_def_seq (vinfo, stmt_vinfo, div_stmt);
              def_stmt
                = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
                                       LSHIFT_EXPR, var_div, shift);
              append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
              pattern_stmt
                = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
                                       MINUS_EXPR, oprnd0,
                                       gimple_assign_lhs (def_stmt));
            }
          else
            pattern_stmt = div_stmt;
          gimple_set_location (pattern_stmt, gimple_location (last_stmt));

          return pattern_stmt;
        }

      cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
                     build_int_cst (itype, 0));
      if (rhs_code == TRUNC_DIV_EXPR
          || rhs_code == EXACT_DIV_EXPR)
        {
          tree var = vect_recog_temp_ssa_var (itype, NULL);
          tree shift;
          def_stmt
            = gimple_build_assign (var, COND_EXPR, cond,
                                   fold_build2 (MINUS_EXPR, itype, oprnd1,
                                                build_int_cst (itype, 1)),
                                   build_int_cst (itype, 0));
          append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
          var = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt
            = gimple_build_assign (var, PLUS_EXPR, oprnd0,
                                   gimple_assign_lhs (def_stmt));
          append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);

          shift = build_int_cst (itype, tree_log2 (oprnd1));
          pattern_stmt
            = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
                                   RSHIFT_EXPR, var, shift);
        }
      else
        {
          tree signmask;
          if (compare_tree_int (oprnd1, 2) == 0)
            {
              signmask = vect_recog_temp_ssa_var (itype, NULL);
              def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
                                              build_int_cst (itype, 1),
                                              build_int_cst (itype, 0));
              append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
            }
          else
            {
              tree utype
                = build_nonstandard_integer_type (prec, 1);
              tree vecutype = get_vectype_for_scalar_type (vinfo, utype);
              tree shift
                = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
                                        - tree_log2 (oprnd1));
              tree var = vect_recog_temp_ssa_var (utype, NULL);

              def_stmt = gimple_build_assign (var, COND_EXPR, cond,
                                              build_int_cst (utype, -1),
                                              build_int_cst (utype, 0));
              append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
              var = vect_recog_temp_ssa_var (utype, NULL);
              def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
                                              gimple_assign_lhs (def_stmt),
                                              shift);
              append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
              signmask = vect_recog_temp_ssa_var (itype, NULL);
              def_stmt
                = gimple_build_assign (signmask, NOP_EXPR, var);
              append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
            }
          def_stmt
            = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
                                   PLUS_EXPR, oprnd0, signmask);
          append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
          def_stmt
            = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
                                   BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
                                   fold_build2 (MINUS_EXPR, itype, oprnd1,
                                                build_int_cst (itype, 1)));
          append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);

          pattern_stmt
            = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
                                   MINUS_EXPR, gimple_assign_lhs (def_stmt),
                                   signmask);
        }

      return pattern_stmt;
    }

  if ((cst = uniform_integer_cst_p (oprnd1))
      && TYPE_UNSIGNED (itype)
      && rhs_code == TRUNC_DIV_EXPR
      && targetm.vectorize.preferred_div_as_shifts_over_mult (vectype))
    {
      /* We can use the relationship:

           x // N == ((x+N+2) // (N+1) + x) // (N+1)  for 0 <= x < N(N+3)

         to optimize cases where N+1 is a power of 2, and where // (N+1)
         is therefore a shift right.  When operating in modes that are
         multiples of a byte in size, there are two cases:

         (1) N(N+3) is not representable, in which case the question
             becomes whether the replacement expression overflows.
             It is enough to test that x+N+2 does not overflow,
             i.e. that x < MAX-(N+1).

         (2) N(N+3) is representable, in which case it is the (only)
             bound that we need to check.

         ??? For now we just handle the case where // (N+1) is a shift
         right by half the precision, since some architectures can
         optimize the associated addition and shift combinations
         into single instructions.  */
      auto wcst = wi::to_wide (cst);
      int pow = wi::exact_log2 (wcst + 1);
      if (pow == prec / 2)
        {
          gimple *stmt = SSA_NAME_DEF_STMT (oprnd0);

          gimple_ranger ranger;
          int_range_max r;

          /* Check that no overflow will occur.  If we don't have range
             information we can't perform the optimization.  */
          if (ranger.range_of_expr (r, oprnd0, stmt) && !r.undefined_p ())
            {
              wide_int max = r.upper_bound ();
              wide_int one = wi::shwi (1, prec);
              wide_int adder = wi::add (one, wi::lshift (one, pow));
              wi::overflow_type ovf;
              wi::add (max, adder, UNSIGNED, &ovf);
              if (ovf == wi::OVF_NONE)
                {
                  *type_out = vectype;
                  tree tadder = wide_int_to_tree (itype, adder);
                  tree rshift = wide_int_to_tree (itype, pow);

                  tree new_lhs1 = vect_recog_temp_ssa_var (itype, NULL);
                  gimple *patt1
                    = gimple_build_assign (new_lhs1, PLUS_EXPR, oprnd0, tadder);
                  append_pattern_def_seq (vinfo, stmt_vinfo, patt1, vectype);

                  tree new_lhs2 = vect_recog_temp_ssa_var (itype, NULL);
                  patt1 = gimple_build_assign (new_lhs2, RSHIFT_EXPR, new_lhs1,
                                               rshift);
                  append_pattern_def_seq (vinfo, stmt_vinfo, patt1, vectype);

                  tree new_lhs3 = vect_recog_temp_ssa_var (itype, NULL);
                  patt1 = gimple_build_assign (new_lhs3, PLUS_EXPR, new_lhs2,
                                               oprnd0);
                  append_pattern_def_seq (vinfo, stmt_vinfo, patt1, vectype);

                  tree new_lhs4 = vect_recog_temp_ssa_var (itype, NULL);
                  pattern_stmt = gimple_build_assign (new_lhs4, RSHIFT_EXPR,
                                                      new_lhs3, rshift);

                  return pattern_stmt;
                }
            }
        }
    }

  if (prec > HOST_BITS_PER_WIDE_INT
      || integer_zerop (oprnd1))
    return NULL;

  if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
    return NULL;

  if (TYPE_UNSIGNED (itype))
    {
      unsigned HOST_WIDE_INT mh, ml;
      int pre_shift, post_shift;
      unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
                                  & GET_MODE_MASK (itype_mode));
      tree t1, t2, t3, t4;

      if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
        /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0.  */
        return NULL;

      /* Find a suitable multiplier and right shift count
         instead of multiplying with D.  */
      mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);

      /* If the suggested multiplier is more than SIZE bits, we can do better
         for even divisors, using an initial right shift.  */
      if (mh != 0 && (d & 1) == 0)
        {
          pre_shift = ctz_or_zero (d);
          mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
                                  &ml, &post_shift, &dummy_int);
          gcc_assert (!mh);
        }
      else
        pre_shift = 0;

      if (mh != 0)
        {
          if (post_shift - 1 >= prec)
            return NULL;

          /* t1 = oprnd0 h* ml;
             t2 = oprnd0 - t1;
             t3 = t2 >> 1;
             t4 = t1 + t3;
             q = t4 >> (post_shift - 1);  */
          t1 = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
                                          build_int_cst (itype, ml));
          append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);

          t2 = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt
            = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
          append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);

          t3 = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt
            = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
          append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);

          t4 = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt
            = gimple_build_assign (t4, PLUS_EXPR, t1, t3);

          if (post_shift != 1)
            {
              append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);

              q = vect_recog_temp_ssa_var (itype, NULL);
              pattern_stmt
                = gimple_build_assign (q, RSHIFT_EXPR, t4,
                                       build_int_cst (itype, post_shift - 1));
            }
          else
            {
              q = t4;
              pattern_stmt = def_stmt;
            }
        }
      else
        {
          if (pre_shift >= prec || post_shift >= prec)
            return NULL;

          /* t1 = oprnd0 >> pre_shift;
             t2 = t1 h* ml;
             q = t2 >> post_shift;  */
          if (pre_shift)
            {
              t1 = vect_recog_temp_ssa_var (itype, NULL);
              def_stmt
                = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
                                       build_int_cst (NULL, pre_shift));
              append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
            }
          else
            t1 = oprnd0;

          t2 = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
                                          build_int_cst (itype, ml));

          if (post_shift)
            {
              append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);

              q = vect_recog_temp_ssa_var (itype, NULL);
              def_stmt
                = gimple_build_assign (q, RSHIFT_EXPR, t2,
                                       build_int_cst (itype, post_shift));
            }
          else
            q = t2;

          pattern_stmt = def_stmt;
        }
    }
  else
    {
      unsigned HOST_WIDE_INT ml;
      int post_shift;
      HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
      unsigned HOST_WIDE_INT abs_d;
      bool add = false;
      tree t1, t2, t3, t4;

      /* Give up for -1.  */
      if (d == -1)
        return NULL;

      /* Since d might be INT_MIN, we have to cast to
         unsigned HOST_WIDE_INT before negating to avoid
         undefined signed overflow.  */
      abs_d = (d >= 0
               ? (unsigned HOST_WIDE_INT) d
               : - (unsigned HOST_WIDE_INT) d);

      /* n rem d = n rem -d */
      if (rhs_code == TRUNC_MOD_EXPR && d < 0)
        {
          d = abs_d;
          oprnd1 = build_int_cst (itype, abs_d);
        }
      if (HOST_BITS_PER_WIDE_INT >= prec
          && abs_d == HOST_WIDE_INT_1U << (prec - 1))
        /* This case is not handled correctly below.  */
        return NULL;

      choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
      if (ml >= HOST_WIDE_INT_1U << (prec - 1))
        {
          add = true;
          ml |= HOST_WIDE_INT_M1U << (prec - 1);
        }
      if (post_shift >= prec)
        return NULL;

      /* t1 = oprnd0 h* ml;  */
      t1 = vect_recog_temp_ssa_var (itype, NULL);
      def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
                                      build_int_cst (itype, ml));

      if (add)
        {
          /* t2 = t1 + oprnd0;  */
          append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
          t2 = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
        }
      else
        t2 = t1;

      if (post_shift)
        {
          /* t3 = t2 >> post_shift;  */
          append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
          t3 = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
                                          build_int_cst (itype, post_shift));
        }
      else
        t3 = t2;

      int msb = 1;
      value_range r;
      get_range_query (cfun)->range_of_expr (r, oprnd0);
      if (!r.varying_p () && !r.undefined_p ())
        {
          if (!wi::neg_p (r.lower_bound (), TYPE_SIGN (itype)))
            msb = 0;
          else if (wi::neg_p (r.upper_bound (), TYPE_SIGN (itype)))
            msb = -1;
        }

      if (msb == 0 && d >= 0)
        {
          /* q = t3;  */
          q = t3;
          pattern_stmt = def_stmt;
        }
      else
        {
          /* t4 = oprnd0 >> (prec - 1);
             or if we know from VRP that oprnd0 >= 0
             t4 = 0;
             or if we know from VRP that oprnd0 < 0
             t4 = -1;  */
          append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
          t4 = vect_recog_temp_ssa_var (itype, NULL);
          if (msb != 1)
            def_stmt = gimple_build_assign (t4, INTEGER_CST,
                                            build_int_cst (itype, msb));
          else
            def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
                                            build_int_cst (itype, prec - 1));
          append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);

          /* q = t3 - t4;  or q = t4 - t3;  */
          q = vect_recog_temp_ssa_var (itype, NULL);
          pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
                                              d < 0 ? t3 : t4);
        }
    }

  if (rhs_code == TRUNC_MOD_EXPR)
    {
      tree r, t1;

      /* We divided.  Now finish by:
         t1 = q * oprnd1;
         r = oprnd0 - t1;  */
      append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);

      t1 = vect_recog_temp_ssa_var (itype, NULL);
      def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);

      r = vect_recog_temp_ssa_var (itype, NULL);
      pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
    }

  /* Pattern detected.  */
  vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);

  *type_out = vectype;
  return pattern_stmt;
}

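/* Illustrative sketch, not part of the vectorizer: the scalar shape of the
   signed power-of-two division lowering described above, here for N == 8.
   It assumes the usual arithmetic right shift for negative values; the
   function name is an assumption for the example.  */

static int
sdiv_by_8_example (int a)
{
  int y = a < 0 ? 8 - 1 : 0;    /* S3  y_t = b_t < 0 ? N - 1 : 0  */
  int x = a + y;                /* S2  x_t = b_t + y_t  */
  return x >> 3;                /* S1' a_t = x_t >> log2 (N)  */
}
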
/* Function vect_recog_mixed_size_cond_pattern

   Try to find the following pattern:

   S1 a_T = x_t CMP y_t ? b_T : c_T;

   where type 'TYPE' is an integral type which has different size
   from 'type'.  b_T and c_T are either constants (and if 'TYPE' is wider
   than 'type', the constants need to fit into an integer type
   with the same width as 'type') or results of conversion from 'type'.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the pattern.
     Additionally a def_stmt is added.

        a_it = x_t CMP y_t ? b_it : c_it;
        a_T = (TYPE) a_it;  */

static gimple *
vect_recog_mixed_size_cond_pattern (vec_info *vinfo,
                                    stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  tree cond_expr, then_clause, else_clause;
  tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
  gimple *pattern_stmt, *def_stmt;
  tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
  gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
  bool promotion;
  tree comp_scalar_type;

  if (!is_gimple_assign (last_stmt)
      || gimple_assign_rhs_code (last_stmt) != COND_EXPR
      || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;

  cond_expr = gimple_assign_rhs1 (last_stmt);
  then_clause = gimple_assign_rhs2 (last_stmt);
  else_clause = gimple_assign_rhs3 (last_stmt);

  if (!COMPARISON_CLASS_P (cond_expr))
    return NULL;

  comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
  comp_vectype = get_vectype_for_scalar_type (vinfo, comp_scalar_type);
  if (comp_vectype == NULL_TREE)
    return NULL;

  type = TREE_TYPE (gimple_assign_lhs (last_stmt));
  if (types_compatible_p (type, comp_scalar_type)
      || ((TREE_CODE (then_clause) != INTEGER_CST
           || TREE_CODE (else_clause) != INTEGER_CST)
          && !INTEGRAL_TYPE_P (comp_scalar_type))
      || !INTEGRAL_TYPE_P (type))
    return NULL;

  if ((TREE_CODE (then_clause) != INTEGER_CST
       && !type_conversion_p (vinfo, then_clause, false,
                              &orig_type0, &def_stmt0, &promotion))
      || (TREE_CODE (else_clause) != INTEGER_CST
          && !type_conversion_p (vinfo, else_clause, false,
                                 &orig_type1, &def_stmt1, &promotion)))
    return NULL;

  if (orig_type0 && orig_type1
      && !types_compatible_p (orig_type0, orig_type1))
    return NULL;

  if (orig_type0)
    {
      if (!types_compatible_p (orig_type0, comp_scalar_type))
        return NULL;
      then_clause = gimple_assign_rhs1 (def_stmt0);
      itype = orig_type0;
    }

  if (orig_type1)
    {
      if (!types_compatible_p (orig_type1, comp_scalar_type))
        return NULL;
      else_clause = gimple_assign_rhs1 (def_stmt1);
      itype = orig_type1;
    }

  HOST_WIDE_INT cmp_mode_size
    = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));

  scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
  if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
    return NULL;

  vectype = get_vectype_for_scalar_type (vinfo, type);
  if (vectype == NULL_TREE)
    return NULL;

  if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
    return NULL;

  if (itype == NULL_TREE)
    itype = build_nonstandard_integer_type (cmp_mode_size,
                                            TYPE_UNSIGNED (type));

  if (itype == NULL_TREE
      || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
    return NULL;

  vecitype = get_vectype_for_scalar_type (vinfo, itype);
  if (vecitype == NULL_TREE)
    return NULL;

  if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
    return NULL;

  if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
    {
      if ((TREE_CODE (then_clause) == INTEGER_CST
           && !int_fits_type_p (then_clause, itype))
          || (TREE_CODE (else_clause) == INTEGER_CST
              && !int_fits_type_p (else_clause, itype)))
        return NULL;
    }

  def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
                                  COND_EXPR, unshare_expr (cond_expr),
                                  fold_convert (itype, then_clause),
                                  fold_convert (itype, else_clause));
  pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
                                      NOP_EXPR, gimple_assign_lhs (def_stmt));

  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecitype);
  *type_out = vectype;

  vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt);

  return pattern_stmt;
}

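/* Illustrative sketch, not part of the vectorizer: the rewrite performed by
   the mixed-size COND_EXPR pattern above.  The selected values are produced
   in an integer type of the same width as the comparison operands and only
   then converted to the final type, so the vector condition works on equally
   sized elements.  Types and names are assumptions for the example.  */

static long long
mixed_size_cond_example (int x, int y, int b, int c)
{
  int a_it = x < y ? b : c;     /* a_it = x_t CMP y_t ? b_it : c_it  */
  return (long long) a_it;      /* a_T = (TYPE) a_it  */
}
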
/* Helper function of vect_recog_bool_pattern.  Called recursively, return
   true if bool VAR can and should be optimized that way.  Assume it shouldn't
   in case it's a result of a comparison which can be directly vectorized into
   a vector comparison.  Fills in STMTS with all stmts visited during the
   walk.  */

static bool
check_bool_pattern (tree var, vec_info *vinfo, hash_set <gimple *> &stmts)
{
  tree rhs1;
  enum tree_code rhs_code;

  stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
  if (!def_stmt_info)
    return false;

  gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
  if (!def_stmt)
    return false;

  if (stmts.contains (def_stmt))
    return true;

  rhs1 = gimple_assign_rhs1 (def_stmt);
  rhs_code = gimple_assign_rhs_code (def_stmt);
  switch (rhs_code)
    {
    case SSA_NAME:
      if (! check_bool_pattern (rhs1, vinfo, stmts))
        return false;
      break;

    CASE_CONVERT:
      if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
        return false;
      if (! check_bool_pattern (rhs1, vinfo, stmts))
        return false;
      break;

    case BIT_NOT_EXPR:
      if (! check_bool_pattern (rhs1, vinfo, stmts))
        return false;
      break;

    case BIT_AND_EXPR:
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
      if (! check_bool_pattern (rhs1, vinfo, stmts)
          || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
        return false;
      break;

    default:
      if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
        {
          tree vecitype, comp_vectype;

          /* If the comparison can throw, then is_gimple_condexpr will be
             false and we can't make a COND_EXPR/VEC_COND_EXPR out of it.  */
          if (stmt_could_throw_p (cfun, def_stmt))
            return false;

          comp_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1));
          if (comp_vectype == NULL_TREE)
            return false;

          tree mask_type = get_mask_type_for_scalar_type (vinfo,
                                                          TREE_TYPE (rhs1));
          if (mask_type
              && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
            return false;

          if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
            {
              scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
              tree itype
                = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
              vecitype = get_vectype_for_scalar_type (vinfo, itype);
              if (vecitype == NULL_TREE)
                return false;
            }
          else
            vecitype = comp_vectype;
          if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
            return false;
        }
      else
        return false;
      break;
    }

  bool res = stmts.add (def_stmt);
  /* We can't end up recursing when just visiting SSA defs but not PHIs.  */
  gcc_assert (!res);

  return true;
}

5313 adjust_bool_pattern_cast (vec_info
*vinfo
,
5314 tree type
, tree var
, stmt_vec_info stmt_info
)
5316 gimple
*cast_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (type
, NULL
),
5318 append_pattern_def_seq (vinfo
, stmt_info
, cast_stmt
,
5319 get_vectype_for_scalar_type (vinfo
, type
));
5320 return gimple_assign_lhs (cast_stmt
);
/* Helper function of vect_recog_bool_pattern.  Do the actual transformations.
   VAR is an SSA_NAME that should be transformed from bool to a wider integer
   type, OUT_TYPE is the desired final integer type of the whole pattern.
   STMT_INFO is the info of the pattern root and is where pattern stmts should
   be associated with.  DEFS is a map of pattern defs.  */

static void
adjust_bool_pattern (vec_info *vinfo, tree var, tree out_type,
                     stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
{
  gimple *stmt = SSA_NAME_DEF_STMT (var);
  enum tree_code rhs_code, def_rhs_code;
  tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
  location_t loc;
  gimple *pattern_stmt, *def_stmt;
  tree trueval = NULL_TREE;

  rhs1 = gimple_assign_rhs1 (stmt);
  rhs2 = gimple_assign_rhs2 (stmt);
  rhs_code = gimple_assign_rhs_code (stmt);
  loc = gimple_location (stmt);
  switch (rhs_code)
    {
    case SSA_NAME:
    CASE_CONVERT:
      irhs1 = *defs.get (rhs1);
      itype = TREE_TYPE (irhs1);
      pattern_stmt
        = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
                               SSA_NAME, irhs1);
      break;

    case BIT_NOT_EXPR:
      irhs1 = *defs.get (rhs1);
      itype = TREE_TYPE (irhs1);
      pattern_stmt
        = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
                               BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
      break;

    case BIT_AND_EXPR:
      /* Try to optimize x = y & (a < b ? 1 : 0); into
         x = (a < b ? y : 0);

         E.g. for:

           S1  a_b = x1 CMP1 y1;
           S2  b_b = x2 CMP2 y2;
           S3  c_b = a_b & b_b;
           S4  d_T = (TYPE) c_b;

         we would normally emit:

           S1'  a_T = x1 CMP1 y1 ? 1 : 0;
           S2'  b_T = x2 CMP2 y2 ? 1 : 0;
           S3'  c_T = a_T & b_T;
           ...

         but we can save one stmt by using the
         result of one of the COND_EXPRs in the other COND_EXPR and leave
         BIT_AND_EXPR stmt out:

           S1'  a_T = x1 CMP1 y1 ? 1 : 0;
           S3'  c_T = x2 CMP2 y2 ? a_T : 0;
           ...

         At least when VEC_COND_EXPR is implemented using masks
         cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
         computes the comparison masks and ands it, in one case with
         all ones vector, in the other case with a vector register.
         Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
         often more expensive.  */
      def_stmt = SSA_NAME_DEF_STMT (rhs2);
      def_rhs_code = gimple_assign_rhs_code (def_stmt);
      if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
        {
          irhs1 = *defs.get (rhs1);
          tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
          if (TYPE_PRECISION (TREE_TYPE (irhs1))
              == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
            {
              rhs_code = def_rhs_code;
              rhs1 = def_rhs1;
              rhs2 = gimple_assign_rhs2 (def_stmt);
              trueval = irhs1;
              goto do_compare;
            }
          else
            irhs2 = *defs.get (rhs2);
          goto and_ior_xor;
        }
      def_stmt = SSA_NAME_DEF_STMT (rhs1);
      def_rhs_code = gimple_assign_rhs_code (def_stmt);
      if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
        {
          irhs2 = *defs.get (rhs2);
          tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
          if (TYPE_PRECISION (TREE_TYPE (irhs2))
              == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
            {
              rhs_code = def_rhs_code;
              rhs1 = def_rhs1;
              rhs2 = gimple_assign_rhs2 (def_stmt);
              trueval = irhs2;
              goto do_compare;
            }
          else
            irhs1 = *defs.get (rhs1);
          goto and_ior_xor;
        }
      /* FALLTHRU */
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
      irhs1 = *defs.get (rhs1);
      irhs2 = *defs.get (rhs2);
    and_ior_xor:
      if (TYPE_PRECISION (TREE_TYPE (irhs1))
          != TYPE_PRECISION (TREE_TYPE (irhs2)))
        {
          int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
          int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
          int out_prec = TYPE_PRECISION (out_type);
          if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
            irhs2 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs1), irhs2,
                                              stmt_info);
          else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
            irhs1 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs2), irhs1,
                                              stmt_info);
          else
            {
              irhs1 = adjust_bool_pattern_cast (vinfo,
                                                out_type, irhs1, stmt_info);
              irhs2 = adjust_bool_pattern_cast (vinfo,
                                                out_type, irhs2, stmt_info);
            }
        }
      itype = TREE_TYPE (irhs1);
      pattern_stmt
        = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
                               rhs_code, irhs1, irhs2);
      break;

    default:
    do_compare:
      gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
      if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
          || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
          || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
                       GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
        {
          scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
          itype
            = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
        }
      else
        itype = TREE_TYPE (rhs1);
      cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
      if (trueval == NULL_TREE)
        trueval = build_int_cst (itype, 1);
      else
        gcc_checking_assert (useless_type_conversion_p (itype,
                                                        TREE_TYPE (trueval)));
      pattern_stmt
        = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
                               COND_EXPR, cond_expr, trueval,
                               build_int_cst (itype, 0));
      break;
    }

  gimple_set_location (pattern_stmt, loc);
  append_pattern_def_seq (vinfo, stmt_info, pattern_stmt,
                          get_vectype_for_scalar_type (vinfo, itype));
  defs.put (var, gimple_assign_lhs (pattern_stmt));
}

5503 sort_after_uid (const void *p1
, const void *p2
)
5505 const gimple
*stmt1
= *(const gimple
* const *)p1
;
5506 const gimple
*stmt2
= *(const gimple
* const *)p2
;
5507 return gimple_uid (stmt1
) - gimple_uid (stmt2
);
5510 /* Create pattern stmts for all stmts participating in the bool pattern
5511 specified by BOOL_STMT_SET and its root STMT_INFO with the desired type
5512 OUT_TYPE. Return the def of the pattern root. */
5515 adjust_bool_stmts (vec_info
*vinfo
, hash_set
<gimple
*> &bool_stmt_set
,
5516 tree out_type
, stmt_vec_info stmt_info
)
5518 /* Gather original stmts in the bool pattern in their order of appearance
5520 auto_vec
<gimple
*> bool_stmts (bool_stmt_set
.elements ());
5521 for (hash_set
<gimple
*>::iterator i
= bool_stmt_set
.begin ();
5522 i
!= bool_stmt_set
.end (); ++i
)
5523 bool_stmts
.quick_push (*i
);
5524 bool_stmts
.qsort (sort_after_uid
);
5526 /* Now process them in that order, producing pattern stmts. */
5527 hash_map
<tree
, tree
> defs
;
5528 for (unsigned i
= 0; i
< bool_stmts
.length (); ++i
)
5529 adjust_bool_pattern (vinfo
, gimple_assign_lhs (bool_stmts
[i
]),
5530 out_type
, stmt_info
, defs
);
5532 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
5533 gimple
*pattern_stmt
5534 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
));
5535 return gimple_assign_lhs (pattern_stmt
);
/* Return the proper type for converting bool VAR into
   an integer value or NULL_TREE if no such type exists.
   The type is chosen so that the converted value has the
   same number of elements as VAR's vector type.  */

static tree
integer_type_for_mask (tree var, vec_info *vinfo)
{
  if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
    return NULL_TREE;

  stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
  if (!def_stmt_info || !vect_use_mask_type_p (def_stmt_info))
    return NULL_TREE;

  return build_nonstandard_integer_type (def_stmt_info->mask_precision, 1);
}

/* Function vect_recog_bool_pattern

   Try to find pattern like following:

     bool a_b, b_b, c_b, d_b, e_b;

     S1  a_b = x1 CMP1 y1;
     S2  b_b = x2 CMP2 y2;
     S3  c_b = a_b & b_b;
     S4  d_b = x3 CMP3 y3;
     S5  e_b = c_b | d_b;
     S6  f_T = (TYPE) e_b;

   where type 'TYPE' is an integral type.  Or a similar pattern

     S6  f_Y = e_b ? r_Y : s_Y;

   as results from if-conversion of a complex condition.

   Input:

   * STMT_VINFO: The stmt at the end from which the pattern
     search begins, i.e. cast of a bool to an integer type.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the pattern.

     Assuming size of TYPE is the same as size of all comparisons
     (otherwise some casts would be added where needed), for the above
     sequence we create related pattern stmts:
     S1'  a_T = x1 CMP1 y1 ? 1 : 0;
     S3'  c_T = x2 CMP2 y2 ? a_T : 0;
     S4'  d_T = x3 CMP3 y3 ? 1 : 0;
     S5'  e_T = c_T | d_T;

     Instead of the above S3' we could emit:
     S2'  b_T = x2 CMP2 y2 ? 1 : 0;
     S3'  c_T = a_T | b_T;
     but the above is more efficient.  */

static gimple *
vect_recog_bool_pattern (vec_info *vinfo,
                         stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  enum tree_code rhs_code;
  tree var, lhs, rhs, vectype;
  gimple *pattern_stmt;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  var = gimple_assign_rhs1 (last_stmt);
  lhs = gimple_assign_lhs (last_stmt);
  rhs_code = gimple_assign_rhs_code (last_stmt);

  if (rhs_code == VIEW_CONVERT_EXPR)
    var = TREE_OPERAND (var, 0);

  if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
    return NULL;

  hash_set <gimple *> bool_stmts;

  if (CONVERT_EXPR_CODE_P (rhs_code)
      || rhs_code == VIEW_CONVERT_EXPR)
    {
      if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
          || VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
        return NULL;
      vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));

      if (check_bool_pattern (var, vinfo, bool_stmts))
        {
          rhs = adjust_bool_stmts (vinfo, bool_stmts,
                                   TREE_TYPE (lhs), stmt_vinfo);
          lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
          if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
            pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
          else
            pattern_stmt
              = gimple_build_assign (lhs, NOP_EXPR, rhs);
        }
      else
        {
          tree type = integer_type_for_mask (var, vinfo);
          tree cst0, cst1, tmp;

          if (!type)
            return NULL;

          /* We may directly use cond with narrowed type to avoid
             multiple cond exprs with following result packing and
             perform single cond with packed mask instead.  In case
             of widening we better make cond first and then extract
             results.  */
          if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
            type = TREE_TYPE (lhs);

          cst0 = build_int_cst (type, 0);
          cst1 = build_int_cst (type, 1);
          tmp = vect_recog_temp_ssa_var (type, NULL);
          pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);

          if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
            {
              tree new_vectype = get_vectype_for_scalar_type (vinfo, type);
              append_pattern_def_seq (vinfo, stmt_vinfo,
                                      pattern_stmt, new_vectype);

              lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
              pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
            }
        }

      *type_out = vectype;
      vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);

      return pattern_stmt;
    }
  else if (rhs_code == COND_EXPR
           && TREE_CODE (var) == SSA_NAME)
    {
      vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
      if (vectype == NULL_TREE)
        return NULL;

      /* Build a scalar type for the boolean result that when
         vectorized matches the vector type of the result in
         size and number of elements.  */
      unsigned prec
        = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
                               TYPE_VECTOR_SUBPARTS (vectype));

      tree type
        = build_nonstandard_integer_type (prec,
                                          TYPE_UNSIGNED (TREE_TYPE (var)));
      if (get_vectype_for_scalar_type (vinfo, type) == NULL_TREE)
        return NULL;

      if (check_bool_pattern (var, vinfo, bool_stmts))
        var = adjust_bool_stmts (vinfo, bool_stmts, type, stmt_vinfo);
      else if (integer_type_for_mask (var, vinfo))
        return NULL;

      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
      pattern_stmt
        = gimple_build_assign (lhs, COND_EXPR,
                               build2 (NE_EXPR, boolean_type_node,
                                       var, build_int_cst (TREE_TYPE (var), 0)),
                               gimple_assign_rhs2 (last_stmt),
                               gimple_assign_rhs3 (last_stmt));
      *type_out = vectype;
      vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);

      return pattern_stmt;
    }
  else if (rhs_code == SSA_NAME
           && STMT_VINFO_DATA_REF (stmt_vinfo))
    {
      stmt_vec_info pattern_stmt_info;
      vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
      if (!vectype || !VECTOR_MODE_P (TYPE_MODE (vectype)))
        return NULL;

      if (check_bool_pattern (var, vinfo, bool_stmts))
        rhs = adjust_bool_stmts (vinfo, bool_stmts,
                                 TREE_TYPE (vectype), stmt_vinfo);
      else
        {
          tree type = integer_type_for_mask (var, vinfo);
          tree cst0, cst1, new_vectype;

          if (!type)
            return NULL;

          if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
            type = TREE_TYPE (vectype);

          cst0 = build_int_cst (type, 0);
          cst1 = build_int_cst (type, 1);
          new_vectype = get_vectype_for_scalar_type (vinfo, type);

          rhs = vect_recog_temp_ssa_var (type, NULL);
          pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
          append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, new_vectype);
        }

      lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
      if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
        {
          tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
          gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
          append_pattern_def_seq (vinfo, stmt_vinfo, cast_stmt);
          rhs = rhs2;
        }
      pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
      pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
      vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
      *type_out = vectype;
      vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);

      return pattern_stmt;
    }
  else
    return NULL;
}

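/* Illustrative sketch, not part of the vectorizer: the scalar shape of the
   bool-pattern rewrite, matching the S1'/S3' statements in the comment above.
   Comparisons feeding a bool-to-integer cast are re-expressed as integer
   COND_EXPRs so that no vector of single-bit booleans is needed.  Names are
   assumptions for the example.  */

static int
bool_pattern_example (int x1, int y1, int x2, int y2)
{
  int a_T = x1 < y1 ? 1 : 0;    /* S1' a_T = x1 CMP1 y1 ? 1 : 0  */
  int c_T = x2 < y2 ? a_T : 0;  /* S3' c_T = x2 CMP2 y2 ? a_T : 0  */
  return c_T;                   /* instead of (int) (a_b & b_b)  */
}
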
/* A helper for vect_recog_mask_conversion_pattern.  Build
   conversion of MASK to a type suitable for masking VECTYPE.
   Built statement gets required vectype and is appended to
   a pattern sequence of STMT_VINFO.

   Return converted mask.  */

static tree
build_mask_conversion (vec_info *vinfo,
                       tree mask, tree vectype, stmt_vec_info stmt_vinfo)
{
  gimple *stmt;
  tree masktype, tmp;

  masktype = truth_type_for (vectype);
  tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
  stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
  append_pattern_def_seq (vinfo, stmt_vinfo,
                          stmt, masktype, TREE_TYPE (vectype));

  return tmp;
}

5796 /* Function vect_recog_mask_conversion_pattern
5798 Try to find statements which require boolean type
5799 converison. Additional conversion statements are
5800 added to handle such cases. For example:
5810 S4 c_1 = m_3 ? c_2 : c_3;
5812 Will be transformed into:
5816 S3'' m_2' = (_Bool[bitsize=32])m_2
5817 S3' m_3' = m_1 & m_2';
5818 S4'' m_3'' = (_Bool[bitsize=8])m_3'
5819 S4' c_1' = m_3'' ? c_2 : c_3; */
static gimple *
vect_recog_mask_conversion_pattern (vec_info *vinfo,
				    stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  enum tree_code rhs_code;
  tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
  tree vectype1, vectype2;
  stmt_vec_info pattern_stmt_info;
  tree rhs1_op0 = NULL_TREE, rhs1_op1 = NULL_TREE;
  tree rhs1_op0_type = NULL_TREE, rhs1_op1_type = NULL_TREE;

  /* Check for MASK_LOAD and MASK_STORE as well as COND_OP calls requiring
     mask conversion.  */
  if (is_gimple_call (last_stmt)
      && gimple_call_internal_p (last_stmt))
    {
      gcall *pattern_stmt;

      internal_fn ifn = gimple_call_internal_fn (last_stmt);
      int mask_argno = internal_fn_mask_index (ifn);
      if (mask_argno < 0)
	return NULL;

      bool store_p = internal_store_fn_p (ifn);
      bool load_p = internal_load_fn_p (ifn);
      if (store_p)
	{
	  int rhs_index = internal_fn_stored_value_index (ifn);
	  tree rhs = gimple_call_arg (last_stmt, rhs_index);
	  vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs));
	}
      else
	{
	  lhs = gimple_call_lhs (last_stmt);
	  if (!lhs)
	    return NULL;
	  vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
	}

      if (!vectype1)
	return NULL;

      tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
      tree mask_arg_type = integer_type_for_mask (mask_arg, vinfo);
      if (mask_arg_type)
	{
	  vectype2 = get_mask_type_for_scalar_type (vinfo, mask_arg_type);

	  if (!vectype2
	      || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
			   TYPE_VECTOR_SUBPARTS (vectype2)))
	    return NULL;
	}
      else if (store_p || load_p)
	return NULL;

      tmp = build_mask_conversion (vinfo, mask_arg, vectype1, stmt_vinfo);

      auto_vec<tree, 8> args;
      unsigned int nargs = gimple_call_num_args (last_stmt);
      args.safe_grow (nargs, true);
      for (unsigned int i = 0; i < nargs; ++i)
	args[i] = ((int) i == mask_argno
		   ? tmp
		   : gimple_call_arg (last_stmt, i));
      pattern_stmt = gimple_build_call_internal_vec (ifn, args);

      if (!store_p)
	{
	  lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
	  gimple_call_set_lhs (pattern_stmt, lhs);
	}

      if (load_p || store_p)
	gimple_call_set_nothrow (pattern_stmt, true);

      pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
      if (STMT_VINFO_DATA_REF (stmt_vinfo))
	vinfo->move_dr (pattern_stmt_info, stmt_vinfo);

      *type_out = vectype1;
      vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);

      return pattern_stmt;
    }
  if (!is_gimple_assign (last_stmt))
    return NULL;

  gimple *pattern_stmt;
  lhs = gimple_assign_lhs (last_stmt);
  rhs1 = gimple_assign_rhs1 (last_stmt);
  rhs_code = gimple_assign_rhs_code (last_stmt);

  /* Check for cond expression requiring mask conversion.  */
  if (rhs_code == COND_EXPR)
    {
      vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));

      if (TREE_CODE (rhs1) == SSA_NAME)
	{
	  rhs1_type = integer_type_for_mask (rhs1, vinfo);
	  if (!rhs1_type)
	    return NULL;
	}
      else if (COMPARISON_CLASS_P (rhs1))
	{
	  /* Check whether we're comparing scalar booleans and (if so)
	     whether a better mask type exists than the mask associated
	     with boolean-sized elements.  This avoids unnecessary packs
	     and unpacks if the booleans are set from comparisons of
	     wider types.  E.g. in:

	       int x1, x2, x3, x4, y1, y2;

	       bool b1 = (x1 == x2);
	       bool b2 = (x3 == x4);
	       ... = b1 == b2 ? y1 : y2;

	     it is better for b1 and b2 to use the mask type associated
	     with int elements rather than bool (byte) elements.  */
	  rhs1_op0 = TREE_OPERAND (rhs1, 0);
	  rhs1_op1 = TREE_OPERAND (rhs1, 1);
	  if (!rhs1_op0 || !rhs1_op1)
	    return NULL;
	  rhs1_op0_type = integer_type_for_mask (rhs1_op0, vinfo);
	  rhs1_op1_type = integer_type_for_mask (rhs1_op1, vinfo);

	  if (!rhs1_op0_type)
	    rhs1_type = TREE_TYPE (rhs1_op0);
	  else if (!rhs1_op1_type)
	    rhs1_type = TREE_TYPE (rhs1_op1);
	  else if (TYPE_PRECISION (rhs1_op0_type)
		   != TYPE_PRECISION (rhs1_op1_type))
	    {
	      int tmp0 = (int) TYPE_PRECISION (rhs1_op0_type)
			 - (int) TYPE_PRECISION (TREE_TYPE (lhs));
	      int tmp1 = (int) TYPE_PRECISION (rhs1_op1_type)
			 - (int) TYPE_PRECISION (TREE_TYPE (lhs));
	      if ((tmp0 > 0 && tmp1 > 0) || (tmp0 < 0 && tmp1 < 0))
		{
		  if (abs (tmp0) > abs (tmp1))
		    rhs1_type = rhs1_op1_type;
		  else
		    rhs1_type = rhs1_op0_type;
		}
	      else
		rhs1_type = build_nonstandard_integer_type
		  (TYPE_PRECISION (TREE_TYPE (lhs)), 1);
	    }
	  else
	    rhs1_type = rhs1_op0_type;
	}
      else
	return NULL;

      vectype2 = get_mask_type_for_scalar_type (vinfo, rhs1_type);

      if (!vectype1 || !vectype2)
	return NULL;

      /* Continue if a conversion is needed.  Also continue if we have
	 a comparison whose vector type would normally be different from
	 VECTYPE2 when considered in isolation.  In that case we'll
	 replace the comparison with an SSA name (so that we can record
	 its vector type) and behave as though the comparison was an SSA
	 name from the outset.  */
      if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
		    TYPE_VECTOR_SUBPARTS (vectype2))
	  && !rhs1_op0_type
	  && !rhs1_op1_type)
	return NULL;

      /* If rhs1 is invariant and we can promote it leave the COND_EXPR
	 in place, we can handle it in vectorizable_condition.  This avoids
	 unnecessary promotion stmts and increased vectorization factor.  */
      if (COMPARISON_CLASS_P (rhs1)
	  && INTEGRAL_TYPE_P (rhs1_type)
	  && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
		       TYPE_VECTOR_SUBPARTS (vectype2)))
	{
	  enum vect_def_type dt;
	  if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
	      && dt == vect_external_def
	      && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
	      && (dt == vect_external_def
		  || dt == vect_constant_def))
	    {
	      tree wide_scalar_type = build_nonstandard_integer_type
		(vector_element_bits (vectype1), TYPE_UNSIGNED (rhs1_type));
	      tree vectype3 = get_vectype_for_scalar_type (vinfo,
							   wide_scalar_type);
	      if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
		return NULL;
	    }
	}

      /* If rhs1 is a comparison we need to move it into a
	 separate statement.  */
      if (TREE_CODE (rhs1) != SSA_NAME)
	{
	  tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
	  if (rhs1_op0_type
	      && TYPE_PRECISION (rhs1_op0_type) != TYPE_PRECISION (rhs1_type))
	    rhs1_op0 = build_mask_conversion (vinfo, rhs1_op0,
					      vectype2, stmt_vinfo);
	  if (rhs1_op1_type
	      && TYPE_PRECISION (rhs1_op1_type) != TYPE_PRECISION (rhs1_type))
	    rhs1_op1 = build_mask_conversion (vinfo, rhs1_op1,
					      vectype2, stmt_vinfo);
	  pattern_stmt = gimple_build_assign (tmp, TREE_CODE (rhs1),
					      rhs1_op0, rhs1_op1);
	  rhs1 = tmp;
	  append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vectype2,
				  rhs1_type);
	}

      if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
		    TYPE_VECTOR_SUBPARTS (vectype2)))
	tmp = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
      else
	tmp = rhs1;

      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
      pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
					  gimple_assign_rhs2 (last_stmt),
					  gimple_assign_rhs3 (last_stmt));

      *type_out = vectype1;
      vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);

      return pattern_stmt;
    }
  /* Now check for binary boolean operations requiring conversion for
     one of the operands.  */
  if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
    return NULL;

  if (rhs_code != BIT_IOR_EXPR
      && rhs_code != BIT_XOR_EXPR
      && rhs_code != BIT_AND_EXPR
      && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
    return NULL;

  rhs2 = gimple_assign_rhs2 (last_stmt);

  rhs1_type = integer_type_for_mask (rhs1, vinfo);
  rhs2_type = integer_type_for_mask (rhs2, vinfo);

  if (!rhs1_type || !rhs2_type
      || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
    return NULL;

  if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
    {
      vectype1 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
      if (!vectype1)
	return NULL;
      rhs2 = build_mask_conversion (vinfo, rhs2, vectype1, stmt_vinfo);
    }
  else
    {
      vectype1 = get_mask_type_for_scalar_type (vinfo, rhs2_type);
      if (!vectype1)
	return NULL;
      rhs1 = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
    }

  lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
  pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);

  *type_out = vectype1;
  vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);

  return pattern_stmt;
}
/* STMT_INFO is a load or store.  If the load or store is conditional, return
   the boolean condition under which it occurs, otherwise return null.  */

static tree
vect_get_load_store_mask (stmt_vec_info stmt_info)
{
  if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt))
    {
      gcc_assert (gimple_assign_single_p (def_assign));
      return NULL_TREE;
    }

  if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt))
    {
      internal_fn ifn = gimple_call_internal_fn (def_call);
      int mask_index = internal_fn_mask_index (ifn);
      return gimple_call_arg (def_call, mask_index);
    }

  gcc_unreachable ();
}
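
/* For illustration only (not part of the original sources): for a plain
   (unconditional) access written as a GIMPLE assignment such as

       _1 = a[i_2];

   the function above returns NULL_TREE, whereas for a masked internal call
   roughly of the form

       .MASK_STORE (ptr_3, align, mask_4, val_5);

   it returns the mask argument (mask_4 here).  The operand names are
   invented for the example.  */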
/* Return MASK if MASK is suitable for masking an operation on vectors
   of type VECTYPE, otherwise convert it into such a form and return
   the result.  Associate any conversion statements with STMT_INFO's
   pattern.  */

static tree
vect_convert_mask_for_vectype (tree mask, tree vectype,
			       stmt_vec_info stmt_info, vec_info *vinfo)
{
  tree mask_type = integer_type_for_mask (mask, vinfo);
  if (mask_type)
    {
      tree mask_vectype = get_mask_type_for_scalar_type (vinfo, mask_type);
      if (mask_vectype
	  && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
		       TYPE_VECTOR_SUBPARTS (mask_vectype)))
	mask = build_mask_conversion (vinfo, mask, vectype, stmt_info);
    }
  return mask;
}
/* Return the equivalent of:

     fold_convert (TYPE, VALUE)

   with the expectation that the operation will be vectorized.
   If new statements are needed, add them as pattern statements
   to STMT_INFO.  */

static tree
vect_add_conversion_to_pattern (vec_info *vinfo,
				tree type, tree value, stmt_vec_info stmt_info)
{
  if (useless_type_conversion_p (type, TREE_TYPE (value)))
    return value;

  tree new_value = vect_recog_temp_ssa_var (type, NULL);
  gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
  append_pattern_def_seq (vinfo, stmt_info, conversion,
			  get_vectype_for_scalar_type (vinfo, type));
  return new_value;
}
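
/* For illustration only (not part of the original sources): converting a
   32-bit offset off_1 to a 64-bit TYPE would add a pattern statement
   roughly of the form

       patt_off_2 = (long unsigned int) off_1;

   and return patt_off_2, while a call whose TYPE already matches
   TREE_TYPE (VALUE) simply returns VALUE unchanged.  The names used here
   are hypothetical.  */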
/* Try to convert STMT_INFO into a call to a gather load or scatter store
   internal function.  Return the final statement on success and set
   *TYPE_OUT to the vector type being loaded or stored.

   This function only handles gathers and scatters that were recognized
   as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P).  */

static gimple *
vect_recog_gather_scatter_pattern (vec_info *vinfo,
				   stmt_vec_info stmt_info, tree *type_out)
{
  /* Currently we only support this for loop vectorization.  */
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  if (!loop_vinfo)
    return NULL;

  /* Make sure that we're looking at a gather load or scatter store.  */
  data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    return NULL;

  /* Get the boolean that controls whether the load or store happens.
     This is null if the operation is unconditional.  */
  tree mask = vect_get_load_store_mask (stmt_info);

  /* Make sure that the target supports an appropriate internal
     function for the gather/scatter operation.  */
  gather_scatter_info gs_info;
  if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info)
      || gs_info.ifn == IFN_LAST)
    return NULL;

  /* Convert the mask to the right form.  */
  tree gs_vectype = get_vectype_for_scalar_type (loop_vinfo,
						 gs_info.element_type);
  if (mask)
    mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
					  loop_vinfo);
  else if (gs_info.ifn == IFN_MASK_SCATTER_STORE
	   || gs_info.ifn == IFN_MASK_GATHER_LOAD
	   || gs_info.ifn == IFN_MASK_LEN_SCATTER_STORE
	   || gs_info.ifn == IFN_MASK_LEN_GATHER_LOAD)
    mask = build_int_cst (TREE_TYPE (truth_type_for (gs_vectype)), -1);

  /* Get the invariant base and non-invariant offset, converting the
     latter to the same width as the vector elements.  */
  tree base = gs_info.base;
  tree offset_type = TREE_TYPE (gs_info.offset_vectype);
  tree offset = vect_add_conversion_to_pattern (vinfo, offset_type,
						gs_info.offset, stmt_info);

  /* Build the new pattern statement.  */
  tree scale = size_int (gs_info.scale);
  gcall *pattern_stmt;
  if (DR_IS_READ (dr))
    {
      tree zero = build_zero_cst (gs_info.element_type);
      if (mask != NULL)
	pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base,
						   offset, scale, zero, mask);
      else
	pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
						   offset, scale, zero);
      tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
      gimple_call_set_lhs (pattern_stmt, load_lhs);
    }
  else
    {
      tree rhs = vect_get_store_rhs (stmt_info);
      if (mask != NULL)
	pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5,
						   base, offset, scale, rhs,
						   mask);
      else
	pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4,
						   base, offset, scale, rhs);
    }
  gimple_call_set_nothrow (pattern_stmt, true);

  /* Copy across relevant vectorization info and associate DR with the
     new pattern statement instead of the original statement.  */
  stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
  loop_vinfo->move_dr (pattern_stmt_info, stmt_info);

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  *type_out = vectype;
  vect_pattern_detected ("gather/scatter pattern", stmt_info->stmt);

  return pattern_stmt;
}
/* Return true if TYPE is a non-boolean integer type.  These are the types
   that we want to consider for narrowing.  */

static bool
vect_narrowable_type_p (tree type)
{
  return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
}

/* Return true if the operation given by CODE can be truncated to N bits
   when only N bits of the output are needed.  This is only true if bit N+1
   of the inputs has no effect on the low N bits of the result.  */

static bool
vect_truncatable_operation_p (tree_code code)
{
  switch (code)
    {
    case NEGATE_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case MULT_EXPR:
    case BIT_NOT_EXPR:
    case BIT_AND_EXPR:
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
    case COND_EXPR:
      return true;

    default:
      return false;
    }
}
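
/* For illustration only (not part of the original sources): addition is
   truncatable because the low 8 bits of a + b depend only on the low
   8 bits of a and b, e.g. (0x1ff + 0x001) & 0xff == (0xff + 0x01) & 0xff.
   Division and right shifts are not truncatable: the low 8 bits of
   0x100 >> 1 differ from the low 8 bits of 0x000 >> 1 even though both
   inputs have the same low 8 bits.  */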
/* Record that STMT_INFO could be changed from operating on TYPE to
   operating on a type with the precision and sign given by PRECISION
   and SIGN respectively.  PRECISION is an arbitrary bit precision;
   it might not be a whole number of bytes.  */

static void
vect_set_operation_type (stmt_vec_info stmt_info, tree type,
			 unsigned int precision, signop sign)
{
  /* Round the precision up to a whole number of bytes.  */
  precision = vect_element_precision (precision);
  if (precision < TYPE_PRECISION (type)
      && (!stmt_info->operation_precision
	  || stmt_info->operation_precision > precision))
    {
      stmt_info->operation_precision = precision;
      stmt_info->operation_sign = sign;
    }
}
/* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
   non-boolean inputs, all of which have type TYPE.  MIN_INPUT_PRECISION
   is an arbitrary bit precision; it might not be a whole number of bytes.  */

static void
vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
			      unsigned int min_input_precision)
{
  /* This operation in isolation only requires the inputs to have
     MIN_INPUT_PRECISION of precision.  However, that doesn't mean
     that MIN_INPUT_PRECISION is a natural precision for the chain
     as a whole.  E.g. consider something like:

	 unsigned short *x, *y;
	 *y = ((*x & 0xf0) >> 4) | (*y << 4);

     The right shift can be done on unsigned chars, and only requires the
     result of "*x & 0xf0" to be done on unsigned chars.  But taking that
     approach would mean turning a natural chain of single-vector unsigned
     short operations into one that truncates "*x" and then extends
     "(*x & 0xf0) >> 4", with two vectors for each unsigned short
     operation and one vector for each unsigned char operation.
     This would be a significant pessimization.

     Instead only propagate the maximum of this precision and the precision
     required by the users of the result.  This means that we don't pessimize
     the case above but continue to optimize things like:

	 unsigned char *y;
	 unsigned short *x;
	 *y = ((*x & 0xf0) >> 4) | (*y << 4);

     Here we would truncate two vectors of *x to a single vector of
     unsigned chars and use single-vector unsigned char operations for
     everything else, rather than doing two unsigned short copies of
     "(*x & 0xf0) >> 4" and then truncating the result.  */
  min_input_precision = MAX (min_input_precision,
			     stmt_info->min_output_precision);

  if (min_input_precision < TYPE_PRECISION (type)
      && (!stmt_info->min_input_precision
	  || stmt_info->min_input_precision > min_input_precision))
    stmt_info->min_input_precision = min_input_precision;
}
/* Subroutine of vect_determine_min_output_precision.  Return true if
   we can calculate a reduced number of output bits for STMT_INFO,
   whose result is LHS.  */

static bool
vect_determine_min_output_precision_1 (vec_info *vinfo,
				       stmt_vec_info stmt_info, tree lhs)
{
  /* Take the maximum precision required by users of the result.  */
  unsigned int precision = 0;
  imm_use_iterator iter;
  use_operand_p use;
  FOR_EACH_IMM_USE_FAST (use, iter, lhs)
    {
      gimple *use_stmt = USE_STMT (use);
      if (is_gimple_debug (use_stmt))
	continue;
      stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
      if (!use_stmt_info || !use_stmt_info->min_input_precision)
	return false;
      /* The input precision recorded for COND_EXPRs applies only to the
	 "then" and "else" values.  */
      gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
      if (assign
	  && gimple_assign_rhs_code (assign) == COND_EXPR
	  && use->use != gimple_assign_rhs2_ptr (assign)
	  && use->use != gimple_assign_rhs3_ptr (assign))
	return false;
      precision = MAX (precision, use_stmt_info->min_input_precision);
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "only the low %d bits of %T are significant\n",
		     precision, lhs);
  stmt_info->min_output_precision = precision;
  return true;
}
/* Calculate min_output_precision for STMT_INFO.  */

static void
vect_determine_min_output_precision (vec_info *vinfo, stmt_vec_info stmt_info)
{
  /* We're only interested in statements with a narrowable result.  */
  tree lhs = gimple_get_lhs (stmt_info->stmt);
  if (!lhs
      || TREE_CODE (lhs) != SSA_NAME
      || !vect_narrowable_type_p (TREE_TYPE (lhs)))
    return;

  if (!vect_determine_min_output_precision_1 (vinfo, stmt_info, lhs))
    stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
}
/* Use range information to decide whether STMT (described by STMT_INFO)
   could be done in a narrower type.  This is effectively a forward
   propagation, since it uses context-independent information that applies
   to all users of an SSA name.  */

static void
vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
{
  tree lhs = gimple_assign_lhs (stmt);
  if (!lhs || TREE_CODE (lhs) != SSA_NAME)
    return;

  tree type = TREE_TYPE (lhs);
  if (!vect_narrowable_type_p (type))
    return;

  /* First see whether we have any useful range information for the result.  */
  unsigned int precision = TYPE_PRECISION (type);
  signop sign = TYPE_SIGN (type);
  wide_int min_value, max_value;
  if (!vect_get_range_info (lhs, &min_value, &max_value))
    return;

  tree_code code = gimple_assign_rhs_code (stmt);
  unsigned int nops = gimple_num_ops (stmt);

  if (!vect_truncatable_operation_p (code))
    /* Check that all relevant input operands are compatible, and update
       [MIN_VALUE, MAX_VALUE] to include their ranges.  */
    for (unsigned int i = 1; i < nops; ++i)
      {
	tree op = gimple_op (stmt, i);
	if (TREE_CODE (op) == INTEGER_CST)
	  {
	    /* Don't require the integer to have RHS_TYPE (which it might
	       not for things like shift amounts, etc.), but do require it
	       to fit the type.  */
	    if (!int_fits_type_p (op, type))
	      return;

	    min_value = wi::min (min_value, wi::to_wide (op, precision), sign);
	    max_value = wi::max (max_value, wi::to_wide (op, precision), sign);
	  }
	else if (TREE_CODE (op) == SSA_NAME)
	  {
	    /* Ignore codes that don't take uniform arguments.  */
	    if (!types_compatible_p (TREE_TYPE (op), type))
	      return;

	    wide_int op_min_value, op_max_value;
	    if (!vect_get_range_info (op, &op_min_value, &op_max_value))
	      return;

	    min_value = wi::min (min_value, op_min_value, sign);
	    max_value = wi::max (max_value, op_max_value, sign);
	  }
	else
	  return;
      }

  /* Try to switch signed types for unsigned types if we can.
     This is better for two reasons.  First, unsigned ops tend
     to be cheaper than signed ops.  Second, it means that we can
     handle things like:

	signed char c;
	int res = (int) c & 0xff00; // range [0x0000, 0xff00]

     as:

	signed char c;
	unsigned short res_1 = (unsigned short) c & 0xff00;
	int res = (int) res_1;

     where the intermediate result res_1 has unsigned rather than
     signed type.  */
  if (sign == SIGNED && !wi::neg_p (min_value))
    sign = UNSIGNED;

  /* See what precision is required for MIN_VALUE and MAX_VALUE.  */
  unsigned int precision1 = wi::min_precision (min_value, sign);
  unsigned int precision2 = wi::min_precision (max_value, sign);
  unsigned int value_precision = MAX (precision1, precision2);
  if (value_precision >= precision)
    return;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
		     " without loss of precision: %G",
		     sign == SIGNED ? "signed" : "unsigned",
		     value_precision, (gimple *) stmt);

  vect_set_operation_type (stmt_info, type, value_precision, sign);
  vect_set_min_input_precision (stmt_info, type, value_precision);
}
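
/* For illustration only (not part of the original sources): if the result
   c_3 of the truncatable statement c_3 = a_1 + b_2 has the recorded range
   [0, 250], then wi::min_precision (250, UNSIGNED) is 8, so the function
   above records that the addition can be done as an 8-bit unsigned
   operation even though the IL computes it in a 32-bit type.  The SSA
   names are invented for the example.  */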
/* Use information about the users of STMT's result to decide whether
   STMT (described by STMT_INFO) could be done in a narrower type.
   This is effectively a backward propagation.  */

static void
vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
{
  tree_code code = gimple_assign_rhs_code (stmt);
  unsigned int opno = (code == COND_EXPR ? 2 : 1);
  tree type = TREE_TYPE (gimple_op (stmt, opno));
  if (!vect_narrowable_type_p (type))
    return;

  unsigned int precision = TYPE_PRECISION (type);
  unsigned int operation_precision, min_input_precision;
  switch (code)
    {
    CASE_CONVERT:
      /* Only the bits that contribute to the output matter.  Don't change
	 the precision of the operation itself.  */
      operation_precision = precision;
      min_input_precision = stmt_info->min_output_precision;
      break;

    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
      {
	tree shift = gimple_assign_rhs2 (stmt);
	if (TREE_CODE (shift) != INTEGER_CST
	    || !wi::ltu_p (wi::to_widest (shift), precision))
	  return;
	unsigned int const_shift = TREE_INT_CST_LOW (shift);
	if (code == LSHIFT_EXPR)
	  {
	    /* Avoid creating an undefined shift.

	       ??? We could instead use min_output_precision as-is and
	       optimize out-of-range shifts to zero.  However, only
	       degenerate testcases shift away all their useful input data,
	       and it isn't natural to drop input operations in the middle
	       of vectorization.  This sort of thing should really be
	       handled before vectorization.  */
	    operation_precision = MAX (stmt_info->min_output_precision,
				       const_shift + 1);
	    /* We need CONST_SHIFT fewer bits of the input.  */
	    min_input_precision = (MAX (operation_precision, const_shift)
				   - const_shift);
	  }
	else
	  {
	    /* We need CONST_SHIFT extra bits to do the operation.  */
	    operation_precision = (stmt_info->min_output_precision
				   + const_shift);
	    min_input_precision = operation_precision;
	  }
	break;
      }

    default:
      if (vect_truncatable_operation_p (code))
	{
	  /* Input bit N has no effect on output bits N-1 and lower.  */
	  operation_precision = stmt_info->min_output_precision;
	  min_input_precision = operation_precision;
	  break;
	}
      return;
    }

  if (operation_precision < precision)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
			 " without affecting users: %G",
			 TYPE_UNSIGNED (type) ? "unsigned" : "signed",
			 operation_precision, (gimple *) stmt);
      vect_set_operation_type (stmt_info, type, operation_precision,
			       TYPE_SIGN (type));
    }
  vect_set_min_input_precision (stmt_info, type, min_input_precision);
}
/* Return true if the statement described by STMT_INFO sets a boolean
   SSA_NAME and if we know how to vectorize this kind of statement using
   vector mask types.  */

static bool
possible_vector_mask_operation_p (stmt_vec_info stmt_info)
{
  tree lhs = gimple_get_lhs (stmt_info->stmt);
  if (!lhs
      || TREE_CODE (lhs) != SSA_NAME
      || !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
    return false;

  if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
    {
      tree_code rhs_code = gimple_assign_rhs_code (assign);
      switch (rhs_code)
	{
	case COND_EXPR:
	case SSA_NAME:
	case BIT_NOT_EXPR:
	case BIT_IOR_EXPR:
	case BIT_XOR_EXPR:
	case BIT_AND_EXPR:
	  return true;

	default:
	  return TREE_CODE_CLASS (rhs_code) == tcc_comparison;
	}
    }
  else if (is_a <gphi *> (stmt_info->stmt))
    return true;
  return false;
}
/* If STMT_INFO sets a boolean SSA_NAME, see whether we should use
   a vector mask type instead of a normal vector type.  Record the
   result in STMT_INFO->mask_precision.  */

static void
vect_determine_mask_precision (vec_info *vinfo, stmt_vec_info stmt_info)
{
  if (!possible_vector_mask_operation_p (stmt_info))
    return;

  /* If at least one boolean input uses a vector mask type,
     pick the mask type with the narrowest elements.

     ??? This is the traditional behavior.  It should always produce
     the smallest number of operations, but isn't necessarily the
     optimal choice.  For example, if we have:

       a = b & c

     where:

       - the user of a wants it to have a mask type for 16-bit elements (M16)
       - b also uses M16
       - c uses a mask type for 8-bit elements (M8)

     then picking M8 gives:

       - 1 M16->M8 pack for b
       - 1 M8 AND for a
       - 2 M8->M16 unpacks for the user of a

     whereas picking M16 would have given:

       - 2 M8->M16 unpacks for c
       - 1 M16 AND for a

     The number of operations is the same, but M16 would have given
     a shorter dependency chain and allowed more ILP.  */
  unsigned int precision = ~0U;
  if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
    {
      unsigned int nops = gimple_num_ops (assign);
      for (unsigned int i = 1; i < nops; ++i)
	{
	  tree rhs = gimple_op (assign, i);
	  if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs)))
	    continue;

	  stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
	  if (!def_stmt_info)
	    /* Don't let external or constant operands influence the choice.
	       We can convert them to whichever vector type we pick.  */
	    continue;

	  if (def_stmt_info->mask_precision)
	    {
	      if (precision > def_stmt_info->mask_precision)
		precision = def_stmt_info->mask_precision;
	    }
	}

      /* If the statement compares two values that shouldn't use vector masks,
	 try comparing the values as normal scalars instead.  */
      tree_code rhs_code = gimple_assign_rhs_code (assign);
      if (precision == ~0U
	  && TREE_CODE_CLASS (rhs_code) == tcc_comparison)
	{
	  tree rhs1_type = TREE_TYPE (gimple_assign_rhs1 (assign));
	  scalar_mode mode;
	  tree vectype, mask_type;
	  if (is_a <scalar_mode> (TYPE_MODE (rhs1_type), &mode)
	      && (vectype = get_vectype_for_scalar_type (vinfo, rhs1_type))
	      && (mask_type = get_mask_type_for_scalar_type (vinfo, rhs1_type))
	      && expand_vec_cmp_expr_p (vectype, mask_type, rhs_code))
	    precision = GET_MODE_BITSIZE (mode);
	}
    }
  else
    {
      gphi *phi = as_a <gphi *> (stmt_info->stmt);
      for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
	{
	  tree rhs = gimple_phi_arg_def (phi, i);

	  stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
	  if (!def_stmt_info)
	    /* Don't let external or constant operands influence the choice.
	       We can convert them to whichever vector type we pick.  */
	    continue;

	  if (def_stmt_info->mask_precision)
	    {
	      if (precision > def_stmt_info->mask_precision)
		precision = def_stmt_info->mask_precision;
	    }
	}
    }

  if (dump_enabled_p ())
    {
      if (precision == ~0U)
	dump_printf_loc (MSG_NOTE, vect_location,
			 "using normal nonmask vectors for %G",
			 stmt_info->stmt);
      else
	dump_printf_loc (MSG_NOTE, vect_location,
			 "using boolean precision %d for %G",
			 precision, stmt_info->stmt);
    }

  stmt_info->mask_precision = precision;
}
/* Handle vect_determine_precisions for STMT_INFO, given that we
   have already done so for the users of its result.  */

static void
vect_determine_stmt_precisions (vec_info *vinfo, stmt_vec_info stmt_info)
{
  vect_determine_min_output_precision (vinfo, stmt_info);
  if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
    {
      vect_determine_precisions_from_range (stmt_info, stmt);
      vect_determine_precisions_from_users (stmt_info, stmt);
    }
}
/* Walk backwards through the vectorizable region to determine the
   values of these fields:

   - min_output_precision
   - min_input_precision
   - operation_precision
   - operation_sign.  */

void
vect_determine_precisions (vec_info *vinfo)
{
  DUMP_VECT_SCOPE ("vect_determine_precisions");

  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    {
      class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
      unsigned int nbbs = loop->num_nodes;

      for (unsigned int i = 0; i < nbbs; i++)
	{
	  basic_block bb = bbs[i];
	  for (auto gsi = gsi_start_phis (bb);
	       !gsi_end_p (gsi); gsi_next (&gsi))
	    {
	      stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
	      if (stmt_info)
		vect_determine_mask_precision (vinfo, stmt_info);
	    }
	  for (auto si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	    if (!is_gimple_debug (gsi_stmt (si)))
	      vect_determine_mask_precision
		(vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
	}
      for (unsigned int i = 0; i < nbbs; i++)
	{
	  basic_block bb = bbs[nbbs - i - 1];
	  for (gimple_stmt_iterator si = gsi_last_bb (bb);
	       !gsi_end_p (si); gsi_prev (&si))
	    if (!is_gimple_debug (gsi_stmt (si)))
	      vect_determine_stmt_precisions
		(vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
	  for (auto gsi = gsi_start_phis (bb);
	       !gsi_end_p (gsi); gsi_next (&gsi))
	    {
	      stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
	      if (stmt_info)
		vect_determine_stmt_precisions (vinfo, stmt_info);
	    }
	}
    }
  else
    {
      bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
      for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
	{
	  basic_block bb = bb_vinfo->bbs[i];
	  for (auto gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	    {
	      stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
	      if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
		vect_determine_mask_precision (vinfo, stmt_info);
	    }
	  for (auto gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	    {
	      stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
	      if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
		vect_determine_mask_precision (vinfo, stmt_info);
	    }
	}
      for (int i = bb_vinfo->bbs.length () - 1; i != -1; --i)
	{
	  for (gimple_stmt_iterator gsi = gsi_last_bb (bb_vinfo->bbs[i]);
	       !gsi_end_p (gsi); gsi_prev (&gsi))
	    {
	      stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
	      if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
		vect_determine_stmt_precisions (vinfo, stmt_info);
	    }
	  for (auto gsi = gsi_start_phis (bb_vinfo->bbs[i]);
	       !gsi_end_p (gsi); gsi_next (&gsi))
	    {
	      stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
	      if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
		vect_determine_stmt_precisions (vinfo, stmt_info);
	    }
	}
    }
}
typedef gimple *(*vect_recog_func_ptr) (vec_info *, stmt_vec_info, tree *);

struct vect_recog_func
{
  vect_recog_func_ptr fn;
  const char *name;
};

/* Note that ordering matters - the first pattern matching on a stmt is
   taken which means usually the more complex one needs to precede the
   less complex ones (widen_sum only after dot_prod or sad for example).  */
static vect_recog_func vect_vect_recog_func_ptrs[] = {
  { vect_recog_bitfield_ref_pattern, "bitfield_ref" },
  { vect_recog_bit_insert_pattern, "bit_insert" },
  { vect_recog_abd_pattern, "abd" },
  { vect_recog_over_widening_pattern, "over_widening" },
  /* Must come after over_widening, which narrows the shift as much as
     possible beforehand.  */
  { vect_recog_average_pattern, "average" },
  { vect_recog_cond_expr_convert_pattern, "cond_expr_convert" },
  { vect_recog_mulhs_pattern, "mult_high" },
  { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
  { vect_recog_widen_mult_pattern, "widen_mult" },
  { vect_recog_dot_prod_pattern, "dot_prod" },
  { vect_recog_sad_pattern, "sad" },
  { vect_recog_widen_sum_pattern, "widen_sum" },
  { vect_recog_pow_pattern, "pow" },
  { vect_recog_popcount_clz_ctz_ffs_pattern, "popcount_clz_ctz_ffs" },
  { vect_recog_ctz_ffs_pattern, "ctz_ffs" },
  { vect_recog_widen_shift_pattern, "widen_shift" },
  { vect_recog_rotate_pattern, "rotate" },
  { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
  { vect_recog_divmod_pattern, "divmod" },
  { vect_recog_mult_pattern, "mult" },
  { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
  { vect_recog_bool_pattern, "bool" },
  /* This must come before mask conversion, and includes the parts
     of mask conversion that are needed for gather and scatter
     internal functions.  */
  { vect_recog_gather_scatter_pattern, "gather_scatter" },
  { vect_recog_mask_conversion_pattern, "mask_conversion" },
  { vect_recog_widen_plus_pattern, "widen_plus" },
  { vect_recog_widen_minus_pattern, "widen_minus" },
  { vect_recog_widen_abd_pattern, "widen_abd" },
  /* These must come after the double widening ones.  */
};

const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);
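
/* For illustration only (not part of the original sources): a new
   recognizer would be hooked in by adding an entry such as

     { vect_recog_my_pattern, "my_pattern" },

   at a position in the table above that respects the ordering notes in
   its comments (the first matching pattern wins).  vect_recog_my_pattern
   is a hypothetical function with the vect_recog_func_ptr signature.  */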
/* Mark statements that are involved in a pattern.  */

void
vect_mark_pattern_stmts (vec_info *vinfo,
			 stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
			 tree pattern_vectype)
{
  stmt_vec_info orig_stmt_info_saved = orig_stmt_info;
  gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);

  gimple *orig_pattern_stmt = NULL;
  if (is_pattern_stmt_p (orig_stmt_info))
    {
      /* We're replacing a statement in an existing pattern definition
	 sequence.  */
      orig_pattern_stmt = orig_stmt_info->stmt;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "replacing earlier pattern %G", orig_pattern_stmt);

      /* To keep the book-keeping simple, just swap the lhs of the
	 old and new statements, so that the old one has a valid but
	 unused lhs.  */
      tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
      gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
      gimple_set_lhs (pattern_stmt, old_lhs);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "with %G", pattern_stmt);

      /* Switch to the statement that ORIG replaces.  */
      orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);

      /* We shouldn't be replacing the main pattern statement.  */
      gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
		  != orig_pattern_stmt);
    }

  if (def_seq)
    for (gimple_stmt_iterator si = gsi_start (def_seq);
	 !gsi_end_p (si); gsi_next (&si))
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "extra pattern stmt: %G", gsi_stmt (si));
	stmt_vec_info pattern_stmt_info
	  = vect_init_pattern_stmt (vinfo, gsi_stmt (si),
				    orig_stmt_info, pattern_vectype);
	/* Stmts in the def sequence are not vectorizable cycle or
	   induction defs, instead they should all be vect_internal_def
	   feeding the main pattern stmt which retains this def type.  */
	STMT_VINFO_DEF_TYPE (pattern_stmt_info) = vect_internal_def;
      }

  if (orig_pattern_stmt)
    {
      vect_init_pattern_stmt (vinfo, pattern_stmt,
			      orig_stmt_info, pattern_vectype);

      /* Insert all the new pattern statements before the original one.  */
      gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
      gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
					       orig_def_seq);
      gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
      gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);

      /* Remove the pattern statement that this new pattern replaces.  */
      gsi_remove (&gsi, false);
    }
  else
    vect_set_pattern_stmt (vinfo,
			   pattern_stmt, orig_stmt_info, pattern_vectype);

  /* Transfer reduction path info to the pattern.  */
  if (STMT_VINFO_REDUC_IDX (orig_stmt_info_saved) != -1)
    {
      gimple_match_op op;
      if (!gimple_extract_op (orig_stmt_info_saved->stmt, &op))
	gcc_unreachable ();
      tree lookfor = op.ops[STMT_VINFO_REDUC_IDX (orig_stmt_info)];
      /* Search the pattern def sequence and the main pattern stmt.  Note
	 we may have inserted all into a containing pattern def sequence
	 so the following is a bit awkward.  */
      gimple_stmt_iterator si;
      gimple *s;
      if (def_seq)
	{
	  si = gsi_start (def_seq);
	  s = gsi_stmt (si);
	  gsi_next (&si);
	}
      else
	{
	  si = gsi_none ();
	  s = pattern_stmt;
	}
      do
	{
	  bool found = false;
	  if (gimple_extract_op (s, &op))
	    for (unsigned i = 0; i < op.num_ops; ++i)
	      if (op.ops[i] == lookfor)
		{
		  STMT_VINFO_REDUC_IDX (vinfo->lookup_stmt (s)) = i;
		  lookfor = gimple_get_lhs (s);
		  found = true;
		  break;
		}
	  if (s == pattern_stmt)
	    {
	      if (!found && dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "failed to update reduction index.\n");
	      break;
	    }
	  if (gsi_end_p (si))
	    s = pattern_stmt;
	  else
	    {
	      s = gsi_stmt (si);
	      if (s == pattern_stmt)
		/* Found the end inside a bigger pattern def seq.  */
		si = gsi_none ();
	      else
		gsi_next (&si);
	    }
	}
      while (1);
    }
}
/* Function vect_pattern_recog_1

   Input:
   PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
	computation pattern.
   STMT_INFO: A stmt from which the pattern search should start.

   If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
   a sequence of statements that has the same functionality and can be
   used to replace STMT_INFO.  It returns the last statement in the sequence
   and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
   PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
   statement, having first checked that the target supports the new operation
   in that type.

   This function also does some bookkeeping, as explained in the documentation
   for vect_pattern_recog.  */

static void
vect_pattern_recog_1 (vec_info *vinfo,
		      vect_recog_func *recog_func, stmt_vec_info stmt_info)
{
  gimple *pattern_stmt;
  loop_vec_info loop_vinfo;
  tree pattern_vectype;

  /* If this statement has already been replaced with pattern statements,
     leave the original statement alone, since the first match wins.
     Instead try to match against the definition statements that feed
     the main pattern statement.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
	   !gsi_end_p (gsi); gsi_next (&gsi))
	vect_pattern_recog_1 (vinfo, recog_func,
			      vinfo->lookup_stmt (gsi_stmt (gsi)));
      return;
    }

  gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
  pattern_stmt = recog_func->fn (vinfo, stmt_info, &pattern_vectype);
  if (!pattern_stmt)
    {
      /* Clear any half-formed pattern definition sequence.  */
      STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
      return;
    }

  loop_vinfo = dyn_cast <loop_vec_info> (vinfo);

  /* Found a vectorizable pattern.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "%s pattern recognized: %G",
		     recog_func->name, pattern_stmt);

  /* Mark the stmts that are involved in the pattern.  */
  vect_mark_pattern_stmts (vinfo, stmt_info, pattern_stmt, pattern_vectype);

  /* Patterns cannot be vectorized using SLP, because they change the order of
     computation.  */
  if (loop_vinfo)
    {
      unsigned ix, ix2;
      stmt_vec_info *elem_ptr;
      VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
			     elem_ptr, *elem_ptr == stmt_info);
    }
}
/* Function vect_pattern_recog

   Input:
   LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
	computation idioms.

   Output - for each computation idiom that is detected we create a new stmt
	that provides the same functionality and that can be vectorized.  We
	also record some information in the struct_stmt_info of the relevant
	stmts, as explained below:

   At the entry to this function we have the following stmts, with the
   following initial value in the STMT_VINFO fields:

	 stmt                     in_pattern_p  related_stmt    vec_stmt
	 S1: a_i = ....                 -       -               -
	 S2: a_2 = ..use(a_i)..         -       -               -
	 S3: a_1 = ..use(a_2)..         -       -               -
	 S4: a_0 = ..use(a_1)..         -       -               -
	 S5: ... = ..use(a_0)..         -       -               -

   Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
   represented by a single stmt.  We then:
   - create a new stmt S6 equivalent to the pattern (the stmt is not
     inserted into the code)
   - fill in the STMT_VINFO fields as follows:

				  in_pattern_p  related_stmt    vec_stmt
	 S1: a_i = ....                 -       -               -
	 S2: a_2 = ..use(a_i)..         -       -               -
	 S3: a_1 = ..use(a_2)..         -       -               -
	 S4: a_0 = ..use(a_1)..         true    S6              -
	   '---> S6: a_new = ....       -       S4              -
	 S5: ... = ..use(a_0)..         -       -               -

   (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
   to each other through the RELATED_STMT field).

   S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
   of S4 because it will replace all its uses.  Stmts {S1,S2,S3} will
   remain irrelevant unless used by stmts other than S4.

   If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
   (because they are marked as irrelevant).  It will vectorize S6, and record
   a pointer to the new vector stmt VS6 from S6 (as usual).
   S4 will be skipped, and S5 will be vectorized as usual:

				  in_pattern_p  related_stmt    vec_stmt
	 S1: a_i = ....                 -       -               -
	 S2: a_2 = ..use(a_i)..         -       -               -
	 S3: a_1 = ..use(a_2)..         -       -               -
       > VS6: va_new = ....             -       -               -
	 S4: a_0 = ..use(a_1)..         true    S6              VS6
	   '---> S6: a_new = ....       -       S4              VS6
       > VS5: ... = ..vuse(va_new)..    -       -               -
	 S5: ... = ..use(a_0)..         -       -               -

   DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
   elsewhere), and we'll end up with:

	 VS6: va_new = ....
	 VS5: ... = ..vuse(va_new)..

   In case of more than one pattern statement, e.g., widen-mult with
   intermediate type:

     S1  a_t = ;
     S2  a_T = (TYPE) a_t;
	   '--> S3: a_it = (interm_type) a_t;
     S4  prod_T = a_T * CONST;
	   '--> S5: prod_T' = a_it w* CONST;

   there may be other users of a_T outside the pattern.  In that case S2 will
   be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
   and vectorized.  The vector stmt VS2 will be recorded in S2, and VS3 will
   be recorded in S3.  */

void
vect_pattern_recog (vec_info *vinfo)
{
  class loop *loop;
  basic_block *bbs;
  unsigned int nbbs;
  gimple_stmt_iterator si;
  unsigned int i, j;

  vect_determine_precisions (vinfo);

  DUMP_VECT_SCOPE ("vect_pattern_recog");

  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      bbs = LOOP_VINFO_BBS (loop_vinfo);
      nbbs = loop->num_nodes;

      /* Scan through the loop stmts, applying the pattern recognition
	 functions starting at each stmt visited:  */
      for (i = 0; i < nbbs; i++)
	{
	  basic_block bb = bbs[i];
	  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	    {
	      if (is_gimple_debug (gsi_stmt (si)))
		continue;
	      stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
	      /* Scan over all generic vect_recog_xxx_pattern functions.  */
	      for (j = 0; j < NUM_PATTERNS; j++)
		vect_pattern_recog_1 (vinfo, &vect_vect_recog_func_ptrs[j],
				      stmt_info);
	    }
	}
    }
  else
    {
      bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
      for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
	for (gimple_stmt_iterator gsi = gsi_start_bb (bb_vinfo->bbs[i]);
	     !gsi_end_p (gsi); gsi_next (&gsi))
	  {
	    stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (gsi_stmt (gsi));
	    if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info))
	      continue;

	    /* Scan over all generic vect_recog_xxx_pattern functions.  */
	    for (j = 0; j < NUM_PATTERNS; j++)
	      vect_pattern_recog_1 (vinfo,
				    &vect_vect_recog_func_ptrs[j], stmt_info);
	  }
    }

  /* After this no more add_stmt calls are allowed.  */
  vinfo->stmt_vec_info_ro = true;
}
/* Build a GIMPLE_ASSIGN or GIMPLE_CALL with the tree_code
   or internal_fn contained in CH, respectively.  */

gimple *
vect_gimple_build (tree lhs, code_helper ch, tree op0, tree op1)
{
  gcc_assert (op0 != NULL_TREE);
  if (ch.is_tree_code ())
    return gimple_build_assign (lhs, (tree_code) ch, op0, op1);

  gcc_assert (ch.is_internal_fn ());
  gimple *stmt = gimple_build_call_internal (as_internal_fn ((combined_fn) ch),
					     op1 == NULL_TREE ? 1 : 2,
					     op0, op1);
  gimple_call_set_lhs (stmt, lhs);
  return stmt;
}
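
/* For illustration only (not part of the original sources): with a
   tree_code in the code_helper, the function above builds a plain
   assignment, e.g.

     vect_gimple_build (res_1, PLUS_EXPR, a_2, b_3)

   yields res_1 = a_2 + b_3, whereas an internal function such as IFN_SQRT
   with a null OP1 yields the single-operand call res_1 = .SQRT (a_2) with
   its lhs set to LHS.  The SSA names here are invented.  */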