1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2023 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
28 #include "gimple-iterator.h"
29 #include "gimple-fold.h"
32 #include "optabs-tree.h"
33 #include "insn-config.h"
34 #include "recog.h" /* FIXME: for insn_data */
35 #include "fold-const.h"
36 #include "stor-layout.h"
39 #include "gimple-iterator.h"
40 #include "gimple-fold.h"
41 #include "gimplify-me.h"
43 #include "tree-vectorizer.h"
46 #include "internal-fn.h"
47 #include "case-cfn-macros.h"
48 #include "fold-const-call.h"
51 #include "omp-simd-clone.h"
53 #include "tree-vector-builder.h"
54 #include "vec-perm-indices.h"
55 #include "gimple-range.h"
58 /* TODO: Note the vectorizer still builds COND_EXPRs with GENERIC compares
59 in the first operand. Disentangling this is future work; the
60 IL is properly transferred to VEC_COND_EXPRs with separate compares. */
63 /* Return true if we have a useful VR_RANGE range for VAR, storing it
64 in *MIN_VALUE and *MAX_VALUE if so. Note the range in the dump files. */
67 vect_get_range_info (tree var
, wide_int
*min_value
, wide_int
*max_value
)
71 get_range_query (cfun
)->range_of_expr (vr
, var
);
72 if (vr
.undefined_p ())
73 vr
.set_varying (TREE_TYPE (var
));
74 value_range_kind vr_type
= get_legacy_range (vr
, vr_min
, vr_max
);
75 *min_value
= wi::to_wide (vr_min
);
76 *max_value
= wi::to_wide (vr_max
);
77 wide_int nonzero
= get_nonzero_bits (var
);
78 signop sgn
= TYPE_SIGN (TREE_TYPE (var
));
79 if (intersect_range_with_nonzero_bits (vr_type
, min_value
, max_value
,
80 nonzero
, sgn
) == VR_RANGE
)
82 if (dump_enabled_p ())
84 dump_generic_expr_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, var
);
85 dump_printf (MSG_NOTE
, " has range [");
86 dump_hex (MSG_NOTE
, *min_value
);
87 dump_printf (MSG_NOTE
, ", ");
88 dump_hex (MSG_NOTE
, *max_value
);
89 dump_printf (MSG_NOTE
, "]\n");
95 if (dump_enabled_p ())
97 dump_generic_expr_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, var
);
98 dump_printf (MSG_NOTE
, " has no range info\n");
104 /* Report that we've found an instance of pattern PATTERN in
108 vect_pattern_detected (const char *name
, gimple
*stmt
)
110 if (dump_enabled_p ())
111 dump_printf_loc (MSG_NOTE
, vect_location
, "%s: detected: %G", name
, stmt
);
114 /* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
115 return the pattern statement's stmt_vec_info. Set its vector type to
116 VECTYPE if it doesn't have one already. */
119 vect_init_pattern_stmt (vec_info
*vinfo
, gimple
*pattern_stmt
,
120 stmt_vec_info orig_stmt_info
, tree vectype
)
122 stmt_vec_info pattern_stmt_info
= vinfo
->lookup_stmt (pattern_stmt
);
123 if (pattern_stmt_info
== NULL
)
124 pattern_stmt_info
= vinfo
->add_stmt (pattern_stmt
);
125 gimple_set_bb (pattern_stmt
, gimple_bb (orig_stmt_info
->stmt
));
127 pattern_stmt_info
->pattern_stmt_p
= true;
128 STMT_VINFO_RELATED_STMT (pattern_stmt_info
) = orig_stmt_info
;
129 STMT_VINFO_DEF_TYPE (pattern_stmt_info
)
130 = STMT_VINFO_DEF_TYPE (orig_stmt_info
);
131 STMT_VINFO_TYPE (pattern_stmt_info
) = STMT_VINFO_TYPE (orig_stmt_info
);
132 if (!STMT_VINFO_VECTYPE (pattern_stmt_info
))
135 || (VECTOR_BOOLEAN_TYPE_P (vectype
)
136 == vect_use_mask_type_p (orig_stmt_info
)));
137 STMT_VINFO_VECTYPE (pattern_stmt_info
) = vectype
;
138 pattern_stmt_info
->mask_precision
= orig_stmt_info
->mask_precision
;
140 return pattern_stmt_info
;
143 /* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
144 Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
148 vect_set_pattern_stmt (vec_info
*vinfo
, gimple
*pattern_stmt
,
149 stmt_vec_info orig_stmt_info
, tree vectype
)
151 STMT_VINFO_IN_PATTERN_P (orig_stmt_info
) = true;
152 STMT_VINFO_RELATED_STMT (orig_stmt_info
)
153 = vect_init_pattern_stmt (vinfo
, pattern_stmt
, orig_stmt_info
, vectype
);
156 /* Add NEW_STMT to STMT_INFO's pattern definition statements. If VECTYPE
157 is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
158 be different from the vector type of the final pattern statement.
159 If VECTYPE is a mask type, SCALAR_TYPE_FOR_MASK is the scalar type
160 from which it was derived. */
163 append_pattern_def_seq (vec_info
*vinfo
,
164 stmt_vec_info stmt_info
, gimple
*new_stmt
,
165 tree vectype
= NULL_TREE
,
166 tree scalar_type_for_mask
= NULL_TREE
)
168 gcc_assert (!scalar_type_for_mask
169 == (!vectype
|| !VECTOR_BOOLEAN_TYPE_P (vectype
)));
172 stmt_vec_info new_stmt_info
= vinfo
->add_stmt (new_stmt
);
173 STMT_VINFO_VECTYPE (new_stmt_info
) = vectype
;
174 if (scalar_type_for_mask
)
175 new_stmt_info
->mask_precision
176 = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (scalar_type_for_mask
));
178 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
),
182 /* The caller wants to perform new operations on vect_external variable
183 VAR, so that the result of the operations would also be vect_external.
184 Return the edge on which the operations can be performed, if one exists.
185 Return null if the operations should instead be treated as part of
186 the pattern that needs them. */
189 vect_get_external_def_edge (vec_info
*vinfo
, tree var
)
192 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
194 e
= loop_preheader_edge (loop_vinfo
->loop
);
195 if (!SSA_NAME_IS_DEFAULT_DEF (var
))
197 basic_block bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
199 || !dominated_by_p (CDI_DOMINATORS
, e
->dest
, bb
))
206 /* Return true if the target supports a vector version of CODE,
207 where CODE is known to map to a direct optab with the given SUBTYPE.
208 ITYPE specifies the type of (some of) the scalar inputs and OTYPE
209 specifies the type of the scalar result.
211 If CODE allows the inputs and outputs to have different type
212 (such as for WIDEN_SUM_EXPR), it is the input mode rather
213 than the output mode that determines the appropriate target pattern.
214 Operand 0 of the target pattern then specifies the mode that the output
217 When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
218 Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
222 vect_supportable_direct_optab_p (vec_info
*vinfo
, tree otype
, tree_code code
,
223 tree itype
, tree
*vecotype_out
,
224 tree
*vecitype_out
= NULL
,
225 enum optab_subtype subtype
= optab_default
)
227 tree vecitype
= get_vectype_for_scalar_type (vinfo
, itype
);
231 tree vecotype
= get_vectype_for_scalar_type (vinfo
, otype
);
235 optab optab
= optab_for_tree_code (code
, vecitype
, subtype
);
239 insn_code icode
= optab_handler (optab
, TYPE_MODE (vecitype
));
240 if (icode
== CODE_FOR_nothing
241 || insn_data
[icode
].operand
[0].mode
!= TYPE_MODE (vecotype
))
244 *vecotype_out
= vecotype
;
246 *vecitype_out
= vecitype
;
250 /* Round bit precision PRECISION up to a full element. */
253 vect_element_precision (unsigned int precision
)
255 precision
= 1 << ceil_log2 (precision
);
256 return MAX (precision
, BITS_PER_UNIT
);
259 /* If OP is defined by a statement that's being considered for vectorization,
260 return information about that statement, otherwise return NULL. */
263 vect_get_internal_def (vec_info
*vinfo
, tree op
)
265 stmt_vec_info def_stmt_info
= vinfo
->lookup_def (op
);
267 && STMT_VINFO_DEF_TYPE (def_stmt_info
) == vect_internal_def
)
268 return def_stmt_info
;
272 /* Check whether NAME, an ssa-name used in STMT_VINFO,
273 is a result of a type promotion, such that:
274 DEF_STMT: NAME = NOP (name0)
275 If CHECK_SIGN is TRUE, check that either both types are signed or both are
279 type_conversion_p (vec_info
*vinfo
, tree name
, bool check_sign
,
280 tree
*orig_type
, gimple
**def_stmt
, bool *promotion
)
282 tree type
= TREE_TYPE (name
);
284 enum vect_def_type dt
;
286 stmt_vec_info def_stmt_info
;
287 if (!vect_is_simple_use (name
, vinfo
, &dt
, &def_stmt_info
, def_stmt
))
290 if (dt
!= vect_internal_def
291 && dt
!= vect_external_def
&& dt
!= vect_constant_def
)
297 if (!is_gimple_assign (*def_stmt
))
300 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt
)))
303 oprnd0
= gimple_assign_rhs1 (*def_stmt
);
305 *orig_type
= TREE_TYPE (oprnd0
);
306 if (!INTEGRAL_TYPE_P (type
) || !INTEGRAL_TYPE_P (*orig_type
)
307 || ((TYPE_UNSIGNED (type
) != TYPE_UNSIGNED (*orig_type
)) && check_sign
))
310 if (TYPE_PRECISION (type
) >= (TYPE_PRECISION (*orig_type
) * 2))
315 if (!vect_is_simple_use (oprnd0
, vinfo
, &dt
))
321 /* Holds information about an input operand after some sign changes
322 and type promotions have been peeled away. */
323 class vect_unpromoted_value
{
325 vect_unpromoted_value ();
327 void set_op (tree
, vect_def_type
, stmt_vec_info
= NULL
);
329 /* The value obtained after peeling away zero or more casts. */
332 /* The type of OP. */
335 /* The definition type of OP. */
338 /* If OP is the result of peeling at least one cast, and if the cast
339 of OP itself is a vectorizable statement, CASTER identifies that
340 statement, otherwise it is null. */
341 stmt_vec_info caster
;
344 inline vect_unpromoted_value::vect_unpromoted_value ()
347 dt (vect_uninitialized_def
),
352 /* Set the operand to OP_IN, its definition type to DT_IN, and the
353 statement that casts it to CASTER_IN. */
356 vect_unpromoted_value::set_op (tree op_in
, vect_def_type dt_in
,
357 stmt_vec_info caster_in
)
360 type
= TREE_TYPE (op
);
365 /* If OP is a vectorizable SSA name, strip a sequence of integer conversions
366 to reach some vectorizable inner operand OP', continuing as long as it
367 is possible to convert OP' back to OP using a possible sign change
368 followed by a possible promotion P. Return this OP', or null if OP is
369 not a vectorizable SSA name. If there is a promotion P, describe its
370 input in UNPROM, otherwise describe OP' in UNPROM. If SINGLE_USE_P
371 is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
372 have more than one user.
374 A successful return means that it is possible to go from OP' to OP
375 via UNPROM. The cast from OP' to UNPROM is at most a sign change,
376 whereas the cast from UNPROM to OP might be a promotion, a sign
381 signed short *ptr = ...;
382 signed short C = *ptr;
383 unsigned short B = (unsigned short) C; // sign change
384 signed int A = (signed int) B; // unsigned promotion
385 ...possible other uses of A...
386 unsigned int OP = (unsigned int) A; // sign change
388 In this case it's possible to go directly from C to OP using:
390 OP = (unsigned int) (unsigned short) C;
391 +------------+ +--------------+
392 promotion sign change
394 so OP' would be C. The input to the promotion is B, so UNPROM
398 vect_look_through_possible_promotion (vec_info
*vinfo
, tree op
,
399 vect_unpromoted_value
*unprom
,
400 bool *single_use_p
= NULL
)
402 tree op_type
= TREE_TYPE (op
);
403 if (!INTEGRAL_TYPE_P (op_type
))
406 tree res
= NULL_TREE
;
407 unsigned int orig_precision
= TYPE_PRECISION (op_type
);
408 unsigned int min_precision
= orig_precision
;
409 stmt_vec_info caster
= NULL
;
410 while (TREE_CODE (op
) == SSA_NAME
&& INTEGRAL_TYPE_P (op_type
))
412 /* See whether OP is simple enough to vectorize. */
413 stmt_vec_info def_stmt_info
;
416 if (!vect_is_simple_use (op
, vinfo
, &dt
, &def_stmt_info
, &def_stmt
))
419 /* If OP is the input of a demotion, skip over it to see whether
420 OP is itself the result of a promotion. If so, the combined
421 effect of the promotion and the demotion might fit the required
422 pattern, otherwise neither operation fits.
424 This copes with cases such as the result of an arithmetic
425 operation being truncated before being stored, and where that
426 arithmetic operation has been recognized as an over-widened one. */
427 if (TYPE_PRECISION (op_type
) <= min_precision
)
429 /* Use OP as the UNPROM described above if we haven't yet
430 found a promotion, or if using the new input preserves the
431 sign of the previous promotion. */
433 || TYPE_PRECISION (unprom
->type
) == orig_precision
434 || TYPE_SIGN (unprom
->type
) == TYPE_SIGN (op_type
))
436 unprom
->set_op (op
, dt
, caster
);
437 min_precision
= TYPE_PRECISION (op_type
);
439 /* Stop if we've already seen a promotion and if this
440 conversion does more than change the sign. */
441 else if (TYPE_PRECISION (op_type
)
442 != TYPE_PRECISION (unprom
->type
))
445 /* The sequence now extends to OP. */
449 /* See whether OP is defined by a cast. Record it as CASTER if
450 the cast is potentially vectorizable. */
453 caster
= def_stmt_info
;
455 /* Ignore pattern statements, since we don't link uses for them. */
458 && !STMT_VINFO_RELATED_STMT (caster
)
459 && !has_single_use (res
))
460 *single_use_p
= false;
462 gassign
*assign
= dyn_cast
<gassign
*> (def_stmt
);
463 if (!assign
|| !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt
)))
466 /* Continue with the input to the cast. */
467 op
= gimple_assign_rhs1 (def_stmt
);
468 op_type
= TREE_TYPE (op
);
473 /* OP is an integer operand to an operation that returns TYPE, and we
474 want to treat the operation as a widening one. So far we can treat
475 it as widening from *COMMON_TYPE.
477 Return true if OP is suitable for such a widening operation,
478 either widening from *COMMON_TYPE or from some supertype of it.
479 Update *COMMON_TYPE to the supertype in the latter case.
481 SHIFT_P is true if OP is a shift amount. */
484 vect_joust_widened_integer (tree type
, bool shift_p
, tree op
,
487 /* Calculate the minimum precision required by OP, without changing
488 the sign of either operand. */
489 unsigned int precision
;
492 if (!wi::leu_p (wi::to_widest (op
), TYPE_PRECISION (type
) / 2))
494 precision
= TREE_INT_CST_LOW (op
);
498 precision
= wi::min_precision (wi::to_widest (op
),
499 TYPE_SIGN (*common_type
));
500 if (precision
* 2 > TYPE_PRECISION (type
))
504 /* If OP requires a wider type, switch to that type. The checks
505 above ensure that this is still narrower than the result. */
506 precision
= vect_element_precision (precision
);
507 if (TYPE_PRECISION (*common_type
) < precision
)
508 *common_type
= build_nonstandard_integer_type
509 (precision
, TYPE_UNSIGNED (*common_type
));
513 /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
514 is narrower than TYPE, storing the supertype in *COMMON_TYPE if so. */
517 vect_joust_widened_type (tree type
, tree new_type
, tree
*common_type
)
519 if (types_compatible_p (*common_type
, new_type
))
522 /* See if *COMMON_TYPE can hold all values of NEW_TYPE. */
523 if ((TYPE_PRECISION (new_type
) < TYPE_PRECISION (*common_type
))
524 && (TYPE_UNSIGNED (new_type
) || !TYPE_UNSIGNED (*common_type
)))
527 /* See if NEW_TYPE can hold all values of *COMMON_TYPE. */
528 if (TYPE_PRECISION (*common_type
) < TYPE_PRECISION (new_type
)
529 && (TYPE_UNSIGNED (*common_type
) || !TYPE_UNSIGNED (new_type
)))
531 *common_type
= new_type
;
535 /* We have mismatched signs, with the signed type being
536 no wider than the unsigned type. In this case we need
537 a wider signed type. */
538 unsigned int precision
= MAX (TYPE_PRECISION (*common_type
),
539 TYPE_PRECISION (new_type
));
542 if (precision
* 2 > TYPE_PRECISION (type
))
545 *common_type
= build_nonstandard_integer_type (precision
, false);
549 /* Check whether STMT_INFO can be viewed as a tree of integer operations
550 in which each node either performs CODE or WIDENED_CODE, and where
551 each leaf operand is narrower than the result of STMT_INFO. MAX_NOPS
552 specifies the maximum number of leaf operands. SHIFT_P says whether
553 CODE and WIDENED_CODE are some sort of shift.
555 If STMT_INFO is such a tree, return the number of leaf operands
556 and describe them in UNPROM[0] onwards. Also set *COMMON_TYPE
557 to a type that (a) is narrower than the result of STMT_INFO and
558 (b) can hold all leaf operand values.
560 If SUBTYPE is nonnull, allow the operands to differ
561 in sign but not in precision. SUBTYPE is updated to reflect
564 Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
568 vect_widened_op_tree (vec_info
*vinfo
, stmt_vec_info stmt_info
, tree_code code
,
569 code_helper widened_code
, bool shift_p
,
570 unsigned int max_nops
,
571 vect_unpromoted_value
*unprom
, tree
*common_type
,
572 enum optab_subtype
*subtype
= NULL
)
574 /* Check for an integer operation with the right code. */
575 gimple
* stmt
= stmt_info
->stmt
;
576 if (!(is_gimple_assign (stmt
) || is_gimple_call (stmt
)))
579 code_helper rhs_code
;
580 if (is_gimple_assign (stmt
))
581 rhs_code
= gimple_assign_rhs_code (stmt
);
582 else if (is_gimple_call (stmt
))
583 rhs_code
= gimple_call_combined_fn (stmt
);
588 && rhs_code
!= widened_code
)
591 tree lhs
= gimple_get_lhs (stmt
);
592 tree type
= TREE_TYPE (lhs
);
593 if (!INTEGRAL_TYPE_P (type
))
596 /* Assume that both operands will be leaf operands. */
599 /* Check the operands. */
600 unsigned int next_op
= 0;
601 for (unsigned int i
= 0; i
< 2; ++i
)
603 vect_unpromoted_value
*this_unprom
= &unprom
[next_op
];
604 unsigned int nops
= 1;
605 tree op
= gimple_arg (stmt
, i
);
606 if (i
== 1 && TREE_CODE (op
) == INTEGER_CST
)
608 /* We already have a common type from earlier operands.
609 Update it to account for OP. */
610 this_unprom
->set_op (op
, vect_constant_def
);
611 if (!vect_joust_widened_integer (type
, shift_p
, op
, common_type
))
616 /* Only allow shifts by constants. */
617 if (shift_p
&& i
== 1)
620 if (rhs_code
!= code
)
622 /* If rhs_code is widened_code, don't look through further
623 possible promotions, there is a promotion already embedded
624 in the WIDEN_*_EXPR. */
625 if (TREE_CODE (op
) != SSA_NAME
626 || !INTEGRAL_TYPE_P (TREE_TYPE (op
)))
629 stmt_vec_info def_stmt_info
;
632 if (!vect_is_simple_use (op
, vinfo
, &dt
, &def_stmt_info
,
635 this_unprom
->set_op (op
, dt
, NULL
);
637 else if (!vect_look_through_possible_promotion (vinfo
, op
,
641 if (TYPE_PRECISION (this_unprom
->type
) == TYPE_PRECISION (type
))
643 /* The operand isn't widened. If STMT_INFO has the code
644 for an unwidened operation, recursively check whether
645 this operand is a node of the tree. */
648 || this_unprom
->dt
!= vect_internal_def
)
651 /* Give back the leaf slot allocated above now that we're
652 not treating this as a leaf operand. */
655 /* Recursively process the definition of the operand. */
656 stmt_vec_info def_stmt_info
657 = vinfo
->lookup_def (this_unprom
->op
);
658 nops
= vect_widened_op_tree (vinfo
, def_stmt_info
, code
,
659 widened_code
, shift_p
, max_nops
,
660 this_unprom
, common_type
,
669 /* Make sure that the operand is narrower than the result. */
670 if (TYPE_PRECISION (this_unprom
->type
) * 2
671 > TYPE_PRECISION (type
))
674 /* Update COMMON_TYPE for the new operand. */
676 *common_type
= this_unprom
->type
;
677 else if (!vect_joust_widened_type (type
, this_unprom
->type
,
682 /* See if we can sign extend the smaller type. */
683 if (TYPE_PRECISION (this_unprom
->type
)
684 > TYPE_PRECISION (*common_type
))
685 *common_type
= this_unprom
->type
;
686 *subtype
= optab_vector_mixed_sign
;
698 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
699 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
702 vect_recog_temp_ssa_var (tree type
, gimple
*stmt
= NULL
)
704 return make_temp_ssa_name (type
, stmt
, "patt");
707 /* STMT2_INFO describes a type conversion that could be split into STMT1
708 followed by a version of STMT2_INFO that takes NEW_RHS as its first
709 input. Try to do this using pattern statements, returning true on
713 vect_split_statement (vec_info
*vinfo
, stmt_vec_info stmt2_info
, tree new_rhs
,
714 gimple
*stmt1
, tree vectype
)
716 if (is_pattern_stmt_p (stmt2_info
))
718 /* STMT2_INFO is part of a pattern. Get the statement to which
719 the pattern is attached. */
720 stmt_vec_info orig_stmt2_info
= STMT_VINFO_RELATED_STMT (stmt2_info
);
721 vect_init_pattern_stmt (vinfo
, stmt1
, orig_stmt2_info
, vectype
);
723 if (dump_enabled_p ())
724 dump_printf_loc (MSG_NOTE
, vect_location
,
725 "Splitting pattern statement: %G", stmt2_info
->stmt
);
727 /* Since STMT2_INFO is a pattern statement, we can change it
728 in-situ without worrying about changing the code for the
730 gimple_assign_set_rhs1 (stmt2_info
->stmt
, new_rhs
);
732 if (dump_enabled_p ())
734 dump_printf_loc (MSG_NOTE
, vect_location
, "into: %G", stmt1
);
735 dump_printf_loc (MSG_NOTE
, vect_location
, "and: %G",
739 gimple_seq
*def_seq
= &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info
);
740 if (STMT_VINFO_RELATED_STMT (orig_stmt2_info
) == stmt2_info
)
741 /* STMT2_INFO is the actual pattern statement. Add STMT1
742 to the end of the definition sequence. */
743 gimple_seq_add_stmt_without_update (def_seq
, stmt1
);
746 /* STMT2_INFO belongs to the definition sequence. Insert STMT1
748 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt2_info
->stmt
, def_seq
);
749 gsi_insert_before_without_update (&gsi
, stmt1
, GSI_SAME_STMT
);
755 /* STMT2_INFO doesn't yet have a pattern. Try to create a
756 two-statement pattern now. */
757 gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info
));
758 tree lhs_type
= TREE_TYPE (gimple_get_lhs (stmt2_info
->stmt
));
759 tree lhs_vectype
= get_vectype_for_scalar_type (vinfo
, lhs_type
);
763 if (dump_enabled_p ())
764 dump_printf_loc (MSG_NOTE
, vect_location
,
765 "Splitting statement: %G", stmt2_info
->stmt
);
767 /* Add STMT1 as a singleton pattern definition sequence. */
768 gimple_seq
*def_seq
= &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info
);
769 vect_init_pattern_stmt (vinfo
, stmt1
, stmt2_info
, vectype
);
770 gimple_seq_add_stmt_without_update (def_seq
, stmt1
);
772 /* Build the second of the two pattern statements. */
773 tree new_lhs
= vect_recog_temp_ssa_var (lhs_type
, NULL
);
774 gassign
*new_stmt2
= gimple_build_assign (new_lhs
, NOP_EXPR
, new_rhs
);
775 vect_set_pattern_stmt (vinfo
, new_stmt2
, stmt2_info
, lhs_vectype
);
777 if (dump_enabled_p ())
779 dump_printf_loc (MSG_NOTE
, vect_location
,
780 "into pattern statements: %G", stmt1
);
781 dump_printf_loc (MSG_NOTE
, vect_location
, "and: %G",
782 (gimple
*) new_stmt2
);
789 /* Look for the following pattern
795 ABS_STMT should point to a statement of code ABS_EXPR or ABSU_EXPR.
796 HALF_TYPE and UNPROM will be set should the statement be found to
797 be a widened operation.
798 DIFF_STMT will be set to the MINUS_EXPR
799 statement that precedes the ABS_STMT unless vect_widened_op_tree
803 vect_recog_absolute_difference (vec_info
*vinfo
, gassign
*abs_stmt
,
805 vect_unpromoted_value unprom
[2],
811 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
812 inside the loop (in case we are analyzing an outer-loop). */
813 enum tree_code code
= gimple_assign_rhs_code (abs_stmt
);
814 if (code
!= ABS_EXPR
&& code
!= ABSU_EXPR
)
817 tree abs_oprnd
= gimple_assign_rhs1 (abs_stmt
);
818 tree abs_type
= TREE_TYPE (abs_oprnd
);
821 if (!ANY_INTEGRAL_TYPE_P (abs_type
)
822 || TYPE_OVERFLOW_WRAPS (abs_type
)
823 || TYPE_UNSIGNED (abs_type
))
826 /* Peel off conversions from the ABS input. This can involve sign
827 changes (e.g. from an unsigned subtraction to a signed ABS input)
828 or signed promotion, but it can't include unsigned promotion.
829 (Note that ABS of an unsigned promotion should have been folded
830 away before now anyway.) */
831 vect_unpromoted_value unprom_diff
;
832 abs_oprnd
= vect_look_through_possible_promotion (vinfo
, abs_oprnd
,
836 if (TYPE_PRECISION (unprom_diff
.type
) != TYPE_PRECISION (abs_type
)
837 && TYPE_UNSIGNED (unprom_diff
.type
))
840 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
841 stmt_vec_info diff_stmt_vinfo
= vect_get_internal_def (vinfo
, abs_oprnd
);
842 if (!diff_stmt_vinfo
)
845 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
846 inside the loop (in case we are analyzing an outer-loop). */
847 if (vect_widened_op_tree (vinfo
, diff_stmt_vinfo
,
848 MINUS_EXPR
, IFN_VEC_WIDEN_MINUS
,
849 false, 2, unprom
, half_type
))
852 /* Failed to find a widen operation so we check for a regular MINUS_EXPR. */
853 gassign
*diff
= dyn_cast
<gassign
*> (STMT_VINFO_STMT (diff_stmt_vinfo
));
854 if (diff_stmt
&& diff
855 && gimple_assign_rhs_code (diff
) == MINUS_EXPR
856 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (abs_oprnd
)))
859 *half_type
= NULL_TREE
;
866 /* Convert UNPROM to TYPE and return the result, adding new statements
867 to STMT_INFO's pattern definition statements if no better way is
868 available. VECTYPE is the vector form of TYPE.
870 If SUBTYPE is optab_vector_mixed_sign and UNPROM->OP is unsigned, convert to the unsigned variants of TYPE and VECTYPE instead. */
873 vect_convert_input (vec_info
*vinfo
, stmt_vec_info stmt_info
, tree type
,
874 vect_unpromoted_value
*unprom
, tree vectype
,
875 enum optab_subtype subtype
= optab_default
)
877 /* Update the type if the signs differ. */
878 if (subtype
== optab_vector_mixed_sign
)
880 gcc_assert (!TYPE_UNSIGNED (type
));
881 if (TYPE_UNSIGNED (TREE_TYPE (unprom
->op
)))
883 type
= unsigned_type_for (type
);
884 vectype
= unsigned_type_for (vectype
);
888 /* Check for a no-op conversion. */
889 if (types_compatible_p (type
, TREE_TYPE (unprom
->op
)))
892 /* Allow the caller to create constant vect_unpromoted_values. */
893 if (TREE_CODE (unprom
->op
) == INTEGER_CST
)
894 return wide_int_to_tree (type
, wi::to_widest (unprom
->op
));
896 tree input
= unprom
->op
;
899 tree lhs
= gimple_get_lhs (unprom
->caster
->stmt
);
900 tree lhs_type
= TREE_TYPE (lhs
);
902 /* If the result of the existing cast is the right width, use it
903 instead of the source of the cast. */
904 if (TYPE_PRECISION (lhs_type
) == TYPE_PRECISION (type
))
906 /* If the precision we want is between the source and result
907 precisions of the existing cast, try splitting the cast into
908 two and tapping into a mid-way point. */
909 else if (TYPE_PRECISION (lhs_type
) > TYPE_PRECISION (type
)
910 && TYPE_PRECISION (type
) > TYPE_PRECISION (unprom
->type
))
912 /* In order to preserve the semantics of the original cast,
913 give the mid-way point the same signedness as the input value.
915 It would be possible to use a signed type here instead if
916 TYPE is signed and UNPROM->TYPE is unsigned, but that would
917 make the sign of the midtype sensitive to the order in
918 which we process the statements, since the signedness of
919 TYPE is the signedness required by just one of possibly
920 many users. Also, unsigned promotions are usually as cheap
921 as or cheaper than signed ones, so it's better to keep an
922 unsigned promotion. */
923 tree midtype
= build_nonstandard_integer_type
924 (TYPE_PRECISION (type
), TYPE_UNSIGNED (unprom
->type
));
925 tree vec_midtype
= get_vectype_for_scalar_type (vinfo
, midtype
);
928 input
= vect_recog_temp_ssa_var (midtype
, NULL
);
929 gassign
*new_stmt
= gimple_build_assign (input
, NOP_EXPR
,
931 if (!vect_split_statement (vinfo
, unprom
->caster
, input
, new_stmt
,
933 append_pattern_def_seq (vinfo
, stmt_info
,
934 new_stmt
, vec_midtype
);
938 /* See if we can reuse an existing result. */
939 if (types_compatible_p (type
, TREE_TYPE (input
)))
943 /* We need a new conversion statement. */
944 tree new_op
= vect_recog_temp_ssa_var (type
, NULL
);
945 gassign
*new_stmt
= gimple_build_assign (new_op
, NOP_EXPR
, input
);
947 /* If OP is an external value, see if we can insert the new statement
948 on an incoming edge. */
949 if (input
== unprom
->op
&& unprom
->dt
== vect_external_def
)
950 if (edge e
= vect_get_external_def_edge (vinfo
, input
))
952 basic_block new_bb
= gsi_insert_on_edge_immediate (e
, new_stmt
);
953 gcc_assert (!new_bb
);
957 /* As a (common) last resort, add the statement to the pattern itself. */
958 append_pattern_def_seq (vinfo
, stmt_info
, new_stmt
, vectype
);
962 /* Invoke vect_convert_input for N elements of UNPROM and store the
963 result in the corresponding elements of RESULT.
965 SUBTYPE is passed on to vect_convert_input. */
968 vect_convert_inputs (vec_info
*vinfo
, stmt_vec_info stmt_info
, unsigned int n
,
969 tree
*result
, tree type
, vect_unpromoted_value
*unprom
,
970 tree vectype
, enum optab_subtype subtype
= optab_default
)
972 for (unsigned int i
= 0; i
< n
; ++i
)
975 for (j
= 0; j
< i
; ++j
)
976 if (unprom
[j
].op
== unprom
[i
].op
)
980 result
[i
] = result
[j
];
982 result
[i
] = vect_convert_input (vinfo
, stmt_info
,
983 type
, &unprom
[i
], vectype
, subtype
);
987 /* The caller has created a (possibly empty) sequence of pattern definition
988 statements followed by a single statement PATTERN_STMT. Cast the result
989 of this final statement to TYPE. If a new statement is needed, add
990 PATTERN_STMT to the end of STMT_INFO's pattern definition statements
991 and return the new statement, otherwise return PATTERN_STMT as-is.
992 VECITYPE is the vector form of PATTERN_STMT's result type. */
995 vect_convert_output (vec_info
*vinfo
, stmt_vec_info stmt_info
, tree type
,
996 gimple
*pattern_stmt
, tree vecitype
)
998 tree lhs
= gimple_get_lhs (pattern_stmt
);
999 if (!types_compatible_p (type
, TREE_TYPE (lhs
)))
1001 append_pattern_def_seq (vinfo
, stmt_info
, pattern_stmt
, vecitype
);
1002 tree cast_var
= vect_recog_temp_ssa_var (type
, NULL
);
1003 pattern_stmt
= gimple_build_assign (cast_var
, NOP_EXPR
, lhs
);
1005 return pattern_stmt
;
1008 /* Return true if STMT_INFO describes a reduction for which reassociation
1009 is allowed. If STMT_INFO is part of a group, assume that it's part of
1010 a reduction chain and optimistically assume that all statements
1011 except the last allow reassociation.
1012 Also require it to have code CODE and to be a reduction
1013 in the outermost loop. When returning true, store the operands in
1014 *OP0_OUT and *OP1_OUT. */
1017 vect_reassociating_reduction_p (vec_info
*vinfo
,
1018 stmt_vec_info stmt_info
, tree_code code
,
1019 tree
*op0_out
, tree
*op1_out
)
1021 loop_vec_info loop_info
= dyn_cast
<loop_vec_info
> (vinfo
);
1025 gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
1026 if (!assign
|| gimple_assign_rhs_code (assign
) != code
)
1029 /* We don't allow changing the order of the computation in the inner-loop
1030 when doing outer-loop vectorization. */
1031 class loop
*loop
= LOOP_VINFO_LOOP (loop_info
);
1032 if (loop
&& nested_in_vect_loop_p (loop
, stmt_info
))
1035 if (STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
)
1037 if (needs_fold_left_reduction_p (TREE_TYPE (gimple_assign_lhs (assign
)),
1041 else if (REDUC_GROUP_FIRST_ELEMENT (stmt_info
) == NULL
)
1044 *op0_out
= gimple_assign_rhs1 (assign
);
1045 *op1_out
= gimple_assign_rhs2 (assign
);
1046 if (commutative_tree_code (code
) && STMT_VINFO_REDUC_IDX (stmt_info
) == 0)
1047 std::swap (*op0_out
, *op1_out
);
1051 /* match.pd function to match
1052 (cond (cmp@3 a b) (convert@1 c) (convert@2 d))
1054 1) @1, @2, c, d, a, b are all integral type.
1055 2) There's single_use for both @1 and @2.
1056 3) a, c have same precision.
1057 4) c and @1 have different precision.
1058 5) c, d are the same type or they can differ in sign when convert is
1061 record a and c and d and @3. */
1063 extern bool gimple_cond_expr_convert_p (tree
, tree
*, tree (*)(tree
));
1065 /* Function vect_recog_cond_expr_convert
1067 Try to find the following pattern:
1072 TYPE_E op_true = (TYPE_E) A;
1073 TYPE_E op_false = (TYPE_E) B;
1075 E = C cmp D ? op_true : op_false;
1078 TYPE_PRECISION (TYPE_E) != TYPE_PRECISION (TYPE_CD);
1079 TYPE_PRECISION (TYPE_AB) == TYPE_PRECISION (TYPE_CD);
1080 single_use of op_true and op_false.
1081 TYPE_AB could differ in sign when (TYPE_E) A is a truncation.
1085 * STMT_VINFO: The stmt from which the pattern search begins.
1086 here it starts with E = C cmp D ? op_true : op_false;
1090 TYPE1 E' = C cmp D ? A : B;
1091 TYPE3 E = (TYPE3) E';
1093 There may be an extra nop_convert for A or B to handle differing signedness.
1095 * TYPE_OUT: The vector type of the output of this pattern.
1097 * Return value: A new stmt that will be used to replace the sequence of
1098 stmts that constitute the pattern. In this case it will be:
1100 E' = C cmp D ? A : B; is recorded in pattern definition statements; */
1103 vect_recog_cond_expr_convert_pattern (vec_info
*vinfo
,
1104 stmt_vec_info stmt_vinfo
, tree
*type_out
)
1106 gassign
*last_stmt
= dyn_cast
<gassign
*> (stmt_vinfo
->stmt
);
1107 tree lhs
, match
[4], temp
, type
, new_lhs
, op2
;
1109 gimple
*pattern_stmt
;
1114 lhs
= gimple_assign_lhs (last_stmt
);
1116 /* Find E = C cmp D ? (TYPE3) A : (TYPE3) B;
1117 TYPE_PRECISION (A) == TYPE_PRECISION (C). */
1118 if (!gimple_cond_expr_convert_p (lhs
, &match
[0], NULL
))
1121 vect_pattern_detected ("vect_recog_cond_expr_convert_pattern", last_stmt
);
1124 type
= TREE_TYPE (match
[1]);
1125 if (TYPE_SIGN (type
) != TYPE_SIGN (TREE_TYPE (match
[2])))
1127 op2
= vect_recog_temp_ssa_var (type
, NULL
);
1128 gimple
* nop_stmt
= gimple_build_assign (op2
, NOP_EXPR
, match
[2]);
1129 append_pattern_def_seq (vinfo
, stmt_vinfo
, nop_stmt
,
1130 get_vectype_for_scalar_type (vinfo
, type
));
1133 temp
= vect_recog_temp_ssa_var (type
, NULL
);
1134 cond_stmt
= gimple_build_assign (temp
, build3 (COND_EXPR
, type
, match
[3],
1136 append_pattern_def_seq (vinfo
, stmt_vinfo
, cond_stmt
,
1137 get_vectype_for_scalar_type (vinfo
, type
));
1138 new_lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
1139 pattern_stmt
= gimple_build_assign (new_lhs
, NOP_EXPR
, temp
);
1140 *type_out
= STMT_VINFO_VECTYPE (stmt_vinfo
);
1142 if (dump_enabled_p ())
1143 dump_printf_loc (MSG_NOTE
, vect_location
,
1144 "created pattern stmt: %G", pattern_stmt
);
1145 return pattern_stmt
;
1148 /* Function vect_recog_dot_prod_pattern
1150 Try to find the following pattern:
1157 sum_0 = phi <init, sum_1>
1160 S3 x_T = (TYPE1) x_t;
1161 S4 y_T = (TYPE1) y_t;
1162 S5 prod = x_T * y_T;
1163 [S6 prod = (TYPE2) prod; #optional]
1164 S7 sum_1 = prod + sum_0;
1166 where 'TYPE1' is exactly double the size of type 'type1a' and 'type1b',
1167 the sign of 'TYPE1' must be one of 'type1a' or 'type1b' but the sign of
1168 'type1a' and 'type1b' can differ.
1172 * STMT_VINFO: The stmt from which the pattern search begins. In the
1173 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
1178 * TYPE_OUT: The type of the output of this pattern.
1180 * Return value: A new stmt that will be used to replace the sequence of
1181 stmts that constitute the pattern. In this case it will be:
1182 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
1184 Note: The dot-prod idiom is a widening reduction pattern that is
1185 vectorized without preserving all the intermediate results. It
1186 produces only N/2 (widened) results (by summing up pairs of
1187 intermediate results) rather than all N results. Therefore, we
1188 cannot allow this pattern when we want to get all the results and in
1189 the correct order (as is the case when this computation is in an
1190 inner-loop nested in an outer-loop that is being vectorized). */
1193 vect_recog_dot_prod_pattern (vec_info
*vinfo
,
1194 stmt_vec_info stmt_vinfo
, tree
*type_out
)
1196 tree oprnd0
, oprnd1
;
1197 gimple
*last_stmt
= stmt_vinfo
->stmt
;
1198 tree type
, half_type
;
1199 gimple
*pattern_stmt
;
1202 /* Look for the following pattern
1206 DDPROD = (TYPE2) DPROD;
1207 sum_1 = DDPROD + sum_0;
1209 - DX is double the size of X
1210 - DY is double the size of Y
1211 - DX, DY, DPROD all have the same type but the sign
1212 between X, Y and DPROD can differ.
1213 - sum is the same size as DPROD or bigger
1214 - sum has been recognized as a reduction variable.
1216 This is equivalent to:
1217 DPROD = X w* Y; #widen mult
1218 sum_1 = DPROD w+ sum_0; #widen summation
1220 DPROD = X w* Y; #widen mult
1221 sum_1 = DPROD + sum_0; #summation
1224 /* Starting from LAST_STMT, follow the defs of its uses in search
1225 of the above pattern. */
1227 if (!vect_reassociating_reduction_p (vinfo
, stmt_vinfo
, PLUS_EXPR
,
1231 type
= TREE_TYPE (gimple_get_lhs (last_stmt
));
1233 vect_unpromoted_value unprom_mult
;
1234 oprnd0
= vect_look_through_possible_promotion (vinfo
, oprnd0
, &unprom_mult
);
1236 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1237 we know that oprnd1 is the reduction variable (defined by a loop-header
1238 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1239 Left to check that oprnd0 is defined by a (widen_)mult_expr */
1243 stmt_vec_info mult_vinfo
= vect_get_internal_def (vinfo
, oprnd0
);
1247 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
1248 inside the loop (in case we are analyzing an outer-loop). */
1249 vect_unpromoted_value unprom0
[2];
1250 enum optab_subtype subtype
= optab_vector
;
1251 if (!vect_widened_op_tree (vinfo
, mult_vinfo
, MULT_EXPR
, WIDEN_MULT_EXPR
,
1252 false, 2, unprom0
, &half_type
, &subtype
))
1255 /* If there are two widening operations, make sure they agree on the sign
1256 of the extension. The result of an optab_vector_mixed_sign operation
1257 is signed; otherwise, the result has the same sign as the operands. */
1258 if (TYPE_PRECISION (unprom_mult
.type
) != TYPE_PRECISION (type
)
1259 && (subtype
== optab_vector_mixed_sign
1260 ? TYPE_UNSIGNED (unprom_mult
.type
)
1261 : TYPE_SIGN (unprom_mult
.type
) != TYPE_SIGN (half_type
)))
1264 vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt
);
1266 /* If the inputs have mixed signs, canonicalize on using the signed
1267 input type for analysis. This also helps when emulating mixed-sign
1268 operations using signed operations. */
1269 if (subtype
== optab_vector_mixed_sign
)
1270 half_type
= signed_type_for (half_type
);
1273 if (!vect_supportable_direct_optab_p (vinfo
, type
, DOT_PROD_EXPR
, half_type
,
1274 type_out
, &half_vectype
, subtype
))
1276 /* We can emulate a mixed-sign dot-product using a sequence of
1277 signed dot-products; see vect_emulate_mixed_dot_prod for details. */
1278 if (subtype
!= optab_vector_mixed_sign
1279 || !vect_supportable_direct_optab_p (vinfo
, signed_type_for (type
),
1280 DOT_PROD_EXPR
, half_type
,
1281 type_out
, &half_vectype
,
1285 *type_out
= signed_or_unsigned_type_for (TYPE_UNSIGNED (type
),
1289 /* Get the inputs in the appropriate types. */
1291 vect_convert_inputs (vinfo
, stmt_vinfo
, 2, mult_oprnd
, half_type
,
1292 unprom0
, half_vectype
, subtype
);
1294 var
= vect_recog_temp_ssa_var (type
, NULL
);
1295 pattern_stmt
= gimple_build_assign (var
, DOT_PROD_EXPR
,
1296 mult_oprnd
[0], mult_oprnd
[1], oprnd1
);
1298 return pattern_stmt
;
1302 /* Function vect_recog_sad_pattern
1304 Try to find the following Sum of Absolute Difference (SAD) pattern:
1307 signed TYPE1 diff, abs_diff;
1310 sum_0 = phi <init, sum_1>
1313 S3 x_T = (TYPE1) x_t;
1314 S4 y_T = (TYPE1) y_t;
1315 S5 diff = x_T - y_T;
1316 S6 abs_diff = ABS_EXPR <diff>;
1317 [S7 abs_diff = (TYPE2) abs_diff; #optional]
1318 S8 sum_1 = abs_diff + sum_0;
1320 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
1321 same size as 'TYPE1' or bigger. This is a special case of a reduction
1326 * STMT_VINFO: The stmt from which the pattern search begins. In the
1327 example, when this function is called with S8, the pattern
1328 {S3,S4,S5,S6,S7,S8} will be detected.
1332 * TYPE_OUT: The type of the output of this pattern.
1334 * Return value: A new stmt that will be used to replace the sequence of
1335 stmts that constitute the pattern. In this case it will be:
1336 SAD_EXPR <x_t, y_t, sum_0>
1340 vect_recog_sad_pattern (vec_info
*vinfo
,
1341 stmt_vec_info stmt_vinfo
, tree
*type_out
)
1343 gimple
*last_stmt
= stmt_vinfo
->stmt
;
1346 /* Look for the following pattern
1350 DAD = ABS_EXPR <DDIFF>;
1351 DDAD = (TYPE2) DAD;
1352 sum_1 = DDAD + sum_0;
1354 - DX is at least double the size of X
1355 - DY is at least double the size of Y
1356 - DX, DY, DDIFF, DAD all have the same type
1357 - sum is the same size as DAD or bigger
1358 - sum has been recognized as a reduction variable.
1360 This is equivalent to:
1361 DDIFF = X w- Y; #widen sub
1362 DAD = ABS_EXPR <DDIFF>;
1363 sum_1 = DAD w+ sum_0; #widen summation
1365 DDIFF = X w- Y; #widen sub
1366 DAD = ABS_EXPR <DDIFF>;
1367 sum_1 = DAD + sum_0; #summation
1370 /* Starting from LAST_STMT, follow the defs of its uses in search
1371 of the above pattern. */
1373 tree plus_oprnd0
, plus_oprnd1
;
1374 if (!vect_reassociating_reduction_p (vinfo
, stmt_vinfo
, PLUS_EXPR
,
1375 &plus_oprnd0
, &plus_oprnd1
))
1378 tree sum_type
= TREE_TYPE (gimple_get_lhs (last_stmt
));
1380 /* Any non-truncating sequence of conversions is OK here, since
1381 with a successful match, the result of the ABS(U) is known to fit
1382 within the nonnegative range of the result type. (It cannot be the
1383 negative of the minimum signed value due to the range of the widening
1385 vect_unpromoted_value unprom_abs
;
1386 plus_oprnd0
= vect_look_through_possible_promotion (vinfo
, plus_oprnd0
,
1389 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1390 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
1391 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
1392 Then check that plus_oprnd0 is defined by an abs_expr. */
1397 stmt_vec_info abs_stmt_vinfo
= vect_get_internal_def (vinfo
, plus_oprnd0
);
1398 if (!abs_stmt_vinfo
)
1401 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
1402 inside the loop (in case we are analyzing an outer-loop). */
1403 gassign
*abs_stmt
= dyn_cast
<gassign
*> (abs_stmt_vinfo
->stmt
);
1404 vect_unpromoted_value unprom
[2];
1408 gcall
*abd_stmt
= dyn_cast
<gcall
*> (abs_stmt_vinfo
->stmt
);
1410 || !gimple_call_internal_p (abd_stmt
)
1411 || gimple_call_num_args (abd_stmt
) != 2)
1414 tree abd_oprnd0
= gimple_call_arg (abd_stmt
, 0);
1415 tree abd_oprnd1
= gimple_call_arg (abd_stmt
, 1);
1417 if (gimple_call_internal_fn (abd_stmt
) == IFN_ABD
)
1419 if (!vect_look_through_possible_promotion (vinfo
, abd_oprnd0
,
1421 || !vect_look_through_possible_promotion (vinfo
, abd_oprnd1
,
1425 else if (gimple_call_internal_fn (abd_stmt
) == IFN_VEC_WIDEN_ABD
)
1427 unprom
[0].op
= abd_oprnd0
;
1428 unprom
[0].type
= TREE_TYPE (abd_oprnd0
);
1429 unprom
[1].op
= abd_oprnd1
;
1430 unprom
[1].type
= TREE_TYPE (abd_oprnd1
);
1435 half_type
= unprom
[0].type
;
1437 else if (!vect_recog_absolute_difference (vinfo
, abs_stmt
, &half_type
,
1441 vect_pattern_detected ("vect_recog_sad_pattern", last_stmt
);
1444 if (!vect_supportable_direct_optab_p (vinfo
, sum_type
, SAD_EXPR
, half_type
,
1445 type_out
, &half_vectype
))
1448 /* Get the inputs to the SAD_EXPR in the appropriate types. */
1450 vect_convert_inputs (vinfo
, stmt_vinfo
, 2, sad_oprnd
, half_type
,
1451 unprom
, half_vectype
);
1453 tree var
= vect_recog_temp_ssa_var (sum_type
, NULL
);
1454 gimple
*pattern_stmt
= gimple_build_assign (var
, SAD_EXPR
, sad_oprnd
[0],
1455 sad_oprnd
[1], plus_oprnd1
);
1457 return pattern_stmt
;
1460 /* Function vect_recog_abd_pattern
1462 Try to find the following ABsolute Difference (ABD) or
1463 widening ABD (WIDEN_ABD) pattern:
1467 TYPE3 x_cast = (TYPE3) x; // widening or no-op
1468 TYPE3 y_cast = (TYPE3) y; // widening or no-op
1469 TYPE3 diff = x_cast - y_cast;
1470 TYPE4 diff_cast = (TYPE4) diff; // widening or no-op
1471 TYPE5 abs = ABS(U)_EXPR <diff_cast>;
1473 WIDEN_ABD exists to optimize the case where TYPE4 is at least
1474 twice as wide as TYPE3.
1478 * STMT_VINFO: The stmt from which the pattern search begins
1482 * TYPE_OUT: The type of the output of this pattern
1484 * Return value: A new stmt that will be used to replace the sequence of
1485 stmts that constitute the pattern, principally:
1486 out = IFN_ABD (x, y)
1487 out = IFN_VEC_WIDEN_ABD (x, y)
1491 vect_recog_abd_pattern (vec_info
*vinfo
,
1492 stmt_vec_info stmt_vinfo
, tree
*type_out
)
1494 gassign
*last_stmt
= dyn_cast
<gassign
*> (STMT_VINFO_STMT (stmt_vinfo
));
1498 tree out_type
= TREE_TYPE (gimple_assign_lhs (last_stmt
));
1500 vect_unpromoted_value unprom
[2];
1503 if (!vect_recog_absolute_difference (vinfo
, last_stmt
, &half_type
,
1504 unprom
, &diff_stmt
))
1507 tree abd_in_type
, abd_out_type
;
1511 abd_in_type
= half_type
;
1512 abd_out_type
= abd_in_type
;
1516 unprom
[0].op
= gimple_assign_rhs1 (diff_stmt
);
1517 unprom
[1].op
= gimple_assign_rhs2 (diff_stmt
);
1518 abd_in_type
= signed_type_for (out_type
);
1519 abd_out_type
= abd_in_type
;
1522 tree vectype_in
= get_vectype_for_scalar_type (vinfo
, abd_in_type
);
1526 internal_fn ifn
= IFN_ABD
;
1527 tree vectype_out
= vectype_in
;
1529 if (TYPE_PRECISION (out_type
) >= TYPE_PRECISION (abd_in_type
) * 2
1530 && stmt_vinfo
->min_output_precision
>= TYPE_PRECISION (abd_in_type
) * 2)
1533 = build_nonstandard_integer_type (TYPE_PRECISION (abd_in_type
) * 2,
1534 TYPE_UNSIGNED (abd_in_type
));
1535 tree mid_vectype
= get_vectype_for_scalar_type (vinfo
, mid_type
);
1537 code_helper dummy_code
;
1539 auto_vec
<tree
> dummy_vec
;
1541 && supportable_widening_operation (vinfo
, IFN_VEC_WIDEN_ABD
,
1542 stmt_vinfo
, mid_vectype
,
1544 &dummy_code
, &dummy_code
,
1545 &dummy_int
, &dummy_vec
))
1547 ifn
= IFN_VEC_WIDEN_ABD
;
1548 abd_out_type
= mid_type
;
1549 vectype_out
= mid_vectype
;
1554 && !direct_internal_fn_supported_p (ifn
, vectype_in
,
1555 OPTIMIZE_FOR_SPEED
))
1558 vect_pattern_detected ("vect_recog_abd_pattern", last_stmt
);
1561 vect_convert_inputs (vinfo
, stmt_vinfo
, 2, abd_oprnds
,
1562 abd_in_type
, unprom
, vectype_in
);
1564 *type_out
= get_vectype_for_scalar_type (vinfo
, out_type
);
1566 tree abd_result
= vect_recog_temp_ssa_var (abd_out_type
, NULL
);
1567 gcall
*abd_stmt
= gimple_build_call_internal (ifn
, 2,
1568 abd_oprnds
[0], abd_oprnds
[1]);
1569 gimple_call_set_lhs (abd_stmt
, abd_result
);
1570 gimple_set_location (abd_stmt
, gimple_location (last_stmt
));
1572 gimple
*stmt
= abd_stmt
;
1573 if (TYPE_PRECISION (abd_in_type
) == TYPE_PRECISION (abd_out_type
)
1574 && TYPE_PRECISION (abd_out_type
) < TYPE_PRECISION (out_type
)
1575 && !TYPE_UNSIGNED (abd_out_type
))
1577 tree unsign
= unsigned_type_for (abd_out_type
);
1578 tree unsign_vectype
= get_vectype_for_scalar_type (vinfo
, unsign
);
1579 stmt
= vect_convert_output (vinfo
, stmt_vinfo
, unsign
, stmt
,
1583 return vect_convert_output (vinfo
, stmt_vinfo
, out_type
, stmt
, vectype_out
);
1586 /* Recognize an operation that performs ORIG_CODE on widened inputs,
1587 so that it can be treated as though it had the form:
1591 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1592 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1593 | RES_TYPE a_extend = (RES_TYPE) a_cast; // promotion from HALF_TYPE
1594 | RES_TYPE b_extend = (RES_TYPE) b_cast; // promotion from HALF_TYPE
1595 | RES_TYPE res = a_extend ORIG_CODE b_extend;
1597 Try to replace the pattern with:
1601 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1602 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1603 | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
1604 | RES_TYPE res = (EXT_TYPE) ext; // possible no-op
1606 where EXT_TYPE is wider than HALF_TYPE but has the same signedness.
1608 SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts. NAME is the
1609 name of the pattern being matched, for dump purposes. */
1612 vect_recog_widen_op_pattern (vec_info
*vinfo
,
1613 stmt_vec_info last_stmt_info
, tree
*type_out
,
1614 tree_code orig_code
, code_helper wide_code
,
1615 bool shift_p
, const char *name
)
1617 gimple
*last_stmt
= last_stmt_info
->stmt
;
1619 vect_unpromoted_value unprom
[2];
1621 if (!vect_widened_op_tree (vinfo
, last_stmt_info
, orig_code
, orig_code
,
1622 shift_p
, 2, unprom
, &half_type
))
1626 /* Pattern detected. */
1627 vect_pattern_detected (name
, last_stmt
);
1629 tree type
= TREE_TYPE (gimple_get_lhs (last_stmt
));
1631 if (TYPE_PRECISION (type
) != TYPE_PRECISION (half_type
) * 2
1632 || TYPE_UNSIGNED (type
) != TYPE_UNSIGNED (half_type
))
1633 itype
= build_nonstandard_integer_type (TYPE_PRECISION (half_type
) * 2,
1634 TYPE_UNSIGNED (half_type
));
1636 /* Check target support */
1637 tree vectype
= get_vectype_for_scalar_type (vinfo
, half_type
);
1638 tree vecitype
= get_vectype_for_scalar_type (vinfo
, itype
);
1640 tree vecctype
= vecitype
;
1641 if (orig_code
== MINUS_EXPR
1642 && TYPE_UNSIGNED (itype
)
1643 && TYPE_PRECISION (type
) > TYPE_PRECISION (itype
))
1645 /* Subtraction is special, even if half_type is unsigned and no matter
1646 whether type is signed or unsigned, if type is wider than itype,
1647 we need to sign-extend from the widening operation result to the
1649 Consider half_type unsigned char, operand 1 0xfe, operand 2 0xff,
1650 itype unsigned short and type either int or unsigned int.
1651 Widened (unsigned short) 0xfe - (unsigned short) 0xff is
1652 (unsigned short) 0xffff, but for type int we want the result -1
1653 and for type unsigned int 0xffffffff rather than 0xffff. */
1654 ctype
= build_nonstandard_integer_type (TYPE_PRECISION (itype
), 0);
1655 vecctype
= get_vectype_for_scalar_type (vinfo
, ctype
);
1658 code_helper dummy_code
;
1660 auto_vec
<tree
> dummy_vec
;
1664 || !supportable_widening_operation (vinfo
, wide_code
, last_stmt_info
,
1666 &dummy_code
, &dummy_code
,
1667 &dummy_int
, &dummy_vec
))
1670 *type_out
= get_vectype_for_scalar_type (vinfo
, type
);
1675 vect_convert_inputs (vinfo
, last_stmt_info
,
1676 2, oprnd
, half_type
, unprom
, vectype
);
1678 tree var
= vect_recog_temp_ssa_var (itype
, NULL
);
1679 gimple
*pattern_stmt
= vect_gimple_build (var
, wide_code
, oprnd
[0], oprnd
[1]);
1681 if (vecctype
!= vecitype
)
1682 pattern_stmt
= vect_convert_output (vinfo
, last_stmt_info
, ctype
,
1683 pattern_stmt
, vecitype
);
1685 return vect_convert_output (vinfo
, last_stmt_info
,
1686 type
, pattern_stmt
, vecctype
);
1689 /* Try to detect multiplication on widened inputs, converting MULT_EXPR
1690 to WIDEN_MULT_EXPR. See vect_recog_widen_op_pattern for details. */
1693 vect_recog_widen_mult_pattern (vec_info
*vinfo
, stmt_vec_info last_stmt_info
,
1696 return vect_recog_widen_op_pattern (vinfo
, last_stmt_info
, type_out
,
1697 MULT_EXPR
, WIDEN_MULT_EXPR
, false,
1698 "vect_recog_widen_mult_pattern");
1701 /* Try to detect addition on widened inputs, converting PLUS_EXPR
1702 to IFN_VEC_WIDEN_PLUS. See vect_recog_widen_op_pattern for details. */
1705 vect_recog_widen_plus_pattern (vec_info
*vinfo
, stmt_vec_info last_stmt_info
,
1708 return vect_recog_widen_op_pattern (vinfo
, last_stmt_info
, type_out
,
1709 PLUS_EXPR
, IFN_VEC_WIDEN_PLUS
,
1710 false, "vect_recog_widen_plus_pattern");
1713 /* Try to detect subtraction on widened inputs, converting MINUS_EXPR
1714 to IFN_VEC_WIDEN_MINUS. See vect_recog_widen_op_pattern for details. */
1716 vect_recog_widen_minus_pattern (vec_info
*vinfo
, stmt_vec_info last_stmt_info
,
1719 return vect_recog_widen_op_pattern (vinfo
, last_stmt_info
, type_out
,
1720 MINUS_EXPR
, IFN_VEC_WIDEN_MINUS
,
1721 false, "vect_recog_widen_minus_pattern");
1724 /* Try to detect abd on widened inputs, converting IFN_ABD
1725 to IFN_VEC_WIDEN_ABD. */
1727 vect_recog_widen_abd_pattern (vec_info
*vinfo
, stmt_vec_info stmt_vinfo
,
1730 gassign
*last_stmt
= dyn_cast
<gassign
*> (STMT_VINFO_STMT (stmt_vinfo
));
1731 if (!last_stmt
|| !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (last_stmt
)))
1734 tree last_rhs
= gimple_assign_rhs1 (last_stmt
);
1736 tree in_type
= TREE_TYPE (last_rhs
);
1737 tree out_type
= TREE_TYPE (gimple_assign_lhs (last_stmt
));
1738 if (!INTEGRAL_TYPE_P (in_type
)
1739 || !INTEGRAL_TYPE_P (out_type
)
1740 || TYPE_PRECISION (in_type
) * 2 != TYPE_PRECISION (out_type
)
1741 || !TYPE_UNSIGNED (in_type
))
1744 vect_unpromoted_value unprom
;
1745 tree op
= vect_look_through_possible_promotion (vinfo
, last_rhs
, &unprom
);
1746 if (!op
|| TYPE_PRECISION (TREE_TYPE (op
)) != TYPE_PRECISION (in_type
))
1749 stmt_vec_info abd_pattern_vinfo
= vect_get_internal_def (vinfo
, op
);
1750 if (!abd_pattern_vinfo
)
1753 abd_pattern_vinfo
= vect_stmt_to_vectorize (abd_pattern_vinfo
);
1754 gcall
*abd_stmt
= dyn_cast
<gcall
*> (STMT_VINFO_STMT (abd_pattern_vinfo
));
1756 || !gimple_call_internal_p (abd_stmt
)
1757 || gimple_call_internal_fn (abd_stmt
) != IFN_ABD
)
1760 tree vectype_in
= get_vectype_for_scalar_type (vinfo
, in_type
);
1761 tree vectype_out
= get_vectype_for_scalar_type (vinfo
, out_type
);
1763 code_helper dummy_code
;
1765 auto_vec
<tree
> dummy_vec
;
1766 if (!supportable_widening_operation (vinfo
, IFN_VEC_WIDEN_ABD
, stmt_vinfo
,
1767 vectype_out
, vectype_in
,
1768 &dummy_code
, &dummy_code
,
1769 &dummy_int
, &dummy_vec
))
1772 vect_pattern_detected ("vect_recog_widen_abd_pattern", last_stmt
);
1774 *type_out
= vectype_out
;
1776 tree abd_oprnd0
= gimple_call_arg (abd_stmt
, 0);
1777 tree abd_oprnd1
= gimple_call_arg (abd_stmt
, 1);
1778 tree widen_abd_result
= vect_recog_temp_ssa_var (out_type
, NULL
);
1779 gcall
*widen_abd_stmt
= gimple_build_call_internal (IFN_VEC_WIDEN_ABD
, 2,
1780 abd_oprnd0
, abd_oprnd1
);
1781 gimple_call_set_lhs (widen_abd_stmt
, widen_abd_result
);
1782 gimple_set_location (widen_abd_stmt
, gimple_location (last_stmt
));
1783 return widen_abd_stmt
;
1786 /* Function vect_recog_ctz_ffs_pattern
1788 Try to find the following pattern:
1793 B = __builtin_ctz{,l,ll} (A);
1797 B = __builtin_ffs{,l,ll} (A);
1801 * STMT_VINFO: The stmt from which the pattern search begins.
1802 here it starts with B = __builtin_* (A);
1806 * TYPE_OUT: The vector type of the output of this pattern.
1808 * Return value: A new stmt that will be used to replace the sequence of
1809 stmts that constitute the pattern, using clz or popcount builtins. */
1812 vect_recog_ctz_ffs_pattern (vec_info
*vinfo
, stmt_vec_info stmt_vinfo
,
1815 gimple
*call_stmt
= stmt_vinfo
->stmt
;
1816 gimple
*pattern_stmt
;
1817 tree rhs_oprnd
, rhs_type
, lhs_oprnd
, lhs_type
, vec_type
, vec_rhs_type
;
1819 internal_fn ifn
= IFN_LAST
, ifnnew
= IFN_LAST
;
1820 bool defined_at_zero
= true, defined_at_zero_new
= false;
1821 int val
= 0, val_new
= 0;
1823 int sub
= 0, add
= 0;
1826 if (!is_gimple_call (call_stmt
))
1829 if (gimple_call_num_args (call_stmt
) != 1)
1832 rhs_oprnd
= gimple_call_arg (call_stmt
, 0);
1833 rhs_type
= TREE_TYPE (rhs_oprnd
);
1834 lhs_oprnd
= gimple_call_lhs (call_stmt
);
1837 lhs_type
= TREE_TYPE (lhs_oprnd
);
1838 if (!INTEGRAL_TYPE_P (lhs_type
)
1839 || !INTEGRAL_TYPE_P (rhs_type
)
1840 || !type_has_mode_precision_p (rhs_type
)
1841 || TREE_CODE (rhs_oprnd
) != SSA_NAME
)
1844 switch (gimple_call_combined_fn (call_stmt
))
1848 if (!gimple_call_internal_p (call_stmt
)
1849 || CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (rhs_type
),
1851 defined_at_zero
= false;
1860 prec
= TYPE_PRECISION (rhs_type
);
1861 loc
= gimple_location (call_stmt
);
1863 vec_type
= get_vectype_for_scalar_type (vinfo
, lhs_type
);
1867 vec_rhs_type
= get_vectype_for_scalar_type (vinfo
, rhs_type
);
1871 /* Do it only if the backend doesn't have ctz<vector_mode>2 or
1872 ffs<vector_mode>2 pattern but does have clz<vector_mode>2 or
1873 popcount<vector_mode>2. */
1875 || direct_internal_fn_supported_p (ifn
, vec_rhs_type
,
1876 OPTIMIZE_FOR_SPEED
))
1880 && direct_internal_fn_supported_p (IFN_CTZ
, vec_rhs_type
,
1881 OPTIMIZE_FOR_SPEED
))
1885 = CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (rhs_type
),
1888 else if (direct_internal_fn_supported_p (IFN_CLZ
, vec_rhs_type
,
1889 OPTIMIZE_FOR_SPEED
))
1893 = CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (rhs_type
),
1896 if ((ifnnew
== IFN_LAST
1897 || (defined_at_zero
&& !defined_at_zero_new
))
1898 && direct_internal_fn_supported_p (IFN_POPCOUNT
, vec_rhs_type
,
1899 OPTIMIZE_FOR_SPEED
))
1901 ifnnew
= IFN_POPCOUNT
;
1902 defined_at_zero_new
= true;
1905 if (ifnnew
== IFN_LAST
)
1908 vect_pattern_detected ("vec_recog_ctz_ffs_pattern", call_stmt
);
1910 if ((ifnnew
== IFN_CLZ
1912 && defined_at_zero_new
1915 || (ifnnew
== IFN_POPCOUNT
&& ifn
== IFN_CTZ
))
1917 /* .CTZ (X) = PREC - .CLZ ((X - 1) & ~X)
1918 .CTZ (X) = .POPCOUNT ((X - 1) & ~X). */
1919 if (ifnnew
== IFN_CLZ
)
1923 if (!TYPE_UNSIGNED (rhs_type
))
1925 rhs_type
= unsigned_type_for (rhs_type
);
1926 vec_rhs_type
= get_vectype_for_scalar_type (vinfo
, rhs_type
);
1927 new_var
= vect_recog_temp_ssa_var (rhs_type
, NULL
);
1928 pattern_stmt
= gimple_build_assign (new_var
, NOP_EXPR
, rhs_oprnd
);
1929 append_pattern_def_seq (vinfo
, stmt_vinfo
, pattern_stmt
,
1931 rhs_oprnd
= new_var
;
1934 tree m1
= vect_recog_temp_ssa_var (rhs_type
, NULL
);
1935 pattern_stmt
= gimple_build_assign (m1
, PLUS_EXPR
, rhs_oprnd
,
1936 build_int_cst (rhs_type
, -1));
1937 gimple_set_location (pattern_stmt
, loc
);
1938 append_pattern_def_seq (vinfo
, stmt_vinfo
, pattern_stmt
, vec_rhs_type
);
1940 new_var
= vect_recog_temp_ssa_var (rhs_type
, NULL
);
1941 pattern_stmt
= gimple_build_assign (new_var
, BIT_NOT_EXPR
, rhs_oprnd
);
1942 gimple_set_location (pattern_stmt
, loc
);
1943 append_pattern_def_seq (vinfo
, stmt_vinfo
, pattern_stmt
, vec_rhs_type
);
1944 rhs_oprnd
= new_var
;
1946 new_var
= vect_recog_temp_ssa_var (rhs_type
, NULL
);
1947 pattern_stmt
= gimple_build_assign (new_var
, BIT_AND_EXPR
,
1949 gimple_set_location (pattern_stmt
, loc
);
1950 append_pattern_def_seq (vinfo
, stmt_vinfo
, pattern_stmt
, vec_rhs_type
);
1951 rhs_oprnd
= new_var
;
1953 else if (ifnnew
== IFN_CLZ
)
1955 /* .CTZ (X) = (PREC - 1) - .CLZ (X & -X)
1956 .FFS (X) = PREC - .CLZ (X & -X). */
1957 sub
= prec
- (ifn
== IFN_CTZ
);
1958 val_new
= sub
- val_new
;
1960 tree neg
= vect_recog_temp_ssa_var (rhs_type
, NULL
);
1961 pattern_stmt
= gimple_build_assign (neg
, NEGATE_EXPR
, rhs_oprnd
);
1962 gimple_set_location (pattern_stmt
, loc
);
1963 append_pattern_def_seq (vinfo
, stmt_vinfo
, pattern_stmt
, vec_rhs_type
);
1965 new_var
= vect_recog_temp_ssa_var (rhs_type
, NULL
);
1966 pattern_stmt
= gimple_build_assign (new_var
, BIT_AND_EXPR
,
1968 gimple_set_location (pattern_stmt
, loc
);
1969 append_pattern_def_seq (vinfo
, stmt_vinfo
, pattern_stmt
, vec_rhs_type
);
1970 rhs_oprnd
= new_var
;
1972 else if (ifnnew
== IFN_POPCOUNT
)
1974 /* .CTZ (X) = PREC - .POPCOUNT (X | -X)
1975 .FFS (X) = (PREC + 1) - .POPCOUNT (X | -X). */
1976 sub
= prec
+ (ifn
== IFN_FFS
);
1979 tree neg
= vect_recog_temp_ssa_var (rhs_type
, NULL
);
1980 pattern_stmt
= gimple_build_assign (neg
, NEGATE_EXPR
, rhs_oprnd
);
1981 gimple_set_location (pattern_stmt
, loc
);
1982 append_pattern_def_seq (vinfo
, stmt_vinfo
, pattern_stmt
, vec_rhs_type
);
1984 new_var
= vect_recog_temp_ssa_var (rhs_type
, NULL
);
1985 pattern_stmt
= gimple_build_assign (new_var
, BIT_IOR_EXPR
,
1987 gimple_set_location (pattern_stmt
, loc
);
1988 append_pattern_def_seq (vinfo
, stmt_vinfo
, pattern_stmt
, vec_rhs_type
);
1989 rhs_oprnd
= new_var
;
1991 else if (ifnnew
== IFN_CTZ
)
1993 /* .FFS (X) = .CTZ (X) + 1. */
1998 /* Create B = .IFNNEW (A). */
1999 new_var
= vect_recog_temp_ssa_var (lhs_type
, NULL
);
2000 pattern_stmt
= gimple_build_call_internal (ifnnew
, 1, rhs_oprnd
);
2001 gimple_call_set_lhs (pattern_stmt
, new_var
);
2002 gimple_set_location (pattern_stmt
, loc
);
2003 *type_out
= vec_type
;
2007 append_pattern_def_seq (vinfo
, stmt_vinfo
, pattern_stmt
, vec_type
);
2008 tree ret_var
= vect_recog_temp_ssa_var (lhs_type
, NULL
);
2009 pattern_stmt
= gimple_build_assign (ret_var
, MINUS_EXPR
,
2010 build_int_cst (lhs_type
, sub
),
2012 gimple_set_location (pattern_stmt
, loc
);
2017 append_pattern_def_seq (vinfo
, stmt_vinfo
, pattern_stmt
, vec_type
);
2018 tree ret_var
= vect_recog_temp_ssa_var (lhs_type
, NULL
);
2019 pattern_stmt
= gimple_build_assign (ret_var
, PLUS_EXPR
, new_var
,
2020 build_int_cst (lhs_type
, add
));
2021 gimple_set_location (pattern_stmt
, loc
);
2026 && (!defined_at_zero_new
|| val
!= val_new
))
2028 append_pattern_def_seq (vinfo
, stmt_vinfo
, pattern_stmt
, vec_type
);
2029 tree ret_var
= vect_recog_temp_ssa_var (lhs_type
, NULL
);
2030 rhs_oprnd
= gimple_call_arg (call_stmt
, 0);
2031 rhs_type
= TREE_TYPE (rhs_oprnd
);
2032 tree cmp
= build2_loc (loc
, NE_EXPR
, boolean_type_node
,
2033 rhs_oprnd
, build_zero_cst (rhs_type
));
2034 pattern_stmt
= gimple_build_assign (ret_var
, COND_EXPR
, cmp
,
2036 build_int_cst (lhs_type
, val
));
2039 if (dump_enabled_p ())
2040 dump_printf_loc (MSG_NOTE
, vect_location
,
2041 "created pattern stmt: %G", pattern_stmt
);
2043 return pattern_stmt
;
2046 /* Function vect_recog_popcount_clz_ctz_ffs_pattern
2048 Try to find the following pattern:
2054 temp_in = (UTYPE2)A;
2056 temp_out = __builtin_popcount{,l,ll} (temp_in);
2057 B = (TYPE1) temp_out;
2059 TYPE2 may or may not be equal to TYPE3.
2060 i.e. TYPE2 is equal to TYPE3 for __builtin_popcount
2061 i.e. TYPE2 is not equal to TYPE3 for __builtin_popcountll
2065 * STMT_VINFO: The stmt from which the pattern search begins.
2066 here it starts with B = (TYPE1) temp_out;
2070 * TYPE_OUT: The vector type of the output of this pattern.
2072 * Return value: A new stmt that will be used to replace the sequence of
2073 stmts that constitute the pattern. In this case it will be:
2076 Similarly for clz, ctz and ffs.
2080 vect_recog_popcount_clz_ctz_ffs_pattern (vec_info
*vinfo
,
2081 stmt_vec_info stmt_vinfo
,
2084 gassign
*last_stmt
= dyn_cast
<gassign
*> (stmt_vinfo
->stmt
);
2085 gimple
*call_stmt
, *pattern_stmt
;
2086 tree rhs_oprnd
, rhs_origin
, lhs_oprnd
, lhs_type
, vec_type
, new_var
;
2087 internal_fn ifn
= IFN_LAST
;
2090 /* Find B = (TYPE1) temp_out. */
2093 tree_code code
= gimple_assign_rhs_code (last_stmt
);
2094 if (!CONVERT_EXPR_CODE_P (code
))
2097 lhs_oprnd
= gimple_assign_lhs (last_stmt
);
2098 lhs_type
= TREE_TYPE (lhs_oprnd
);
2099 if (!INTEGRAL_TYPE_P (lhs_type
))
2102 rhs_oprnd
= gimple_assign_rhs1 (last_stmt
);
2103 if (TREE_CODE (rhs_oprnd
) != SSA_NAME
2104 || !has_single_use (rhs_oprnd
))
2106 call_stmt
= SSA_NAME_DEF_STMT (rhs_oprnd
);
2108 /* Find temp_out = __builtin_popcount{,l,ll} (temp_in); */
2109 if (!is_gimple_call (call_stmt
))
2111 switch (gimple_call_combined_fn (call_stmt
))
2119 /* Punt if call result is unsigned and defined value at zero
2120 is negative, as the negative value doesn't extend correctly. */
2121 if (TYPE_UNSIGNED (TREE_TYPE (rhs_oprnd
))
2122 && gimple_call_internal_p (call_stmt
)
2123 && CLZ_DEFINED_VALUE_AT_ZERO
2124 (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd
)), val
) == 2
2130 /* Punt if call result is unsigned and defined value at zero
2131 is negative, as the negative value doesn't extend correctly. */
2132 if (TYPE_UNSIGNED (TREE_TYPE (rhs_oprnd
))
2133 && gimple_call_internal_p (call_stmt
)
2134 && CTZ_DEFINED_VALUE_AT_ZERO
2135 (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd
)), val
) == 2
2146 if (gimple_call_num_args (call_stmt
) != 1)
2149 rhs_oprnd
= gimple_call_arg (call_stmt
, 0);
2150 vect_unpromoted_value unprom_diff
;
2152 = vect_look_through_possible_promotion (vinfo
, rhs_oprnd
, &unprom_diff
);
2157 /* Input and output of .POPCOUNT should be same-precision integer. */
2158 if (TYPE_PRECISION (unprom_diff
.type
) != TYPE_PRECISION (lhs_type
))
2161 /* Also A should be unsigned or same precision as temp_in, otherwise
2162 different builtins/internal functions have different behaviors. */
2163 if (TYPE_PRECISION (unprom_diff
.type
)
2164 != TYPE_PRECISION (TREE_TYPE (rhs_oprnd
)))
2168 /* For popcount require zero extension, which doesn't add any
2169 further bits to the count. */
2170 if (!TYPE_UNSIGNED (unprom_diff
.type
))
2174 /* clzll (x) == clz (x) + 32 for unsigned x != 0, so ok
2175 if it is undefined at zero or if it matches also for the
2176 defined value there. */
2177 if (!TYPE_UNSIGNED (unprom_diff
.type
))
2179 if (!type_has_mode_precision_p (lhs_type
)
2180 || !type_has_mode_precision_p (TREE_TYPE (rhs_oprnd
)))
2182 addend
= (TYPE_PRECISION (TREE_TYPE (rhs_oprnd
))
2183 - TYPE_PRECISION (lhs_type
));
2184 if (gimple_call_internal_p (call_stmt
))
2188 = CLZ_DEFINED_VALUE_AT_ZERO
2189 (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd
)), val1
);
2191 = CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type
),
2195 if (d2
!= 2 || val1
!= val2
+ addend
)
2200 /* ctzll (x) == ctz (x) for unsigned or signed x != 0, so ok
2201 if it is undefined at zero or if it matches also for the
2202 defined value there. */
2203 if (gimple_call_internal_p (call_stmt
))
2207 = CTZ_DEFINED_VALUE_AT_ZERO
2208 (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd
)), val1
);
2210 = CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type
),
2214 if (d2
!= 2 || val1
!= val2
)
2219 /* ffsll (x) == ffs (x) for unsigned or signed x. */
2225 vec_type
= get_vectype_for_scalar_type (vinfo
, lhs_type
);
2226 /* Do it only if the backend has popcount<vector_mode>2 etc. pattern. */
2231 = direct_internal_fn_supported_p (ifn
, vec_type
, OPTIMIZE_FOR_SPEED
);
2239 /* vect_recog_ctz_ffs_pattern can implement ffs using ctz. */
2240 if (direct_internal_fn_supported_p (IFN_CTZ
, vec_type
,
2241 OPTIMIZE_FOR_SPEED
))
2245 /* vect_recog_ctz_ffs_pattern can implement ffs or ctz using
2247 if (direct_internal_fn_supported_p (IFN_CLZ
, vec_type
,
2248 OPTIMIZE_FOR_SPEED
))
2250 if (direct_internal_fn_supported_p (IFN_POPCOUNT
, vec_type
,
2251 OPTIMIZE_FOR_SPEED
))
2258 vect_pattern_detected ("vec_recog_popcount_clz_ctz_ffs_pattern",
2261 /* Create B = .POPCOUNT (A). */
2262 new_var
= vect_recog_temp_ssa_var (lhs_type
, NULL
);
2263 pattern_stmt
= gimple_build_call_internal (ifn
, 1, unprom_diff
.op
);
2264 gimple_call_set_lhs (pattern_stmt
, new_var
);
2265 gimple_set_location (pattern_stmt
, gimple_location (last_stmt
));
2266 *type_out
= vec_type
;
2268 if (dump_enabled_p ())
2269 dump_printf_loc (MSG_NOTE
, vect_location
,
2270 "created pattern stmt: %G", pattern_stmt
);
2274 gcc_assert (supported
);
2275 append_pattern_def_seq (vinfo
, stmt_vinfo
, pattern_stmt
, vec_type
);
2276 tree ret_var
= vect_recog_temp_ssa_var (lhs_type
, NULL
);
2277 pattern_stmt
= gimple_build_assign (ret_var
, PLUS_EXPR
, new_var
,
2278 build_int_cst (lhs_type
, addend
));
2280 else if (!supported
)
2282 stmt_vec_info new_stmt_info
= vinfo
->add_stmt (pattern_stmt
);
2283 STMT_VINFO_VECTYPE (new_stmt_info
) = vec_type
;
2285 = vect_recog_ctz_ffs_pattern (vinfo
, new_stmt_info
, type_out
);
2286 if (pattern_stmt
== NULL
)
2288 if (gimple_seq seq
= STMT_VINFO_PATTERN_DEF_SEQ (new_stmt_info
))
2290 gimple_seq
*pseq
= &STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
);
2291 gimple_seq_add_seq_without_update (pseq
, seq
);
2294 return pattern_stmt
;
2297 /* Function vect_recog_pow_pattern
2299 Try to find the following pattern:
2303 with POW being one of pow, powf, powi, powif and N being
2308 * STMT_VINFO: The stmt from which the pattern search begins.
2312 * TYPE_OUT: The type of the output of this pattern.
2314 * Return value: A new stmt that will be used to replace the sequence of
2315 stmts that constitute the pattern. In this case it will be:
/* NOTE(review): this listing is a lossy extract; the return-type line,
   braces, case labels and early-return statements are not visible here.

   Recognizer for a call statement with an lhs that
   gimple_call_combined_fn dispatches on (the case labels are not shown).
   Visible behavior:
   - BASE and EXP are call arguments 0 and 1; EXP is tested for being a
     REAL_CST or an INTEGER_CST;
   - with flag_unsafe_math_optimizations, a REAL_CST BASE and a normal
     builtin call, log (BASE) is folded to LOGC and the statements
     DEF = EXP * LOGC and RES = exp (DEF) are built, guarded by checks on
     the omp declare simd attribute and the simd clones of the exp decl;
   - an EXP equal to 2 yields VAR = BASE * BASE when MULT_EXPR is
     supported for the type of BASE;
   - an EXP equal to 0.5 yields an IFN_SQRT call when
     direct_internal_fn_supported_p accepts it for *TYPE_OUT.
   *TYPE_OUT receives the vector type selected for the pattern.  */
2322 vect_recog_pow_pattern (vec_info
*vinfo
,
2323 stmt_vec_info stmt_vinfo
, tree
*type_out
)
2325 gimple
*last_stmt
= stmt_vinfo
->stmt
;
2330 if (!is_gimple_call (last_stmt
) || gimple_call_lhs (last_stmt
) == NULL
)
2333 switch (gimple_call_combined_fn (last_stmt
))
2343 base
= gimple_call_arg (last_stmt
, 0);
2344 exp
= gimple_call_arg (last_stmt
, 1);
2345 if (TREE_CODE (exp
) != REAL_CST
2346 && TREE_CODE (exp
) != INTEGER_CST
)
2348 if (flag_unsafe_math_optimizations
2349 && TREE_CODE (base
) == REAL_CST
2350 && gimple_call_builtin_p (last_stmt
, BUILT_IN_NORMAL
))
2352 combined_fn log_cfn
;
2353 built_in_function exp_bfn
;
2354 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt
)))
2357 log_cfn
= CFN_BUILT_IN_LOG
;
2358 exp_bfn
= BUILT_IN_EXP
;
2361 log_cfn
= CFN_BUILT_IN_LOGF
;
2362 exp_bfn
= BUILT_IN_EXPF
;
2365 log_cfn
= CFN_BUILT_IN_LOGL
;
2366 exp_bfn
= BUILT_IN_EXPL
;
/* Fold log (BASE) at compile time; BASE is a REAL_CST on this path.
   EXP_DECL is the decl of the exp builtin paired with LOG_CFN above.  */
2371 tree logc
= fold_const_call (log_cfn
, TREE_TYPE (base
), base
);
2372 tree exp_decl
= builtin_decl_implicit (exp_bfn
);
2373 /* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd
2374 does that, but if C is a power of 2, we want to use
2375 exp2 (log2 (C) * x) in the non-vectorized version, but for
2376 vectorization we don't have vectorized exp2. */
2378 && TREE_CODE (logc
) == REAL_CST
2380 && lookup_attribute ("omp declare simd",
2381 DECL_ATTRIBUTES (exp_decl
)))
2383 cgraph_node
*node
= cgraph_node::get_create (exp_decl
);
2384 if (node
->simd_clones
== NULL
)
2386 if (targetm
.simd_clone
.compute_vecsize_and_simdlen
== NULL
2387 || node
->definition
)
2389 expand_simd_clones (node
);
2390 if (node
->simd_clones
== NULL
)
2393 *type_out
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (base
));
/* Emit DEF = EXP * LOGC into the pattern definition sequence, then build
   the call RES = exp (DEF).  */
2396 tree def
= vect_recog_temp_ssa_var (TREE_TYPE (base
), NULL
);
2397 gimple
*g
= gimple_build_assign (def
, MULT_EXPR
, exp
, logc
);
2398 append_pattern_def_seq (vinfo
, stmt_vinfo
, g
);
2399 tree res
= vect_recog_temp_ssa_var (TREE_TYPE (base
), NULL
);
2400 g
= gimple_build_call (exp_decl
, 1, def
);
2401 gimple_call_set_lhs (g
, res
);
2409 /* We now have a pow or powi builtin function call with a constant
2412 /* Catch squaring. */
2413 if ((tree_fits_shwi_p (exp
)
2414 && tree_to_shwi (exp
) == 2)
2415 || (TREE_CODE (exp
) == REAL_CST
2416 && real_equal (&TREE_REAL_CST (exp
), &dconst2
)))
2418 if (!vect_supportable_direct_optab_p (vinfo
, TREE_TYPE (base
), MULT_EXPR
,
2419 TREE_TYPE (base
), type_out
))
2422 var
= vect_recog_temp_ssa_var (TREE_TYPE (base
), NULL
);
2423 stmt
= gimple_build_assign (var
, MULT_EXPR
, base
, base
);
2427 /* Catch square root. */
2428 if (TREE_CODE (exp
) == REAL_CST
2429 && real_equal (&TREE_REAL_CST (exp
), &dconsthalf
))
2431 *type_out
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (base
));
2433 && direct_internal_fn_supported_p (IFN_SQRT
, *type_out
,
2434 OPTIMIZE_FOR_SPEED
))
2436 gcall
*stmt
= gimple_build_call_internal (IFN_SQRT
, 1, base
);
2437 var
= vect_recog_temp_ssa_var (TREE_TYPE (base
), stmt
);
2438 gimple_call_set_lhs (stmt
, var
);
2439 gimple_call_set_nothrow (stmt
, true);
2448 /* Function vect_recog_widen_sum_pattern
2450 Try to find the following pattern:
2453 TYPE x_T, sum = init;
2455 sum_0 = phi <init, sum_1>
2457 S2 x_T = (TYPE) x_t;
2458 S3 sum_1 = x_T + sum_0;
2460 where type 'TYPE' is at least double the size of type 'type', i.e. we're
2461 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
2462 a special case of a reduction computation.
2466 * STMT_VINFO: The stmt from which the pattern search begins. In the example,
2467 when this function is called with S3, the pattern {S2,S3} will be detected.
2471 * TYPE_OUT: The type of the output of this pattern.
2473 * Return value: A new stmt that will be used to replace the sequence of
2474 stmts that constitute the pattern. In this case it will be:
2475 WIDEN_SUM <x_t, sum_0>
2477 Note: The widening-sum idiom is a widening reduction pattern that is
2478 vectorized without preserving all the intermediate results. It
2479 produces only N/2 (widened) results (by summing up pairs of
2480 intermediate results) rather than all N results. Therefore, we
2481 cannot allow this pattern when we want to get all the results and in
2482 the correct order (as is the case when this computation is in an
2483 inner-loop nested in an outer-loop that is being vectorized). */
/* NOTE(review): this listing is a lossy extract; the return-type line,
   braces, some declarations and the early-return statements are not
   visible here.

   Recognizer for a widening summation.  Visible steps:
   - vect_reassociating_reduction_p is asked about STMT_VINFO with
     PLUS_EXPR, and OPRND0 must be an SSA_NAME that VINFO can look up;
   - OPRND0 is looked through for a promotion into UNPROM0, and the test
     TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type) rejects
     inputs that are not at most half as wide as TYPE;
   - vect_supportable_direct_optab_p is queried for WIDEN_SUM_EXPR and is
     handed TYPE_OUT;
   - the returned statement is VAR = WIDEN_SUM_EXPR <unprom0.op, oprnd1>.  */
2486 vect_recog_widen_sum_pattern (vec_info
*vinfo
,
2487 stmt_vec_info stmt_vinfo
, tree
*type_out
)
2489 gimple
*last_stmt
= stmt_vinfo
->stmt
;
2490 tree oprnd0
, oprnd1
;
2492 gimple
*pattern_stmt
;
2495 /* Look for the following pattern
2498 In which DX is at least double the size of X, and sum_1 has been
2499 recognized as a reduction variable.
2502 /* Starting from LAST_STMT, follow the defs of its uses in search
2503 of the above pattern. */
2505 if (!vect_reassociating_reduction_p (vinfo
, stmt_vinfo
, PLUS_EXPR
,
2507 || TREE_CODE (oprnd0
) != SSA_NAME
2508 || !vinfo
->lookup_def (oprnd0
))
/* TYPE is the type of the lhs of LAST_STMT, i.e. of the accumulated sum.  */
2511 type
= TREE_TYPE (gimple_get_lhs (last_stmt
));
2513 /* So far so good. Since last_stmt was detected as a (summation) reduction,
2514 we know that oprnd1 is the reduction variable (defined by a loop-header
2515 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
2516 Left to check that oprnd0 is defined by a cast from type 'type' to type
2519 vect_unpromoted_value unprom0
;
2520 if (!vect_look_through_possible_promotion (vinfo
, oprnd0
, &unprom0
)
2521 || TYPE_PRECISION (unprom0
.type
) * 2 > TYPE_PRECISION (type
))
2524 vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt
);
2526 if (!vect_supportable_direct_optab_p (vinfo
, type
, WIDEN_SUM_EXPR
,
2527 unprom0
.type
, type_out
))
2530 var
= vect_recog_temp_ssa_var (type
, NULL
);
2531 pattern_stmt
= gimple_build_assign (var
, WIDEN_SUM_EXPR
, unprom0
.op
, oprnd1
);
2533 return pattern_stmt
;
2536 /* Function vect_recog_bitfield_ref_pattern
2538 Try to find the following pattern:
2540 bf_value = BIT_FIELD_REF (container, bitsize, bitpos);
2541 result = (type_out) bf_value;
2545 if (BIT_FIELD_REF (container, bitsize, bitpos) `cmp` <constant>)
2547 where type_out is a non-bitfield type, that is to say, its precision matches
2548 2^(TYPE_SIZE(type_out) - (TYPE_UNSIGNED (type_out) ? 1 : 2)).
2552 * STMT_VINFO: The stmt from which the pattern search begins.
2553 here it starts with:
2554 result = (type_out) bf_value;
2558 if (BIT_FIELD_REF (container, bitsize, bitpos) `cmp` <constant>)
2562 * TYPE_OUT: The vector type of the output of this pattern.
2564 * Return value: A new stmt that will be used to replace the sequence of
2565 stmts that constitute the pattern. If the precision of type_out is bigger
2566 than the precision type of _1 we perform the widening before the shifting,
2567 since the new precision will be large enough to shift the value and moving
2568 widening operations up the statement chain enables the generation of
2569 widening loads. If we are widening and the operation after the pattern is
2570 an addition then we mask first and shift later, to enable the generation of
2571 shifting adds. In the case of narrowing we will always mask first, shift
2572 last and then perform a narrowing operation. This will enable the
2573 generation of narrowing shifts.
2575 Widening with mask first, shift later:
2576 container = (type_out) container;
2577 masked = container & (((1 << bitsize) - 1) << bitpos);
2578 result = masked >> bitpos;
2580 Widening with shift first, mask last:
2581 container = (type_out) container;
2582 shifted = container >> bitpos;
2583 result = shifted & ((1 << bitsize) - 1);
2586 masked = container & (((1 << bitsize) - 1) << bitpos);
2587 result = masked >> bitpos;
2588 result = (type_out) result;
2590 If the bitfield is signed and it's wider than type_out, we need to
2591 keep the result sign-extended:
2592 container = (type) container;
2593 masked = container << (prec - bitsize - bitpos);
2594 result = (type_out) (masked >> (prec - bitsize));
2596 Here type is the signed variant of the wider of type_out and the type
2599 The shifting is always optional depending on whether bitpos != 0.
2601 When the original bitfield was inside a gcond then a new gcond is also
2602 generated with the new `result` as the operand to the comparison.
/* NOTE(review): this listing is a lossy extract; the return-type line,
   part of the parameter list, braces, several conditions and the
   early-return statements are not visible here.

   Recognizer that rewrites a BIT_FIELD_REF read into shift and mask
   statements on its container.  Visible behavior:
   - the starting statement is either a gcond whose lhs is an SSA_NAME and
     whose rhs is tested for being an INTEGER_CST, or a conversion whose
     operand is an SSA_NAME; BF_STMT is the defining assignment and its
     rhs code is compared against BIT_FIELD_REF;
   - RET_TYPE is the type of the conversion lhs, or the container type
     when no conversion lhs was recorded;
   - SHIFT_N and MASK_WIDTH are the constant bit offset and bit size of
     the reference, with SHIFT_N recomputed as PREC - SHIFT_N - MASK_WIDTH
     when BYTES_BIG_ENDIAN;
   - when the starting statement is a gcond, a new gcond is built that
     compares the extracted value with the constant converted to RET_TYPE.
   *TYPE_OUT is set to STMT_VINFO_VECTYPE (STMT_INFO).  */
2607 vect_recog_bitfield_ref_pattern (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2610 gimple
*bf_stmt
= NULL
;
2611 tree lhs
= NULL_TREE
;
2612 tree ret_type
= NULL_TREE
;
2613 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
2614 if (gcond
*cond_stmt
= dyn_cast
<gcond
*> (stmt
))
2616 tree op
= gimple_cond_lhs (cond_stmt
);
2617 if (TREE_CODE (op
) != SSA_NAME
)
2619 bf_stmt
= dyn_cast
<gassign
*> (SSA_NAME_DEF_STMT (op
));
2620 if (TREE_CODE (gimple_cond_rhs (cond_stmt
)) != INTEGER_CST
)
2623 else if (is_gimple_assign (stmt
)
2624 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt
))
2625 && TREE_CODE (gimple_assign_rhs1 (stmt
)) == SSA_NAME
)
2627 gimple
*second_stmt
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt
));
2628 bf_stmt
= dyn_cast
<gassign
*> (second_stmt
);
2629 lhs
= gimple_assign_lhs (stmt
);
2630 ret_type
= TREE_TYPE (lhs
);
2634 || gimple_assign_rhs_code (bf_stmt
) != BIT_FIELD_REF
)
/* BF_REF is the rhs of BF_STMT; its operand 0 is the container that the
   field is read from.  */
2637 tree bf_ref
= gimple_assign_rhs1 (bf_stmt
);
2638 tree container
= TREE_OPERAND (bf_ref
, 0);
2639 ret_type
= ret_type
? ret_type
: TREE_TYPE (container
);
2641 if (!bit_field_offset (bf_ref
).is_constant ()
2642 || !bit_field_size (bf_ref
).is_constant ()
2643 || !tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (container
))))
2646 if (!INTEGRAL_TYPE_P (TREE_TYPE (bf_ref
))
2647 || !INTEGRAL_TYPE_P (TREE_TYPE (container
))
2648 || TYPE_MODE (TREE_TYPE (container
)) == E_BLKmode
)
2651 gimple
*use_stmt
, *pattern_stmt
;
2652 use_operand_p use_p
;
2653 bool shift_first
= true;
2654 tree container_type
= TREE_TYPE (container
);
2655 tree vectype
= get_vectype_for_scalar_type (vinfo
, container_type
);
2657 /* Calculate shift_n before the adjustments for widening loads, otherwise
2658 the container may change and we have to consider offset change for
2659 widening loads on big endianness. The shift_n calculated here can be
2660 independent of widening. */
2661 unsigned HOST_WIDE_INT shift_n
= bit_field_offset (bf_ref
).to_constant ();
2662 unsigned HOST_WIDE_INT mask_width
= bit_field_size (bf_ref
).to_constant ();
2663 unsigned HOST_WIDE_INT prec
= tree_to_uhwi (TYPE_SIZE (container_type
));
2664 if (BYTES_BIG_ENDIAN
)
2665 shift_n
= prec
- shift_n
- mask_width
;
/* REF_SEXT: the field type is signed and RET_TYPE has more bits than the
   field.  LOAD_WIDEN: the container has fewer bits than RET_TYPE.  */
2667 bool ref_sext
= (!TYPE_UNSIGNED (TREE_TYPE (bf_ref
)) &&
2668 TYPE_PRECISION (ret_type
) > mask_width
);
2669 bool load_widen
= (TYPE_PRECISION (TREE_TYPE (container
)) <
2670 TYPE_PRECISION (ret_type
));
2672 /* We move the conversion earlier if the loaded type is smaller than the
2673 return type to enable the use of widening loads. And if we need a
2674 sign extension, we need to convert the loaded value early to a signed
2676 if (ref_sext
|| load_widen
)
2678 tree type
= load_widen
? ret_type
: container_type
;
2680 type
= gimple_signed_type (type
);
2681 pattern_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (type
),
2682 NOP_EXPR
, container
);
2683 container
= gimple_get_lhs (pattern_stmt
);
2684 container_type
= TREE_TYPE (container
);
2685 prec
= tree_to_uhwi (TYPE_SIZE (container_type
));
2686 vectype
= get_vectype_for_scalar_type (vinfo
, container_type
);
2687 append_pattern_def_seq (vinfo
, stmt_info
, pattern_stmt
, vectype
);
2689 else if (!useless_type_conversion_p (TREE_TYPE (container
), ret_type
))
2690 /* If we are doing the conversion last then also delay the shift as we may
2691 be able to combine the shift and conversion in certain cases. */
2692 shift_first
= false;
2694 /* If the only use of the result of this BIT_FIELD_REF + CONVERT is a
2695 PLUS_EXPR then do the shift last as some targets can combine the shift and
2696 add into a single instruction. */
2697 if (lhs
&& single_imm_use (lhs
, &use_p
, &use_stmt
))
2699 if (gimple_code (use_stmt
) == GIMPLE_ASSIGN
2700 && gimple_assign_rhs_code (use_stmt
) == PLUS_EXPR
)
2701 shift_first
= false;
2704 /* If we don't have to shift we only generate the mask, so just fix the
2705 code-path to shift_first. */
2710 if (shift_first
&& !ref_sext
)
2712 tree shifted
= container
;
2716 = gimple_build_assign (vect_recog_temp_ssa_var (container_type
),
2717 RSHIFT_EXPR
, container
,
2718 build_int_cst (sizetype
, shift_n
));
2719 shifted
= gimple_assign_lhs (pattern_stmt
);
2720 append_pattern_def_seq (vinfo
, stmt_info
, pattern_stmt
, vectype
);
2723 tree mask
= wide_int_to_tree (container_type
,
2724 wi::mask (mask_width
, false, prec
));
2727 = gimple_build_assign (vect_recog_temp_ssa_var (container_type
),
2728 BIT_AND_EXPR
, shifted
, mask
);
2729 result
= gimple_assign_lhs (pattern_stmt
);
2733 tree temp
= vect_recog_temp_ssa_var (container_type
);
2736 tree mask
= wide_int_to_tree (container_type
,
2737 wi::shifted_mask (shift_n
,
2740 pattern_stmt
= gimple_build_assign (temp
, BIT_AND_EXPR
,
2745 HOST_WIDE_INT shl
= prec
- shift_n
- mask_width
;
2747 pattern_stmt
= gimple_build_assign (temp
, LSHIFT_EXPR
,
2749 build_int_cst (sizetype
,
2753 tree masked
= gimple_assign_lhs (pattern_stmt
);
2754 append_pattern_def_seq (vinfo
, stmt_info
, pattern_stmt
, vectype
);
2756 = gimple_build_assign (vect_recog_temp_ssa_var (container_type
),
2757 RSHIFT_EXPR
, masked
,
2758 build_int_cst (sizetype
, shift_n
));
2759 result
= gimple_assign_lhs (pattern_stmt
);
2762 if (!useless_type_conversion_p (TREE_TYPE (result
), ret_type
))
2764 append_pattern_def_seq (vinfo
, stmt_info
, pattern_stmt
, vectype
);
2766 = gimple_build_assign (vect_recog_temp_ssa_var (ret_type
),
2772 append_pattern_def_seq (vinfo
, stmt_info
, pattern_stmt
, vectype
);
2773 gcond
*cond_stmt
= dyn_cast
<gcond
*> (stmt_info
->stmt
);
2774 tree cond_cst
= gimple_cond_rhs (cond_stmt
);
2776 = gimple_build_cond (gimple_cond_code (cond_stmt
),
2777 gimple_get_lhs (pattern_stmt
),
2778 fold_convert (ret_type
, cond_cst
),
2779 gimple_cond_true_label (cond_stmt
),
2780 gimple_cond_false_label (cond_stmt
));
2783 *type_out
= STMT_VINFO_VECTYPE (stmt_info
);
2784 vect_pattern_detected ("bitfield_ref pattern", stmt_info
->stmt
);
2786 return pattern_stmt
;
2789 /* Function vect_recog_bit_insert_pattern
2791 Try to find the following pattern:
2793 written = BIT_INSERT_EXPR (container, value, bitpos);
2797 * STMT_VINFO: The stmt we want to replace.
2801 * TYPE_OUT: The vector type of the output of this pattern.
2803 * Return value: A new stmt that will be used to replace the sequence of
2804 stmts that constitute the pattern. In this case it will be:
2805 value = (container_type) value; // Make sure
2806 shifted = value << bitpos; // Shift value into place
2807 masked = shifted & (mask << bitpos); // Mask off the non-relevant bits in
2808 // the 'to-write value'.
2809 cleared = container & ~(mask << bitpos); // Clearing the bits we want to
2810 // write to from the value we want
2812 written = cleared | masked; // Write bits.
2815 where mask = ((1 << TYPE_PRECISION (value)) - 1), a mask to keep the number of
2816 bits corresponding to the real size of the bitfield value we are writing to.
2817 The shifting is always optional depending on whether bitpos != 0.
/* NOTE(review): this listing is a lossy extract; the return-type line,
   part of the parameter list, braces, several conditions and the
   early-return statements are not visible here.

   Recognizer for an assignment whose rhs code is BIT_INSERT_EXPR, with
   operands CONTAINER, VALUE and SHIFT.  Visible behavior:
   - CONTAINER must have an integral type whose TYPE_SIZE fits an
     unsigned HOST_WIDE_INT;
   - VALUE is brought to the container type and shifted left by SHIFT;
   - the shifted value is ANDed with a mask (MASKED), and CONTAINER is
     ANDed with the mask built by wi::shifted_mask with its third argument
     true (CLEARED);
   - the returned statement is CLEARED | MASKED (BIT_IOR_EXPR);
   - when BYTES_BIG_ENDIAN, SHIFT_N becomes PREC - SHIFT_N - MASK_WIDTH
     and SHIFT is rebuilt from it.
   *TYPE_OUT is set to STMT_VINFO_VECTYPE (STMT_INFO).  */
2822 vect_recog_bit_insert_pattern (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2825 gassign
*bf_stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
2826 if (!bf_stmt
|| gimple_assign_rhs_code (bf_stmt
) != BIT_INSERT_EXPR
)
2829 tree container
= gimple_assign_rhs1 (bf_stmt
);
2830 tree value
= gimple_assign_rhs2 (bf_stmt
);
2831 tree shift
= gimple_assign_rhs3 (bf_stmt
);
/* BF_TYPE is the type of the inserted value; its precision is used below
   as MASK_WIDTH, the width of the written field.  */
2833 tree bf_type
= TREE_TYPE (value
);
2834 tree container_type
= TREE_TYPE (container
);
2836 if (!INTEGRAL_TYPE_P (container_type
)
2837 || !tree_fits_uhwi_p (TYPE_SIZE (container_type
)))
2840 gimple
*pattern_stmt
;
2842 vect_unpromoted_value unprom
;
2843 unprom
.set_op (value
, vect_internal_def
);
2844 value
= vect_convert_input (vinfo
, stmt_info
, container_type
, &unprom
,
2845 get_vectype_for_scalar_type (vinfo
,
2848 unsigned HOST_WIDE_INT mask_width
= TYPE_PRECISION (bf_type
);
2849 unsigned HOST_WIDE_INT prec
= tree_to_uhwi (TYPE_SIZE (container_type
));
2850 unsigned HOST_WIDE_INT shift_n
= tree_to_uhwi (shift
);
2851 if (BYTES_BIG_ENDIAN
)
2853 shift_n
= prec
- shift_n
- mask_width
;
2854 shift
= build_int_cst (TREE_TYPE (shift
), shift_n
);
2857 if (!useless_type_conversion_p (TREE_TYPE (value
), container_type
))
2860 gimple_build_assign (vect_recog_temp_ssa_var (container_type
),
2862 append_pattern_def_seq (vinfo
, stmt_info
, pattern_stmt
);
2863 value
= gimple_get_lhs (pattern_stmt
);
2866 /* Shift VALUE into place. */
2867 tree shifted
= value
;
2870 gimple_seq stmts
= NULL
;
2872 = gimple_build (&stmts
, LSHIFT_EXPR
, container_type
, value
, shift
);
2873 if (!gimple_seq_empty_p (stmts
))
2874 append_pattern_def_seq (vinfo
, stmt_info
,
2875 gimple_seq_first_stmt (stmts
));
2879 = wide_int_to_tree (container_type
,
2880 wi::shifted_mask (shift_n
, mask_width
, false, prec
));
2882 /* Clear bits we don't want to write back from SHIFTED. */
2883 gimple_seq stmts
= NULL
;
2884 tree masked
= gimple_build (&stmts
, BIT_AND_EXPR
, container_type
, shifted
,
2886 if (!gimple_seq_empty_p (stmts
))
2888 pattern_stmt
= gimple_seq_first_stmt (stmts
);
2889 append_pattern_def_seq (vinfo
, stmt_info
, pattern_stmt
);
2892 /* Mask off the bits in the container that we are to write to. */
2893 mask_t
= wide_int_to_tree (container_type
,
2894 wi::shifted_mask (shift_n
, mask_width
, true, prec
));
2895 tree cleared
= vect_recog_temp_ssa_var (container_type
);
2896 pattern_stmt
= gimple_build_assign (cleared
, BIT_AND_EXPR
, container
, mask_t
);
2897 append_pattern_def_seq (vinfo
, stmt_info
, pattern_stmt
);
2899 /* Write MASKED into CLEARED. */
2901 = gimple_build_assign (vect_recog_temp_ssa_var (container_type
),
2902 BIT_IOR_EXPR
, cleared
, masked
);
2904 *type_out
= STMT_VINFO_VECTYPE (stmt_info
);
2905 vect_pattern_detected ("bit_insert pattern", stmt_info
->stmt
);
2907 return pattern_stmt
;
2911 /* Recognize cases in which an operation is performed in one type WTYPE
2912 but could be done more efficiently in a narrower type NTYPE. For example,
2915 ATYPE a; // narrower than NTYPE
2916 BTYPE b; // narrower than NTYPE
2917 WTYPE aw = (WTYPE) a;
2918 WTYPE bw = (WTYPE) b;
2919 WTYPE res = aw + bw; // only uses of aw and bw
2921 then it would be more efficient to do:
2923 NTYPE an = (NTYPE) a;
2924 NTYPE bn = (NTYPE) b;
2925 NTYPE resn = an + bn;
2926 WTYPE res = (WTYPE) resn;
2928 Other situations include things like:
2930 ATYPE a; // NTYPE or narrower
2931 WTYPE aw = (WTYPE) a;
2934 when only "(NTYPE) res" is significant. In that case it's more efficient
2935 to truncate "b" and do the operation on NTYPE instead:
2937 NTYPE an = (NTYPE) a;
2938 NTYPE bn = (NTYPE) b; // truncation
2939 NTYPE resn = an + bn;
2940 WTYPE res = (WTYPE) resn;
2942 All users of "res" should then use "resn" instead, making the final
2943 statement dead (not marked as relevant). The final statement is still
2944 needed to maintain the type correctness of the IR.
2946 vect_determine_precisions has already determined the minimum
2947 precision of the operation and the minimum precision required
2948 by users of the result. */
/* NOTE(review): this listing is a lossy extract; the return-type line,
   braces, several conditions, some declarations and the early-return
   statements are not visible here.

   Recognizer that redoes an assignment in a narrower element precision.
   Visible behavior:
   - NEW_PRECISION starts as LAST_STMT_INFO->operation_precision, is
     raised to MIN_PRECISION when smaller, and is passed through
     vect_element_precision; it is then compared against the precision of
     the lhs type TYPE;
   - NEW_TYPE has NEW_PRECISION bits and the signedness given by
     operation_sign; for PLUS_EXPR, MINUS_EXPR and MULT_EXPR the operation
     type OP_TYPE is made unsigned when NEW_TYPE has undefined overflow;
   - for RSHIFT_EXPR the shift amount is limited to NEW_PRECISION - 1,
     either by adjusting an INTEGER_CST or by building a MIN_EXPR;
   - the new statement is converted to NEW_TYPE when OP_TYPE differs, and
     then to the original TYPE, which is the statement returned.
   *TYPE_OUT is set to the vector type for the original TYPE.  */
2951 vect_recog_over_widening_pattern (vec_info
*vinfo
,
2952 stmt_vec_info last_stmt_info
, tree
*type_out
)
2954 gassign
*last_stmt
= dyn_cast
<gassign
*> (last_stmt_info
->stmt
);
2958 /* See whether we have found that this operation can be done on a
2959 narrower type without changing its semantics. */
2960 unsigned int new_precision
= last_stmt_info
->operation_precision
;
2964 tree lhs
= gimple_assign_lhs (last_stmt
);
2965 tree type
= TREE_TYPE (lhs
);
2966 tree_code code
= gimple_assign_rhs_code (last_stmt
);
2968 /* Punt for reductions where we don't handle the type conversions. */
2969 if (STMT_VINFO_DEF_TYPE (last_stmt_info
) == vect_reduction_def
)
2972 /* Keep the first operand of a COND_EXPR as-is: only the other two
2973 operands are interesting. */
2974 unsigned int first_op
= (code
== COND_EXPR
? 2 : 1);
2976 /* Check the operands. */
2977 unsigned int nops
= gimple_num_ops (last_stmt
) - first_op
;
2978 auto_vec
<vect_unpromoted_value
, 3> unprom (nops
);
2979 unprom
.quick_grow_cleared (nops
);
/* MIN_PRECISION collects the largest precision among the unpromoted
   operands that are internal defs; SINGLE_USE_P is ORed with the
   per-operand single-use flag for those operands.  */
2980 unsigned int min_precision
= 0;
2981 bool single_use_p
= false;
2982 for (unsigned int i
= 0; i
< nops
; ++i
)
2984 tree op
= gimple_op (last_stmt
, first_op
+ i
);
2985 if (TREE_CODE (op
) == INTEGER_CST
)
2986 unprom
[i
].set_op (op
, vect_constant_def
);
2987 else if (TREE_CODE (op
) == SSA_NAME
)
2989 bool op_single_use_p
= true;
2990 if (!vect_look_through_possible_promotion (vinfo
, op
, &unprom
[i
],
2995 (1) N bits of the result are needed;
2996 (2) all inputs are widened from M<N bits; and
2997 (3) one operand OP is a single-use SSA name
2999 we can shift the M->N widening from OP to the output
3000 without changing the number or type of extensions involved.
3001 This then reduces the number of copies of STMT_INFO.
3003 If instead of (3) more than one operand is a single-use SSA name,
3004 shifting the extension to the output is even more of a win.
3008 (1) N bits of the result are needed;
3009 (2) one operand OP2 is widened from M2<N bits;
3010 (3) another operand OP1 is widened from M1<M2 bits; and
3011 (4) both OP1 and OP2 are single-use
3013 the choice is between:
3015 (a) truncating OP2 to M1, doing the operation on M1,
3016 and then widening the result to N
3018 (b) widening OP1 to M2, doing the operation on M2, and then
3019 widening the result to N
3021 Both shift the M2->N widening of the inputs to the output.
3022 (a) additionally shifts the M1->M2 widening to the output;
3023 it requires fewer copies of STMT_INFO but requires an extra
3026 Which is better will depend on the complexity and cost of
3027 STMT_INFO, which is hard to predict at this stage. However,
3028 a clear tie-breaker in favor of (b) is the fact that the
3029 truncation in (a) increases the length of the operation chain.
3031 If instead of (4) only one of OP1 or OP2 is single-use,
3032 (b) is still a win over doing the operation in N bits:
3033 it still shifts the M2->N widening on the single-use operand
3034 to the output and reduces the number of STMT_INFO copies.
3036 If neither operand is single-use then operating on fewer than
3037 N bits might lead to more extensions overall. Whether it does
3038 or not depends on global information about the vectorization
3039 region, and whether that's a good trade-off would again
3040 depend on the complexity and cost of the statements involved,
3041 as well as things like register pressure that are not normally
3042 modelled at this stage. We therefore ignore these cases
3043 and just optimize the clear single-use wins above.
3045 Thus we take the maximum precision of the unpromoted operands
3046 and record whether any operand is single-use. */
3047 if (unprom
[i
].dt
== vect_internal_def
)
3049 min_precision
= MAX (min_precision
,
3050 TYPE_PRECISION (unprom
[i
].type
));
3051 single_use_p
|= op_single_use_p
;
3058 /* Although the operation could be done in operation_precision, we have
3059 to balance that against introducing extra truncations or extensions.
3060 Calculate the minimum precision that can be handled efficiently.
3062 The loop above determined that the operation could be handled
3063 efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
3064 extension from the inputs to the output without introducing more
3065 instructions, and would reduce the number of instructions required
3066 for STMT_INFO itself.
3068 vect_determine_precisions has also determined that the result only
3069 needs min_output_precision bits. Truncating by a factor of N times
3070 requires a tree of N - 1 instructions, so if TYPE is N times wider
3071 than min_output_precision, doing the operation in TYPE and truncating
3072 the result requires N + (N - 1) = 2N - 1 instructions per output vector.
3075 - truncating the input to a unary operation and doing the operation
3076 in the new type requires at most N - 1 + 1 = N instructions per
3079 - doing the same for a binary operation requires at most
3080 (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
3082 Both unary and binary operations require fewer instructions than
3083 this if the operands were extended from a suitable truncated form.
3084 Thus there is usually nothing to lose by doing operations in
3085 min_output_precision bits, but there can be something to gain. */
3087 min_precision
= last_stmt_info
->min_output_precision
;
3089 min_precision
= MIN (min_precision
, last_stmt_info
->min_output_precision
);
3091 /* Apply the minimum efficient precision we just calculated. */
3092 if (new_precision
< min_precision
)
3093 new_precision
= min_precision
;
3094 new_precision
= vect_element_precision (new_precision
);
3095 if (new_precision
>= TYPE_PRECISION (type
))
3098 vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt
);
3100 *type_out
= get_vectype_for_scalar_type (vinfo
, type
);
3104 /* We've found a viable pattern. Get the new type of the operation. */
3105 bool unsigned_p
= (last_stmt_info
->operation_sign
== UNSIGNED
);
3106 tree new_type
= build_nonstandard_integer_type (new_precision
, unsigned_p
);
3108 /* If we're truncating an operation, we need to make sure that we
3109 don't introduce new undefined overflow. The codes tested here are
3110 a subset of those accepted by vect_truncatable_operation_p. */
3111 tree op_type
= new_type
;
3112 if (TYPE_OVERFLOW_UNDEFINED (new_type
)
3113 && (code
== PLUS_EXPR
|| code
== MINUS_EXPR
|| code
== MULT_EXPR
))
3114 op_type
= build_nonstandard_integer_type (new_precision
, true);
3116 /* We specifically don't check here whether the target supports the
3117 new operation, since it might be something that a later pattern
3118 wants to rewrite anyway. If targets have a minimum element size
3119 for some optabs, we should pattern-match smaller ops to larger ops
3120 where beneficial. */
3121 tree new_vectype
= get_vectype_for_scalar_type (vinfo
, new_type
);
3122 tree op_vectype
= get_vectype_for_scalar_type (vinfo
, op_type
);
3123 if (!new_vectype
|| !op_vectype
)
3126 if (dump_enabled_p ())
3127 dump_printf_loc (MSG_NOTE
, vect_location
, "demoting %T to %T\n",
3130 /* Calculate the rhs operands for an operation on OP_TYPE. */
3132 for (unsigned int i
= 1; i
< first_op
; ++i
)
3133 ops
[i
- 1] = gimple_op (last_stmt
, i
);
3134 /* For right shifts limit the shift operand. */
3135 vect_convert_inputs (vinfo
, last_stmt_info
, nops
, &ops
[first_op
- 1],
3136 op_type
, &unprom
[0], op_vectype
);
3138 /* Limit shift operands. */
3139 if (code
== RSHIFT_EXPR
)
3141 wide_int min_value
, max_value
;
3142 if (TREE_CODE (ops
[1]) == INTEGER_CST
)
3143 ops
[1] = wide_int_to_tree (op_type
,
3144 wi::umin (wi::to_wide (ops
[1]),
3145 new_precision
- 1));
3146 else if (!vect_get_range_info (ops
[1], &min_value
, &max_value
)
3147 || wi::ge_p (max_value
, new_precision
, TYPE_SIGN (op_type
)))
3149 /* ??? Note the following bad for SLP as that only supports
3150 same argument widened shifts and it un-CSEs same arguments. */
3151 tree new_var
= vect_recog_temp_ssa_var (op_type
, NULL
);
3152 gimple
*pattern_stmt
3153 = gimple_build_assign (new_var
, MIN_EXPR
, ops
[1],
3154 build_int_cst (op_type
, new_precision
- 1));
3155 gimple_set_location (pattern_stmt
, gimple_location (last_stmt
));
3156 if (ops
[1] == unprom
[1].op
&& unprom
[1].dt
== vect_external_def
)
3158 if (edge e
= vect_get_external_def_edge (vinfo
, ops
[1]))
3161 = gsi_insert_on_edge_immediate (e
, pattern_stmt
);
3162 gcc_assert (!new_bb
);
3168 append_pattern_def_seq (vinfo
, last_stmt_info
, pattern_stmt
,
3174 /* Use the operation to produce a result of type OP_TYPE. */
3175 tree new_var
= vect_recog_temp_ssa_var (op_type
, NULL
);
3176 gimple
*pattern_stmt
= gimple_build_assign (new_var
, code
,
3177 ops
[0], ops
[1], ops
[2]);
3178 gimple_set_location (pattern_stmt
, gimple_location (last_stmt
));
3180 if (dump_enabled_p ())
3181 dump_printf_loc (MSG_NOTE
, vect_location
,
3182 "created pattern stmt: %G", pattern_stmt
);
3184 /* Convert back to the original signedness, if OP_TYPE is different
3186 if (op_type
!= new_type
)
3187 pattern_stmt
= vect_convert_output (vinfo
, last_stmt_info
, new_type
,
3188 pattern_stmt
, op_vectype
);
3190 /* Promote the result to the original type. */
3191 pattern_stmt
= vect_convert_output (vinfo
, last_stmt_info
, type
,
3192 pattern_stmt
, new_vectype
);
3194 return pattern_stmt
;
3197 /* Recognize the following patterns:
3199 ATYPE a; // narrower than TYPE
3200 BTYPE b; // narrower than TYPE
3202 1) Multiply high with scaling
3203 TYPE res = ((TYPE) a * (TYPE) b) >> c;
3204 Here, c is bitsize (TYPE) / 2 - 1.
3206 2) ... or also with rounding
3207 TYPE res = (((TYPE) a * (TYPE) b) >> d + 1) >> 1;
3208 Here, d is bitsize (TYPE) / 2 - 2.
3210 3) Normal multiply high
3211 TYPE res = ((TYPE) a * (TYPE) b) >> e;
3212 Here, e is bitsize (TYPE) / 2.
3214 where only the bottom half of res is used. */
3217 vect_recog_mulhs_pattern (vec_info
*vinfo
,
3218 stmt_vec_info last_stmt_info
, tree
*type_out
)
3220 /* Check for a right shift. */
3221 gassign
*last_stmt
= dyn_cast
<gassign
*> (last_stmt_info
->stmt
);
3223 || gimple_assign_rhs_code (last_stmt
) != RSHIFT_EXPR
)
3226 /* Check that the shift result is wider than the users of the
3227 result need (i.e. that narrowing would be a natural choice). */
3228 tree lhs_type
= TREE_TYPE (gimple_assign_lhs (last_stmt
));
3229 unsigned int target_precision
3230 = vect_element_precision (last_stmt_info
->min_output_precision
);
3231 if (!INTEGRAL_TYPE_P (lhs_type
)
3232 || target_precision
>= TYPE_PRECISION (lhs_type
))
3235 /* Look through any change in sign on the outer shift input. */
3236 vect_unpromoted_value unprom_rshift_input
;
3237 tree rshift_input
= vect_look_through_possible_promotion
3238 (vinfo
, gimple_assign_rhs1 (last_stmt
), &unprom_rshift_input
);
3240 || TYPE_PRECISION (TREE_TYPE (rshift_input
))
3241 != TYPE_PRECISION (lhs_type
))
3244 /* Get the definition of the shift input. */
3245 stmt_vec_info rshift_input_stmt_info
3246 = vect_get_internal_def (vinfo
, rshift_input
);
3247 if (!rshift_input_stmt_info
)
3249 gassign
*rshift_input_stmt
3250 = dyn_cast
<gassign
*> (rshift_input_stmt_info
->stmt
);
3251 if (!rshift_input_stmt
)
3254 stmt_vec_info mulh_stmt_info
;
3256 bool rounding_p
= false;
3258 /* Check for the presence of the rounding term. */
3259 if (gimple_assign_rhs_code (rshift_input_stmt
) == PLUS_EXPR
)
3261 /* Check that the outer shift was by 1. */
3262 if (!integer_onep (gimple_assign_rhs2 (last_stmt
)))
3265 /* Check that the second operand of the PLUS_EXPR is 1. */
3266 if (!integer_onep (gimple_assign_rhs2 (rshift_input_stmt
)))
3269 /* Look through any change in sign on the addition input. */
3270 vect_unpromoted_value unprom_plus_input
;
3271 tree plus_input
= vect_look_through_possible_promotion
3272 (vinfo
, gimple_assign_rhs1 (rshift_input_stmt
), &unprom_plus_input
);
3274 || TYPE_PRECISION (TREE_TYPE (plus_input
))
3275 != TYPE_PRECISION (TREE_TYPE (rshift_input
)))
3278 /* Get the definition of the multiply-high-scale part. */
3279 stmt_vec_info plus_input_stmt_info
3280 = vect_get_internal_def (vinfo
, plus_input
);
3281 if (!plus_input_stmt_info
)
3283 gassign
*plus_input_stmt
3284 = dyn_cast
<gassign
*> (plus_input_stmt_info
->stmt
);
3285 if (!plus_input_stmt
3286 || gimple_assign_rhs_code (plus_input_stmt
) != RSHIFT_EXPR
)
3289 /* Look through any change in sign on the scaling input. */
3290 vect_unpromoted_value unprom_scale_input
;
3291 tree scale_input
= vect_look_through_possible_promotion
3292 (vinfo
, gimple_assign_rhs1 (plus_input_stmt
), &unprom_scale_input
);
3294 || TYPE_PRECISION (TREE_TYPE (scale_input
))
3295 != TYPE_PRECISION (TREE_TYPE (plus_input
)))
3298 /* Get the definition of the multiply-high part. */
3299 mulh_stmt_info
= vect_get_internal_def (vinfo
, scale_input
);
3300 if (!mulh_stmt_info
)
3303 /* Get the scaling term. */
3304 scale_term
= gimple_assign_rhs2 (plus_input_stmt
);
3309 mulh_stmt_info
= rshift_input_stmt_info
;
3310 scale_term
= gimple_assign_rhs2 (last_stmt
);
3313 /* Check that the scaling factor is constant. */
3314 if (TREE_CODE (scale_term
) != INTEGER_CST
)
3317 /* Check whether the scaling input term can be seen as two widened
3318 inputs multiplied together. */
3319 vect_unpromoted_value unprom_mult
[2];
3322 = vect_widened_op_tree (vinfo
, mulh_stmt_info
, MULT_EXPR
, WIDEN_MULT_EXPR
,
3323 false, 2, unprom_mult
, &new_type
);
3327 /* Adjust output precision. */
3328 if (TYPE_PRECISION (new_type
) < target_precision
)
3329 new_type
= build_nonstandard_integer_type
3330 (target_precision
, TYPE_UNSIGNED (new_type
));
3332 unsigned mult_precision
= TYPE_PRECISION (new_type
);
3334 /* Check that the scaling factor is expected. Instead of
3335 target_precision, we should use the one that we actually
3336 use for internal function. */
3339 /* Check pattern 2). */
3340 if (wi::to_widest (scale_term
) + mult_precision
+ 2
3341 != TYPE_PRECISION (lhs_type
))
3348 /* Check for pattern 1). */
3349 if (wi::to_widest (scale_term
) + mult_precision
+ 1
3350 == TYPE_PRECISION (lhs_type
))
3352 /* Check for pattern 3). */
3353 else if (wi::to_widest (scale_term
) + mult_precision
3354 == TYPE_PRECISION (lhs_type
))
3360 vect_pattern_detected ("vect_recog_mulhs_pattern", last_stmt
);
3362 /* Check for target support. */
3363 tree new_vectype
= get_vectype_for_scalar_type (vinfo
, new_type
);
3365 || !direct_internal_fn_supported_p
3366 (ifn
, new_vectype
, OPTIMIZE_FOR_SPEED
))
3369 /* The IR requires a valid vector type for the cast result, even though
3370 it's likely to be discarded. */
3371 *type_out
= get_vectype_for_scalar_type (vinfo
, lhs_type
);
3375 /* Generate the IFN_MULHRS call. */
3376 tree new_var
= vect_recog_temp_ssa_var (new_type
, NULL
);
3378 vect_convert_inputs (vinfo
, last_stmt_info
, 2, new_ops
, new_type
,
3379 unprom_mult
, new_vectype
);
3381 = gimple_build_call_internal (ifn
, 2, new_ops
[0], new_ops
[1]);
3382 gimple_call_set_lhs (mulhrs_stmt
, new_var
);
3383 gimple_set_location (mulhrs_stmt
, gimple_location (last_stmt
));
3385 if (dump_enabled_p ())
3386 dump_printf_loc (MSG_NOTE
, vect_location
,
3387 "created pattern stmt: %G", (gimple
*) mulhrs_stmt
);
3389 return vect_convert_output (vinfo
, last_stmt_info
, lhs_type
,
3390 mulhrs_stmt
, new_vectype
);
3393 /* Recognize the patterns:
3395 ATYPE a; // narrower than TYPE
3396 BTYPE b; // narrower than TYPE
3397 (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
3398 or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
3400 where only the bottom half of avg is used. Try to transform them into:
3402 (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
3403 or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
3407 TYPE avg = (TYPE) avg';
3409 where NTYPE is no wider than half of TYPE. Since only the bottom half
3410 of avg is used, all or part of the cast of avg' should become redundant.
3412 If there is no target support available, generate code to distribute rshift
3413 over plus and add a carry. */
3416 vect_recog_average_pattern (vec_info
*vinfo
,
3417 stmt_vec_info last_stmt_info
, tree
*type_out
)
3419 /* Check for a shift right by one bit. */
3420 gassign
*last_stmt
= dyn_cast
<gassign
*> (last_stmt_info
->stmt
);
3422 || gimple_assign_rhs_code (last_stmt
) != RSHIFT_EXPR
3423 || !integer_onep (gimple_assign_rhs2 (last_stmt
)))
3426 /* Check that the shift result is wider than the users of the
3427 result need (i.e. that narrowing would be a natural choice). */
3428 tree lhs
= gimple_assign_lhs (last_stmt
);
3429 tree type
= TREE_TYPE (lhs
);
3430 unsigned int target_precision
3431 = vect_element_precision (last_stmt_info
->min_output_precision
);
3432 if (!INTEGRAL_TYPE_P (type
) || target_precision
>= TYPE_PRECISION (type
))
3435 /* Look through any change in sign on the shift input. */
3436 tree rshift_rhs
= gimple_assign_rhs1 (last_stmt
);
3437 vect_unpromoted_value unprom_plus
;
3438 rshift_rhs
= vect_look_through_possible_promotion (vinfo
, rshift_rhs
,
3441 || TYPE_PRECISION (TREE_TYPE (rshift_rhs
)) != TYPE_PRECISION (type
))
3444 /* Get the definition of the shift input. */
3445 stmt_vec_info plus_stmt_info
= vect_get_internal_def (vinfo
, rshift_rhs
);
3446 if (!plus_stmt_info
)
3449 /* Check whether the shift input can be seen as a tree of additions on
3450 2 or 3 widened inputs.
3452 Note that the pattern should be a win even if the result of one or
3453 more additions is reused elsewhere: if the pattern matches, we'd be
3454 replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s. */
3455 internal_fn ifn
= IFN_AVG_FLOOR
;
3456 vect_unpromoted_value unprom
[3];
3458 unsigned int nops
= vect_widened_op_tree (vinfo
, plus_stmt_info
, PLUS_EXPR
,
3459 IFN_VEC_WIDEN_PLUS
, false, 3,
3465 /* Check that one operand is 1. */
3467 for (i
= 0; i
< 3; ++i
)
3468 if (integer_onep (unprom
[i
].op
))
3472 /* Throw away the 1 operand and keep the other two. */
3474 unprom
[i
] = unprom
[2];
3478 vect_pattern_detected ("vect_recog_average_pattern", last_stmt
);
3482 (a) the operation can be viewed as:
3484 TYPE widened0 = (TYPE) UNPROM[0];
3485 TYPE widened1 = (TYPE) UNPROM[1];
3486 TYPE tmp1 = widened0 + widened1 {+ 1};
3487 TYPE tmp2 = tmp1 >> 1; // LAST_STMT_INFO
3489 (b) the first two statements are equivalent to:
3491 TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
3492 TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
3494 (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
3497 (d) all the operations can be performed correctly at twice the width of
3498 NEW_TYPE, due to the nature of the average operation; and
3500 (e) users of the result of the right shift need only TARGET_PRECISION
3501 bits, where TARGET_PRECISION is no more than half of TYPE's
3504 Under these circumstances, the only situation in which NEW_TYPE
3505 could be narrower than TARGET_PRECISION is if widened0, widened1
3506 and an addition result are all used more than once. Thus we can
3507 treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
3508 as "free", whereas widening the result of the average instruction
3509 from NEW_TYPE to TARGET_PRECISION would be a new operation. It's
3510 therefore better not to go narrower than TARGET_PRECISION. */
3511 if (TYPE_PRECISION (new_type
) < target_precision
)
3512 new_type
= build_nonstandard_integer_type (target_precision
,
3513 TYPE_UNSIGNED (new_type
));
3515 /* Check for target support. */
3516 tree new_vectype
= get_vectype_for_scalar_type (vinfo
, new_type
);
3520 bool fallback_p
= false;
3522 if (direct_internal_fn_supported_p (ifn
, new_vectype
, OPTIMIZE_FOR_SPEED
))
3524 else if (TYPE_UNSIGNED (new_type
)
3525 && optab_for_tree_code (RSHIFT_EXPR
, new_vectype
, optab_scalar
)
3526 && optab_for_tree_code (PLUS_EXPR
, new_vectype
, optab_default
)
3527 && optab_for_tree_code (BIT_IOR_EXPR
, new_vectype
, optab_default
)
3528 && optab_for_tree_code (BIT_AND_EXPR
, new_vectype
, optab_default
))
3533 /* The IR requires a valid vector type for the cast result, even though
3534 it's likely to be discarded. */
3535 *type_out
= get_vectype_for_scalar_type (vinfo
, type
);
3539 tree new_var
= vect_recog_temp_ssa_var (new_type
, NULL
);
3541 vect_convert_inputs (vinfo
, last_stmt_info
, 2, new_ops
, new_type
,
3542 unprom
, new_vectype
);
3546 /* As a fallback, generate code for following sequence:
3548 shifted_op0 = new_ops[0] >> 1;
3549 shifted_op1 = new_ops[1] >> 1;
3550 sum_of_shifted = shifted_op0 + shifted_op1;
3551 unmasked_carry = new_ops[0] and/or new_ops[1];
3552 carry = unmasked_carry & 1;
3553 new_var = sum_of_shifted + carry;
3556 tree one_cst
= build_one_cst (new_type
);
3559 tree shifted_op0
= vect_recog_temp_ssa_var (new_type
, NULL
);
3560 g
= gimple_build_assign (shifted_op0
, RSHIFT_EXPR
, new_ops
[0], one_cst
);
3561 append_pattern_def_seq (vinfo
, last_stmt_info
, g
, new_vectype
);
3563 tree shifted_op1
= vect_recog_temp_ssa_var (new_type
, NULL
);
3564 g
= gimple_build_assign (shifted_op1
, RSHIFT_EXPR
, new_ops
[1], one_cst
);
3565 append_pattern_def_seq (vinfo
, last_stmt_info
, g
, new_vectype
);
3567 tree sum_of_shifted
= vect_recog_temp_ssa_var (new_type
, NULL
);
3568 g
= gimple_build_assign (sum_of_shifted
, PLUS_EXPR
,
3569 shifted_op0
, shifted_op1
);
3570 append_pattern_def_seq (vinfo
, last_stmt_info
, g
, new_vectype
);
3572 tree unmasked_carry
= vect_recog_temp_ssa_var (new_type
, NULL
);
3573 tree_code c
= (ifn
== IFN_AVG_CEIL
) ? BIT_IOR_EXPR
: BIT_AND_EXPR
;
3574 g
= gimple_build_assign (unmasked_carry
, c
, new_ops
[0], new_ops
[1]);
3575 append_pattern_def_seq (vinfo
, last_stmt_info
, g
, new_vectype
);
3577 tree carry
= vect_recog_temp_ssa_var (new_type
, NULL
);
3578 g
= gimple_build_assign (carry
, BIT_AND_EXPR
, unmasked_carry
, one_cst
);
3579 append_pattern_def_seq (vinfo
, last_stmt_info
, g
, new_vectype
);
3581 g
= gimple_build_assign (new_var
, PLUS_EXPR
, sum_of_shifted
, carry
);
3582 return vect_convert_output (vinfo
, last_stmt_info
, type
, g
, new_vectype
);
3585 /* Generate the IFN_AVG* call. */
3586 gcall
*average_stmt
= gimple_build_call_internal (ifn
, 2, new_ops
[0],
3588 gimple_call_set_lhs (average_stmt
, new_var
);
3589 gimple_set_location (average_stmt
, gimple_location (last_stmt
));
3591 if (dump_enabled_p ())
3592 dump_printf_loc (MSG_NOTE
, vect_location
,
3593 "created pattern stmt: %G", (gimple
*) average_stmt
);
3595 return vect_convert_output (vinfo
, last_stmt_info
,
3596 type
, average_stmt
, new_vectype
);
3599 /* Recognize cases in which the input to a cast is wider than its
3600 output, and the input is fed by a widening operation. Fold this
3601 by removing the unnecessary intermediate widening. E.g.:
3604 unsigned int b = (unsigned int) a;
3605 unsigned short c = (unsigned short) b;
3609 unsigned short c = (unsigned short) a;
3611 Although this is rare in input IR, it is an expected side-effect
3612 of the over-widening pattern above.
3614 This is beneficial also for integer-to-float conversions, if the
3615 widened integer has more bits than the float, and if the unwidened
3619 vect_recog_cast_forwprop_pattern (vec_info
*vinfo
,
3620 stmt_vec_info last_stmt_info
, tree
*type_out
)
3622 /* Check for a cast, including an integer-to-float conversion. */
3623 gassign
*last_stmt
= dyn_cast
<gassign
*> (last_stmt_info
->stmt
);
3626 tree_code code
= gimple_assign_rhs_code (last_stmt
);
3627 if (!CONVERT_EXPR_CODE_P (code
) && code
!= FLOAT_EXPR
)
3630 /* Make sure that the rhs is a scalar with a natural bitsize. */
3631 tree lhs
= gimple_assign_lhs (last_stmt
);
3634 tree lhs_type
= TREE_TYPE (lhs
);
3635 scalar_mode lhs_mode
;
3636 if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type
)
3637 || !is_a
<scalar_mode
> (TYPE_MODE (lhs_type
), &lhs_mode
))
3640 /* Check for a narrowing operation (from a vector point of view). */
3641 tree rhs
= gimple_assign_rhs1 (last_stmt
);
3642 tree rhs_type
= TREE_TYPE (rhs
);
3643 if (!INTEGRAL_TYPE_P (rhs_type
)
3644 || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type
)
3645 || TYPE_PRECISION (rhs_type
) <= GET_MODE_BITSIZE (lhs_mode
))
3648 /* Try to find an unpromoted input. */
3649 vect_unpromoted_value unprom
;
3650 if (!vect_look_through_possible_promotion (vinfo
, rhs
, &unprom
)
3651 || TYPE_PRECISION (unprom
.type
) >= TYPE_PRECISION (rhs_type
))
3654 /* If the bits above RHS_TYPE matter, make sure that they're the
3655 same when extending from UNPROM as they are when extending from RHS. */
3656 if (!INTEGRAL_TYPE_P (lhs_type
)
3657 && TYPE_SIGN (rhs_type
) != TYPE_SIGN (unprom
.type
))
3660 /* We can get the same result by casting UNPROM directly, to avoid
3661 the unnecessary widening and narrowing. */
3662 vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt
);
3664 *type_out
= get_vectype_for_scalar_type (vinfo
, lhs_type
);
3668 tree new_var
= vect_recog_temp_ssa_var (lhs_type
, NULL
);
3669 gimple
*pattern_stmt
= gimple_build_assign (new_var
, code
, unprom
.op
);
3670 gimple_set_location (pattern_stmt
, gimple_location (last_stmt
));
3672 return pattern_stmt
;
3675 /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
3676 to WIDEN_LSHIFT_EXPR. See vect_recog_widen_op_pattern for details. */
3679 vect_recog_widen_shift_pattern (vec_info
*vinfo
,
3680 stmt_vec_info last_stmt_info
, tree
*type_out
)
3682 return vect_recog_widen_op_pattern (vinfo
, last_stmt_info
, type_out
,
3683 LSHIFT_EXPR
, WIDEN_LSHIFT_EXPR
, true,
3684 "vect_recog_widen_shift_pattern");
3687 /* Detect a rotate pattern wouldn't be otherwise vectorized:
3691 S0 a_t = b_t r<< c_t;
3695 * STMT_VINFO: The stmt from which the pattern search begins,
3696 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
3700 S2 e_t = d_t & (B - 1);
3701 S3 f_t = b_t << c_t;
3702 S4 g_t = b_t >> e_t;
3705 where B is element bitsize of type.
3709 * TYPE_OUT: The type of the output of this pattern.
3711 * Return value: A new stmt that will be used to replace the rotate
3715 vect_recog_rotate_pattern (vec_info
*vinfo
,
3716 stmt_vec_info stmt_vinfo
, tree
*type_out
)
3718 gimple
*last_stmt
= stmt_vinfo
->stmt
;
3719 tree oprnd0
, oprnd1
, lhs
, var
, var1
, var2
, vectype
, type
, stype
, def
, def2
;
3720 gimple
*pattern_stmt
, *def_stmt
;
3721 enum tree_code rhs_code
;
3722 enum vect_def_type dt
;
3723 optab optab1
, optab2
;
3724 edge ext_def
= NULL
;
3725 bool bswap16_p
= false;
3727 if (is_gimple_assign (last_stmt
))
3729 rhs_code
= gimple_assign_rhs_code (last_stmt
);
3739 lhs
= gimple_assign_lhs (last_stmt
);
3740 oprnd0
= gimple_assign_rhs1 (last_stmt
);
3741 type
= TREE_TYPE (oprnd0
);
3742 oprnd1
= gimple_assign_rhs2 (last_stmt
);
3744 else if (gimple_call_builtin_p (last_stmt
, BUILT_IN_BSWAP16
))
3746 /* __builtin_bswap16 (x) is another form of x r>> 8.
3747 The vectorizer has bswap support, but only if the argument isn't
3749 lhs
= gimple_call_lhs (last_stmt
);
3750 oprnd0
= gimple_call_arg (last_stmt
, 0);
3751 type
= TREE_TYPE (oprnd0
);
3753 || TYPE_PRECISION (TREE_TYPE (lhs
)) != 16
3754 || TYPE_PRECISION (type
) <= 16
3755 || TREE_CODE (oprnd0
) != SSA_NAME
3756 || BITS_PER_UNIT
!= 8)
3759 stmt_vec_info def_stmt_info
;
3760 if (!vect_is_simple_use (oprnd0
, vinfo
, &dt
, &def_stmt_info
, &def_stmt
))
3763 if (dt
!= vect_internal_def
)
3766 if (gimple_assign_cast_p (def_stmt
))
3768 def
= gimple_assign_rhs1 (def_stmt
);
3769 if (INTEGRAL_TYPE_P (TREE_TYPE (def
))
3770 && TYPE_PRECISION (TREE_TYPE (def
)) == 16)
3774 type
= TREE_TYPE (lhs
);
3775 vectype
= get_vectype_for_scalar_type (vinfo
, type
);
3776 if (vectype
== NULL_TREE
)
3779 if (tree char_vectype
= get_same_sized_vectype (char_type_node
, vectype
))
3781 /* The encoding uses one stepped pattern for each byte in the
3783 vec_perm_builder
elts (TYPE_VECTOR_SUBPARTS (char_vectype
), 2, 3);
3784 for (unsigned i
= 0; i
< 3; ++i
)
3785 for (unsigned j
= 0; j
< 2; ++j
)
3786 elts
.quick_push ((i
+ 1) * 2 - j
- 1);
3788 vec_perm_indices
indices (elts
, 1,
3789 TYPE_VECTOR_SUBPARTS (char_vectype
));
3790 machine_mode vmode
= TYPE_MODE (char_vectype
);
3791 if (can_vec_perm_const_p (vmode
, vmode
, indices
))
3793 /* vectorizable_bswap can handle the __builtin_bswap16 if we
3794 undo the argument promotion. */
3795 if (!useless_type_conversion_p (type
, TREE_TYPE (oprnd0
)))
3797 def
= vect_recog_temp_ssa_var (type
, NULL
);
3798 def_stmt
= gimple_build_assign (def
, NOP_EXPR
, oprnd0
);
3799 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
3803 /* Pattern detected. */
3804 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt
);
3806 *type_out
= vectype
;
3808 /* Pattern supported. Create a stmt to be used to replace the
3809 pattern, with the unpromoted argument. */
3810 var
= vect_recog_temp_ssa_var (type
, NULL
);
3811 pattern_stmt
= gimple_build_call (gimple_call_fndecl (last_stmt
),
3813 gimple_call_set_lhs (pattern_stmt
, var
);
3814 gimple_call_set_fntype (as_a
<gcall
*> (pattern_stmt
),
3815 gimple_call_fntype (last_stmt
));
3816 return pattern_stmt
;
3820 oprnd1
= build_int_cst (integer_type_node
, 8);
3821 rhs_code
= LROTATE_EXPR
;
3827 if (TREE_CODE (oprnd0
) != SSA_NAME
3828 || !INTEGRAL_TYPE_P (type
)
3829 || TYPE_PRECISION (TREE_TYPE (lhs
)) != TYPE_PRECISION (type
))
3832 stmt_vec_info def_stmt_info
;
3833 if (!vect_is_simple_use (oprnd1
, vinfo
, &dt
, &def_stmt_info
, &def_stmt
))
3836 if (dt
!= vect_internal_def
3837 && dt
!= vect_constant_def
3838 && dt
!= vect_external_def
)
3841 vectype
= get_vectype_for_scalar_type (vinfo
, type
);
3842 if (vectype
== NULL_TREE
)
3845 /* If vector/vector or vector/scalar rotate is supported by the target,
3846 don't do anything here. */
3847 optab1
= optab_for_tree_code (rhs_code
, vectype
, optab_vector
);
3849 && optab_handler (optab1
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
3854 if (!useless_type_conversion_p (type
, TREE_TYPE (oprnd0
)))
3856 def
= vect_recog_temp_ssa_var (type
, NULL
);
3857 def_stmt
= gimple_build_assign (def
, NOP_EXPR
, oprnd0
);
3858 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
3862 /* Pattern detected. */
3863 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt
);
3865 *type_out
= vectype
;
3867 /* Pattern supported. Create a stmt to be used to replace the
3869 var
= vect_recog_temp_ssa_var (type
, NULL
);
3870 pattern_stmt
= gimple_build_assign (var
, LROTATE_EXPR
, oprnd0
,
3872 return pattern_stmt
;
3877 if (is_a
<bb_vec_info
> (vinfo
) || dt
!= vect_internal_def
)
3879 optab2
= optab_for_tree_code (rhs_code
, vectype
, optab_scalar
);
3881 && optab_handler (optab2
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
3885 tree utype
= unsigned_type_for (type
);
3886 tree uvectype
= get_vectype_for_scalar_type (vinfo
, utype
);
3890 /* If vector/vector or vector/scalar shifts aren't supported by the target,
3891 don't do anything here either. */
3892 optab1
= optab_for_tree_code (LSHIFT_EXPR
, uvectype
, optab_vector
);
3893 optab2
= optab_for_tree_code (RSHIFT_EXPR
, uvectype
, optab_vector
);
3895 || optab_handler (optab1
, TYPE_MODE (uvectype
)) == CODE_FOR_nothing
3897 || optab_handler (optab2
, TYPE_MODE (uvectype
)) == CODE_FOR_nothing
)
3899 if (! is_a
<bb_vec_info
> (vinfo
) && dt
== vect_internal_def
)
3901 optab1
= optab_for_tree_code (LSHIFT_EXPR
, uvectype
, optab_scalar
);
3902 optab2
= optab_for_tree_code (RSHIFT_EXPR
, uvectype
, optab_scalar
);
3904 || optab_handler (optab1
, TYPE_MODE (uvectype
)) == CODE_FOR_nothing
3906 || optab_handler (optab2
, TYPE_MODE (uvectype
)) == CODE_FOR_nothing
)
3910 *type_out
= vectype
;
3912 if (!useless_type_conversion_p (utype
, TREE_TYPE (oprnd0
)))
3914 def
= vect_recog_temp_ssa_var (utype
, NULL
);
3915 def_stmt
= gimple_build_assign (def
, NOP_EXPR
, oprnd0
);
3916 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
, uvectype
);
3920 if (dt
== vect_external_def
&& TREE_CODE (oprnd1
) == SSA_NAME
)
3921 ext_def
= vect_get_external_def_edge (vinfo
, oprnd1
);
3924 scalar_int_mode mode
= SCALAR_INT_TYPE_MODE (utype
);
3925 if (dt
!= vect_internal_def
|| TYPE_MODE (TREE_TYPE (oprnd1
)) == mode
)
3927 else if (def_stmt
&& gimple_assign_cast_p (def_stmt
))
3929 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
3930 if (TYPE_MODE (TREE_TYPE (rhs1
)) == mode
3931 && TYPE_PRECISION (TREE_TYPE (rhs1
))
3932 == TYPE_PRECISION (type
))
3936 if (def
== NULL_TREE
)
3938 def
= vect_recog_temp_ssa_var (utype
, NULL
);
3939 def_stmt
= gimple_build_assign (def
, NOP_EXPR
, oprnd1
);
3940 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
, uvectype
);
3942 stype
= TREE_TYPE (def
);
3944 if (TREE_CODE (def
) == INTEGER_CST
)
3946 if (!tree_fits_uhwi_p (def
)
3947 || tree_to_uhwi (def
) >= GET_MODE_PRECISION (mode
)
3948 || integer_zerop (def
))
3950 def2
= build_int_cst (stype
,
3951 GET_MODE_PRECISION (mode
) - tree_to_uhwi (def
));
3955 tree vecstype
= get_vectype_for_scalar_type (vinfo
, stype
);
3957 if (vecstype
== NULL_TREE
)
3959 def2
= vect_recog_temp_ssa_var (stype
, NULL
);
3960 def_stmt
= gimple_build_assign (def2
, NEGATE_EXPR
, def
);
3964 = gsi_insert_on_edge_immediate (ext_def
, def_stmt
);
3965 gcc_assert (!new_bb
);
3968 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
, vecstype
);
3970 def2
= vect_recog_temp_ssa_var (stype
, NULL
);
3971 tree mask
= build_int_cst (stype
, GET_MODE_PRECISION (mode
) - 1);
3972 def_stmt
= gimple_build_assign (def2
, BIT_AND_EXPR
,
3973 gimple_assign_lhs (def_stmt
), mask
);
3977 = gsi_insert_on_edge_immediate (ext_def
, def_stmt
);
3978 gcc_assert (!new_bb
);
3981 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
, vecstype
);
3984 var1
= vect_recog_temp_ssa_var (utype
, NULL
);
3985 def_stmt
= gimple_build_assign (var1
, rhs_code
== LROTATE_EXPR
3986 ? LSHIFT_EXPR
: RSHIFT_EXPR
,
3988 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
, uvectype
);
3990 var2
= vect_recog_temp_ssa_var (utype
, NULL
);
3991 def_stmt
= gimple_build_assign (var2
, rhs_code
== LROTATE_EXPR
3992 ? RSHIFT_EXPR
: LSHIFT_EXPR
,
3994 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
, uvectype
);
3996 /* Pattern detected. */
3997 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt
);
3999 /* Pattern supported. Create a stmt to be used to replace the pattern. */
4000 var
= vect_recog_temp_ssa_var (utype
, NULL
);
4001 pattern_stmt
= gimple_build_assign (var
, BIT_IOR_EXPR
, var1
, var2
);
4003 if (!useless_type_conversion_p (type
, utype
))
4005 append_pattern_def_seq (vinfo
, stmt_vinfo
, pattern_stmt
, uvectype
);
4006 tree result
= vect_recog_temp_ssa_var (type
, NULL
);
4007 pattern_stmt
= gimple_build_assign (result
, NOP_EXPR
, var
);
4009 return pattern_stmt
;
4012 /* Detect a vector by vector shift pattern that wouldn't be otherwise
4020 S3 res_T = b_T op a_t;
4022 where type 'TYPE' is a type with different size than 'type',
4023 and op is <<, >> or rotate.
4028 TYPE b_T, c_T, res_T;
4031 S1 a_t = (type) c_T;
4033 S3 res_T = b_T op a_t;
4037 * STMT_VINFO: The stmt from which the pattern search begins,
4038 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
4039 with a shift/rotate which has same type on both operands, in the
4040 second case just b_T op c_T, in the first case with added cast
4041 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
4045 * TYPE_OUT: The type of the output of this pattern.
4047 * Return value: A new stmt that will be used to replace the shift/rotate
4051 vect_recog_vector_vector_shift_pattern (vec_info
*vinfo
,
4052 stmt_vec_info stmt_vinfo
,
4055 gimple
*last_stmt
= stmt_vinfo
->stmt
;
4056 tree oprnd0
, oprnd1
, lhs
, var
;
4057 gimple
*pattern_stmt
;
4058 enum tree_code rhs_code
;
4060 if (!is_gimple_assign (last_stmt
))
4063 rhs_code
= gimple_assign_rhs_code (last_stmt
);
4075 lhs
= gimple_assign_lhs (last_stmt
);
4076 oprnd0
= gimple_assign_rhs1 (last_stmt
);
4077 oprnd1
= gimple_assign_rhs2 (last_stmt
);
4078 if (TREE_CODE (oprnd0
) != SSA_NAME
4079 || TREE_CODE (oprnd1
) != SSA_NAME
4080 || TYPE_MODE (TREE_TYPE (oprnd0
)) == TYPE_MODE (TREE_TYPE (oprnd1
))
4081 || !INTEGRAL_TYPE_P (TREE_TYPE (oprnd0
))
4082 || !type_has_mode_precision_p (TREE_TYPE (oprnd1
))
4083 || TYPE_PRECISION (TREE_TYPE (lhs
))
4084 != TYPE_PRECISION (TREE_TYPE (oprnd0
)))
4087 stmt_vec_info def_vinfo
= vect_get_internal_def (vinfo
, oprnd1
);
4091 *type_out
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (oprnd0
));
4092 if (*type_out
== NULL_TREE
)
4095 tree def
= NULL_TREE
;
4096 gassign
*def_stmt
= dyn_cast
<gassign
*> (def_vinfo
->stmt
);
4097 if (def_stmt
&& gimple_assign_cast_p (def_stmt
))
4099 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
4100 if (TYPE_MODE (TREE_TYPE (rhs1
)) == TYPE_MODE (TREE_TYPE (oprnd0
))
4101 && TYPE_PRECISION (TREE_TYPE (rhs1
))
4102 == TYPE_PRECISION (TREE_TYPE (oprnd0
)))
4104 if (TYPE_PRECISION (TREE_TYPE (oprnd1
))
4105 >= TYPE_PRECISION (TREE_TYPE (rhs1
)))
4110 = build_low_bits_mask (TREE_TYPE (rhs1
),
4111 TYPE_PRECISION (TREE_TYPE (oprnd1
)));
4112 def
= vect_recog_temp_ssa_var (TREE_TYPE (rhs1
), NULL
);
4113 def_stmt
= gimple_build_assign (def
, BIT_AND_EXPR
, rhs1
, mask
);
4114 tree vecstype
= get_vectype_for_scalar_type (vinfo
,
4116 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
, vecstype
);
4121 if (def
== NULL_TREE
)
4123 def
= vect_recog_temp_ssa_var (TREE_TYPE (oprnd0
), NULL
);
4124 def_stmt
= gimple_build_assign (def
, NOP_EXPR
, oprnd1
);
4125 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
4128 /* Pattern detected. */
4129 vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt
);
4131 /* Pattern supported. Create a stmt to be used to replace the pattern. */
4132 var
= vect_recog_temp_ssa_var (TREE_TYPE (oprnd0
), NULL
);
4133 pattern_stmt
= gimple_build_assign (var
, rhs_code
, oprnd0
, def
);
4135 return pattern_stmt
;
4138 /* Return true iff the target has a vector optab implementing the operation
4139 CODE on type VECTYPE. */
4142 target_has_vecop_for_code (tree_code code
, tree vectype
)
4144 optab voptab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4146 && optab_handler (voptab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
;
4149 /* Verify that the target has optabs of VECTYPE to perform all the steps
4150 needed by the multiplication-by-immediate synthesis algorithm described by
4151 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
4152 present. Return true iff the target supports all the steps. */
4155 target_supports_mult_synth_alg (struct algorithm
*alg
, mult_variant var
,
4156 tree vectype
, bool synth_shift_p
)
4158 if (alg
->op
[0] != alg_zero
&& alg
->op
[0] != alg_m
)
4161 bool supports_vminus
= target_has_vecop_for_code (MINUS_EXPR
, vectype
);
4162 bool supports_vplus
= target_has_vecop_for_code (PLUS_EXPR
, vectype
);
4164 if (var
== negate_variant
4165 && !target_has_vecop_for_code (NEGATE_EXPR
, vectype
))
4168 /* If we must synthesize shifts with additions make sure that vector
4169 addition is available. */
4170 if ((var
== add_variant
|| synth_shift_p
) && !supports_vplus
)
4173 for (int i
= 1; i
< alg
->ops
; i
++)
4181 case alg_add_factor
:
4182 if (!supports_vplus
)
4187 case alg_sub_factor
:
4188 if (!supports_vminus
)
4194 case alg_impossible
:
4204 /* Synthesize a left shift of OP by AMNT bits using a series of additions and
4205 putting the final result in DEST. Append all statements but the last into
4206 VINFO. Return the last statement. */
4209 synth_lshift_by_additions (vec_info
*vinfo
,
4210 tree dest
, tree op
, HOST_WIDE_INT amnt
,
4211 stmt_vec_info stmt_info
)
4214 tree itype
= TREE_TYPE (op
);
4216 gcc_assert (amnt
>= 0);
4217 for (i
= 0; i
< amnt
; i
++)
4219 tree tmp_var
= (i
< amnt
- 1) ? vect_recog_temp_ssa_var (itype
, NULL
)
4222 = gimple_build_assign (tmp_var
, PLUS_EXPR
, prev_res
, prev_res
);
4225 append_pattern_def_seq (vinfo
, stmt_info
, stmt
);
4233 /* Helper for vect_synth_mult_by_constant. Apply a binary operation
4234 CODE to operands OP1 and OP2, creating a new temporary SSA var in
4235 the process if necessary. Append the resulting assignment statements
4236 to the sequence in STMT_VINFO. Return the SSA variable that holds the
4237 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
4238 left shifts using additions. */
4241 apply_binop_and_append_stmt (vec_info
*vinfo
,
4242 tree_code code
, tree op1
, tree op2
,
4243 stmt_vec_info stmt_vinfo
, bool synth_shift_p
)
4245 if (integer_zerop (op2
)
4246 && (code
== LSHIFT_EXPR
4247 || code
== PLUS_EXPR
))
4249 gcc_assert (TREE_CODE (op1
) == SSA_NAME
);
4254 tree itype
= TREE_TYPE (op1
);
4255 tree tmp_var
= vect_recog_temp_ssa_var (itype
, NULL
);
4257 if (code
== LSHIFT_EXPR
4260 stmt
= synth_lshift_by_additions (vinfo
, tmp_var
, op1
,
4261 TREE_INT_CST_LOW (op2
), stmt_vinfo
);
4262 append_pattern_def_seq (vinfo
, stmt_vinfo
, stmt
);
4266 stmt
= gimple_build_assign (tmp_var
, code
, op1
, op2
);
4267 append_pattern_def_seq (vinfo
, stmt_vinfo
, stmt
);
4271 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
4272 and simple arithmetic operations to be vectorized. Record the statements
4273 produced in STMT_VINFO and return the last statement in the sequence or
4274 NULL if it's not possible to synthesize such a multiplication.
4275 This function mirrors the behavior of expand_mult_const in expmed.cc but
4276 works on tree-ssa form.

   Implementation notes, limited to what the visible statements show:
   - VAL is tested with tree_fits_shwi_p and later read with tree_to_shwi.
   - When the type of OP does not wrap on overflow (!TYPE_OVERFLOW_WRAPS)
     the arithmetic is carried out in unsigned_type_for of that type: OP
     is converted with CONVERT_EXPR on entry and the accumulator is
     converted back with CONVERT_EXPR at the end.
   - SYNTH_SHIFT_P is set when vect_supportable_shift reports no
     LSHIFT_EXPR support for the multiplication type.
   - choose_mult_variant is queried with the mode of the vector type when
     that is a vector mode, otherwise with the scalar mode, using MAX_COST
     as the limit; target_supports_mult_synth_alg is consulted afterwards.
   - The accumulator starts as constant zero when alg.op[0] is alg_zero.
   - The main loop starts at index 1 of the algorithm.  A step statement
     is appended to the pattern definition sequence unless it is the last
     step and neither a variant fixup nor a cast back is pending.
   - negate_variant adds a final NEGATE_EXPR and add_variant a final
     PLUS_EXPR with OP; these are appended only when a cast back follows.

   NOTE(review): several lines of this listing are missing (switch/case
   labels, else branches, returns); confirm details against upstream.  */
4279 vect_synth_mult_by_constant (vec_info
*vinfo
, tree op
, tree val
,
4280 stmt_vec_info stmt_vinfo
)
4282 tree itype
= TREE_TYPE (op
);
4283 machine_mode mode
= TYPE_MODE (itype
);
4284 struct algorithm alg
;
4285 mult_variant variant
;
4286 if (!tree_fits_shwi_p (val
))
4289 /* Multiplication synthesis by shifts, adds and subs can introduce
4290 signed overflow where the original operation didn't. Perform the
4291 operations on an unsigned type and cast back to avoid this.
4292 In the future we may want to relax this for synthesis algorithms
4293 that we can prove do not cause unexpected overflow. */
4294 bool cast_to_unsigned_p
= !TYPE_OVERFLOW_WRAPS (itype
);
4296 tree multtype
= cast_to_unsigned_p
? unsigned_type_for (itype
) : itype
;
4297 tree vectype
= get_vectype_for_scalar_type (vinfo
, multtype
);
4301 /* Targets that don't support vector shifts but support vector additions
4302 can synthesize shifts that way. */
4303 bool synth_shift_p
= !vect_supportable_shift (vinfo
, LSHIFT_EXPR
, multtype
);
4305 HOST_WIDE_INT hwval
= tree_to_shwi (val
);
4306 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
4307 The vectorizer's benefit analysis will decide whether it's beneficial
4309 bool possible
= choose_mult_variant (VECTOR_MODE_P (TYPE_MODE (vectype
))
4310 ? TYPE_MODE (vectype
) : mode
,
4311 hwval
, &alg
, &variant
, MAX_COST
);
4315 if (!target_supports_mult_synth_alg (&alg
, variant
, vectype
, synth_shift_p
))
4320 /* Clear out the sequence of statements so we can populate it below. */
4321 gimple
*stmt
= NULL
;
4323 if (cast_to_unsigned_p
)
4325 tree tmp_op
= vect_recog_temp_ssa_var (multtype
, NULL
);
4326 stmt
= gimple_build_assign (tmp_op
, CONVERT_EXPR
, op
);
4327 append_pattern_def_seq (vinfo
, stmt_vinfo
, stmt
);
4331 if (alg
.op
[0] == alg_zero
)
4332 accumulator
= build_int_cst (multtype
, 0);
4336 bool needs_fixup
= (variant
== negate_variant
)
4337 || (variant
== add_variant
);
4339 for (int i
= 1; i
< alg
.ops
; i
++)
4341 tree shft_log
= build_int_cst (multtype
, alg
.log
[i
]);
4342 tree accum_tmp
= vect_recog_temp_ssa_var (multtype
, NULL
);
4343 tree tmp_var
= NULL_TREE
;
4350 = synth_lshift_by_additions (vinfo
, accum_tmp
, accumulator
,
4351 alg
.log
[i
], stmt_vinfo
);
4353 stmt
= gimple_build_assign (accum_tmp
, LSHIFT_EXPR
, accumulator
,
4358 = apply_binop_and_append_stmt (vinfo
, LSHIFT_EXPR
, op
, shft_log
,
4359 stmt_vinfo
, synth_shift_p
);
4360 stmt
= gimple_build_assign (accum_tmp
, PLUS_EXPR
, accumulator
,
4364 tmp_var
= apply_binop_and_append_stmt (vinfo
, LSHIFT_EXPR
, op
,
4365 shft_log
, stmt_vinfo
,
4367 /* In some algorithms the first step involves zeroing the
4368 accumulator. If subtracting from such an accumulator
4369 just emit the negation directly. */
4370 if (integer_zerop (accumulator
))
4371 stmt
= gimple_build_assign (accum_tmp
, NEGATE_EXPR
, tmp_var
);
4373 stmt
= gimple_build_assign (accum_tmp
, MINUS_EXPR
, accumulator
,
4378 = apply_binop_and_append_stmt (vinfo
, LSHIFT_EXPR
, accumulator
,
4379 shft_log
, stmt_vinfo
, synth_shift_p
);
4380 stmt
= gimple_build_assign (accum_tmp
, PLUS_EXPR
, tmp_var
, op
);
4384 = apply_binop_and_append_stmt (vinfo
, LSHIFT_EXPR
, accumulator
,
4385 shft_log
, stmt_vinfo
, synth_shift_p
);
4386 stmt
= gimple_build_assign (accum_tmp
, MINUS_EXPR
, tmp_var
, op
);
4388 case alg_add_factor
:
4390 = apply_binop_and_append_stmt (vinfo
, LSHIFT_EXPR
, accumulator
,
4391 shft_log
, stmt_vinfo
, synth_shift_p
);
4392 stmt
= gimple_build_assign (accum_tmp
, PLUS_EXPR
, accumulator
,
4395 case alg_sub_factor
:
4397 = apply_binop_and_append_stmt (vinfo
, LSHIFT_EXPR
, accumulator
,
4398 shft_log
, stmt_vinfo
, synth_shift_p
);
4399 stmt
= gimple_build_assign (accum_tmp
, MINUS_EXPR
, tmp_var
,
4405 /* We don't want to append the last stmt in the sequence to stmt_vinfo
4406 but rather return it directly. */
4408 if ((i
< alg
.ops
- 1) || needs_fixup
|| cast_to_unsigned_p
)
4409 append_pattern_def_seq (vinfo
, stmt_vinfo
, stmt
);
4410 accumulator
= accum_tmp
;
4412 if (variant
== negate_variant
)
4414 tree accum_tmp
= vect_recog_temp_ssa_var (multtype
, NULL
);
4415 stmt
= gimple_build_assign (accum_tmp
, NEGATE_EXPR
, accumulator
);
4416 accumulator
= accum_tmp
;
4417 if (cast_to_unsigned_p
)
4418 append_pattern_def_seq (vinfo
, stmt_vinfo
, stmt
);
4420 else if (variant
== add_variant
)
4422 tree accum_tmp
= vect_recog_temp_ssa_var (multtype
, NULL
);
4423 stmt
= gimple_build_assign (accum_tmp
, PLUS_EXPR
, accumulator
, op
);
4424 accumulator
= accum_tmp
;
4425 if (cast_to_unsigned_p
)
4426 append_pattern_def_seq (vinfo
, stmt_vinfo
, stmt
);
4428 /* Move back to a signed if needed. */
4429 if (cast_to_unsigned_p
)
4431 tree accum_tmp
= vect_recog_temp_ssa_var (itype
, NULL
);
4432 stmt
= gimple_build_assign (accum_tmp
, CONVERT_EXPR
, accumulator
);
4438 /* Detect multiplication by constant and convert it into a sequence of
4439 shifts and additions, subtractions, negations. We reuse the
4440 choose_mult_variant algorithms from expmed.cc
4444 STMT_VINFO: The stmt from which the pattern search begins,
4449 * TYPE_OUT: The type of the output of this pattern.
4451 * Return value: A new stmt that will be used to replace
4452 the multiplication.

   Implementation notes, limited to what the visible statements show:
   the candidate must be an assignment whose rhs code is MULT_EXPR, with
   an SSA_NAME first operand, an INTEGER_CST second operand and an
   integral operand type that has mode precision.  A vector type for that
   scalar type must exist.  The MULT_EXPR optab handler for the vector
   mode is looked up so that targets with a native vector multiply are
   left alone.  The replacement is produced by
   vect_synth_mult_by_constant, and *TYPE_OUT is set to the vector type
   before the pattern statement is returned.

   NOTE(review): the early-exit statements are missing from this listing;
   confirm the exact failure returns against upstream.  */
4455 vect_recog_mult_pattern (vec_info
*vinfo
,
4456 stmt_vec_info stmt_vinfo
, tree
*type_out
)
4458 gimple
*last_stmt
= stmt_vinfo
->stmt
;
4459 tree oprnd0
, oprnd1
, vectype
, itype
;
4460 gimple
*pattern_stmt
;
4462 if (!is_gimple_assign (last_stmt
))
4465 if (gimple_assign_rhs_code (last_stmt
) != MULT_EXPR
)
4468 oprnd0
= gimple_assign_rhs1 (last_stmt
);
4469 oprnd1
= gimple_assign_rhs2 (last_stmt
);
4470 itype
= TREE_TYPE (oprnd0
);
4472 if (TREE_CODE (oprnd0
) != SSA_NAME
4473 || TREE_CODE (oprnd1
) != INTEGER_CST
4474 || !INTEGRAL_TYPE_P (itype
)
4475 || !type_has_mode_precision_p (itype
))
4478 vectype
= get_vectype_for_scalar_type (vinfo
, itype
);
4479 if (vectype
== NULL_TREE
)
4482 /* If the target can handle vectorized multiplication natively,
4483 don't attempt to optimize this. */
4484 optab mul_optab
= optab_for_tree_code (MULT_EXPR
, vectype
, optab_default
);
4485 if (mul_optab
!= unknown_optab
)
4487 machine_mode vec_mode
= TYPE_MODE (vectype
);
4488 int icode
= (int) optab_handler (mul_optab
, vec_mode
);
4489 if (icode
!= CODE_FOR_nothing
)
4493 pattern_stmt
= vect_synth_mult_by_constant (vinfo
,
4494 oprnd0
, oprnd1
, stmt_vinfo
);
4498 /* Pattern detected. */
4499 vect_pattern_detected ("vect_recog_mult_pattern", last_stmt
);
4501 *type_out
= vectype
;
4503 return pattern_stmt
;
4506 /* Detect a signed division by a constant that wouldn't be
4507 otherwise vectorized:
4513 where type 'type' is an integral type and N is a constant.
4515 Similarly handle modulo by a constant:
4521 * STMT_VINFO: The stmt from which the pattern search begins,
4522 i.e. the division stmt. S1 is replaced by if N is a power
4523 of two constant and type is signed:
4524 S3 y_t = b_t < 0 ? N - 1 : 0;
4526 S1' a_t = x_t >> log2 (N);
4528 S4 is replaced if N is a power of two constant and
4529 type is signed by (where *_T temporaries have unsigned type):
4530 S9 y_T = b_t < 0 ? -1U : 0U;
4531 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
4532 S7 z_t = (type) z_T;
4534 S5 x_t = w_t & (N - 1);
4535 S4' a_t = x_t - z_t;
4539 * TYPE_OUT: The type of the output of this pattern.
4541 * Return value: A new stmt that will be used to replace the division
4542 S1 or modulo S4 stmt.

   Implementation notes, limited to what the visible statements show:
   - The candidate must be an assignment; the case labels visible here are
     TRUNC_DIV_EXPR, EXACT_DIV_EXPR and TRUNC_MOD_EXPR.  The first operand
     must be an SSA_NAME, the second an INTEGER_CST, and the operand type
     an INTEGER_TYPE with mode precision that has a vector type.
   - When the block is optimized for size, the native vector optab handler
     for the operation is looked up first.
   - Power-of-two divisors (integer_pow2p): if the target directly
     supports IFN_DIV_POW2 for the vector type, a call with the shift
     count tree_log2 (oprnd1) is built, and for TRUNC_MOD_EXPR it is
     followed by a left shift and a further statement.  Otherwise the
     conditional sequences sketched above are emitted from the comparison
     oprnd0 < 0.
   - Unsigned TRUNC_DIV_EXPR where the target hook
     preferred_div_as_shifts_over_mult accepts the vector type and the
     divisor plus one equals 2 to the power prec / 2: a gimple_ranger
     supplies the upper bound of oprnd0, and the add and right-shift
     sequence is emitted only if adding the adjustment to that bound does
     not overflow.
   - Remaining cases test prec against HOST_BITS_PER_WIDE_INT, test for a
     zero divisor and query can_mult_highpart_p; choose_multiplier then
     yields the multiplier and shift counts used to build
     MULT_HIGHPART_EXPR based sequences, separately for unsigned and
     signed types.
   - For TRUNC_MOD_EXPR the quotient statement is appended and the result
     is computed as oprnd0 - q * oprnd1.
   - *TYPE_OUT is set to the vector type on the visible return paths.

   NOTE(review): many lines are missing from this listing (declarations,
   else branches, early returns, parts of comments); confirm any detail
   against the upstream source.  */
4545 vect_recog_divmod_pattern (vec_info
*vinfo
,
4546 stmt_vec_info stmt_vinfo
, tree
*type_out
)
4548 gimple
*last_stmt
= stmt_vinfo
->stmt
;
4549 tree oprnd0
, oprnd1
, vectype
, itype
, cond
;
4550 gimple
*pattern_stmt
, *def_stmt
;
4551 enum tree_code rhs_code
;
4554 int dummy_int
, prec
;
4556 if (!is_gimple_assign (last_stmt
))
4559 rhs_code
= gimple_assign_rhs_code (last_stmt
);
4562 case TRUNC_DIV_EXPR
:
4563 case EXACT_DIV_EXPR
:
4564 case TRUNC_MOD_EXPR
:
4570 oprnd0
= gimple_assign_rhs1 (last_stmt
);
4571 oprnd1
= gimple_assign_rhs2 (last_stmt
);
4572 itype
= TREE_TYPE (oprnd0
);
4573 if (TREE_CODE (oprnd0
) != SSA_NAME
4574 || TREE_CODE (oprnd1
) != INTEGER_CST
4575 || TREE_CODE (itype
) != INTEGER_TYPE
4576 || !type_has_mode_precision_p (itype
))
4579 scalar_int_mode itype_mode
= SCALAR_INT_TYPE_MODE (itype
);
4580 vectype
= get_vectype_for_scalar_type (vinfo
, itype
);
4581 if (vectype
== NULL_TREE
)
4584 if (optimize_bb_for_size_p (gimple_bb (last_stmt
)))
4586 /* If the target can handle vectorized division or modulo natively,
4587 don't attempt to optimize this, since native division is likely
4588 to give smaller code. */
4589 optab
= optab_for_tree_code (rhs_code
, vectype
, optab_default
);
4590 if (optab
!= unknown_optab
)
4592 machine_mode vec_mode
= TYPE_MODE (vectype
);
4593 int icode
= (int) optab_handler (optab
, vec_mode
);
4594 if (icode
!= CODE_FOR_nothing
)
4599 prec
= TYPE_PRECISION (itype
);
4600 if (integer_pow2p (oprnd1
))
4602 if (TYPE_UNSIGNED (itype
) || tree_int_cst_sgn (oprnd1
) != 1)
4605 /* Pattern detected. */
4606 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt
);
4608 *type_out
= vectype
;
4610 /* Check if the target supports this internal function. */
4611 internal_fn ifn
= IFN_DIV_POW2
;
4612 if (direct_internal_fn_supported_p (ifn
, vectype
, OPTIMIZE_FOR_SPEED
))
4614 tree shift
= build_int_cst (itype
, tree_log2 (oprnd1
));
4616 tree var_div
= vect_recog_temp_ssa_var (itype
, NULL
);
4617 gimple
*div_stmt
= gimple_build_call_internal (ifn
, 2, oprnd0
, shift
);
4618 gimple_call_set_lhs (div_stmt
, var_div
);
4620 if (rhs_code
== TRUNC_MOD_EXPR
)
4622 append_pattern_def_seq (vinfo
, stmt_vinfo
, div_stmt
);
4624 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
4625 LSHIFT_EXPR
, var_div
, shift
);
4626 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
4628 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
4630 gimple_assign_lhs (def_stmt
));
4633 pattern_stmt
= div_stmt
;
4634 gimple_set_location (pattern_stmt
, gimple_location (last_stmt
));
4636 return pattern_stmt
;
4639 cond
= build2 (LT_EXPR
, boolean_type_node
, oprnd0
,
4640 build_int_cst (itype
, 0));
4641 if (rhs_code
== TRUNC_DIV_EXPR
4642 || rhs_code
== EXACT_DIV_EXPR
)
4644 tree var
= vect_recog_temp_ssa_var (itype
, NULL
);
4647 = gimple_build_assign (var
, COND_EXPR
, cond
,
4648 fold_build2 (MINUS_EXPR
, itype
, oprnd1
,
4649 build_int_cst (itype
, 1)),
4650 build_int_cst (itype
, 0));
4651 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
4652 var
= vect_recog_temp_ssa_var (itype
, NULL
);
4654 = gimple_build_assign (var
, PLUS_EXPR
, oprnd0
,
4655 gimple_assign_lhs (def_stmt
));
4656 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
4658 shift
= build_int_cst (itype
, tree_log2 (oprnd1
));
4660 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
4661 RSHIFT_EXPR
, var
, shift
);
4666 if (compare_tree_int (oprnd1
, 2) == 0)
4668 signmask
= vect_recog_temp_ssa_var (itype
, NULL
);
4669 def_stmt
= gimple_build_assign (signmask
, COND_EXPR
, cond
,
4670 build_int_cst (itype
, 1),
4671 build_int_cst (itype
, 0));
4672 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
4677 = build_nonstandard_integer_type (prec
, 1);
4678 tree vecutype
= get_vectype_for_scalar_type (vinfo
, utype
);
4680 = build_int_cst (utype
, GET_MODE_BITSIZE (itype_mode
)
4681 - tree_log2 (oprnd1
));
4682 tree var
= vect_recog_temp_ssa_var (utype
, NULL
);
4684 def_stmt
= gimple_build_assign (var
, COND_EXPR
, cond
,
4685 build_int_cst (utype
, -1),
4686 build_int_cst (utype
, 0));
4687 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
, vecutype
);
4688 var
= vect_recog_temp_ssa_var (utype
, NULL
);
4689 def_stmt
= gimple_build_assign (var
, RSHIFT_EXPR
,
4690 gimple_assign_lhs (def_stmt
),
4692 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
, vecutype
);
4693 signmask
= vect_recog_temp_ssa_var (itype
, NULL
);
4695 = gimple_build_assign (signmask
, NOP_EXPR
, var
);
4696 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
4699 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
4700 PLUS_EXPR
, oprnd0
, signmask
);
4701 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
4703 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
4704 BIT_AND_EXPR
, gimple_assign_lhs (def_stmt
),
4705 fold_build2 (MINUS_EXPR
, itype
, oprnd1
,
4706 build_int_cst (itype
, 1)));
4707 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
4710 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
4711 MINUS_EXPR
, gimple_assign_lhs (def_stmt
),
4715 return pattern_stmt
;
4718 if ((cst
= uniform_integer_cst_p (oprnd1
))
4719 && TYPE_UNSIGNED (itype
)
4720 && rhs_code
== TRUNC_DIV_EXPR
4722 && targetm
.vectorize
.preferred_div_as_shifts_over_mult (vectype
))
4724 /* We can use the relationship:
4726 x // N == ((x+N+2) // (N+1) + x) // (N+1) for 0 <= x < N(N+3)
4728 to optimize cases where N+1 is a power of 2, and where // (N+1)
4729 is therefore a shift right. When operating in modes that are
4730 multiples of a byte in size, there are two cases:
4732 (1) N(N+3) is not representable, in which case the question
4733 becomes whether the replacement expression overflows.
4734 It is enough to test that x+N+2 does not overflow,
4735 i.e. that x < MAX-(N+1).
4737 (2) N(N+3) is representable, in which case it is the (only)
4738 bound that we need to check.
4740 ??? For now we just handle the case where // (N+1) is a shift
4741 right by half the precision, since some architectures can
4742 optimize the associated addition and shift combinations
4743 into single instructions. */
4745 auto wcst
= wi::to_wide (cst
);
4746 int pow
= wi::exact_log2 (wcst
+ 1);
4747 if (pow
== prec
/ 2)
4749 gimple
*stmt
= SSA_NAME_DEF_STMT (oprnd0
);
4751 gimple_ranger ranger
;
4754 /* Check that no overflow will occur. If we don't have range
4755 information we can't perform the optimization. */
4757 if (ranger
.range_of_expr (r
, oprnd0
, stmt
) && !r
.undefined_p ())
4759 wide_int max
= r
.upper_bound ();
4760 wide_int one
= wi::shwi (1, prec
);
4761 wide_int adder
= wi::add (one
, wi::lshift (one
, pow
));
4762 wi::overflow_type ovf
;
4763 wi::add (max
, adder
, UNSIGNED
, &ovf
);
4764 if (ovf
== wi::OVF_NONE
)
4766 *type_out
= vectype
;
4767 tree tadder
= wide_int_to_tree (itype
, adder
);
4768 tree rshift
= wide_int_to_tree (itype
, pow
);
4770 tree new_lhs1
= vect_recog_temp_ssa_var (itype
, NULL
);
4772 = gimple_build_assign (new_lhs1
, PLUS_EXPR
, oprnd0
, tadder
);
4773 append_pattern_def_seq (vinfo
, stmt_vinfo
, patt1
, vectype
);
4775 tree new_lhs2
= vect_recog_temp_ssa_var (itype
, NULL
);
4776 patt1
= gimple_build_assign (new_lhs2
, RSHIFT_EXPR
, new_lhs1
,
4778 append_pattern_def_seq (vinfo
, stmt_vinfo
, patt1
, vectype
);
4780 tree new_lhs3
= vect_recog_temp_ssa_var (itype
, NULL
);
4781 patt1
= gimple_build_assign (new_lhs3
, PLUS_EXPR
, new_lhs2
,
4783 append_pattern_def_seq (vinfo
, stmt_vinfo
, patt1
, vectype
);
4785 tree new_lhs4
= vect_recog_temp_ssa_var (itype
, NULL
);
4786 pattern_stmt
= gimple_build_assign (new_lhs4
, RSHIFT_EXPR
,
4789 return pattern_stmt
;
4795 if (prec
> HOST_BITS_PER_WIDE_INT
4796 || integer_zerop (oprnd1
))
4799 if (!can_mult_highpart_p (TYPE_MODE (vectype
), TYPE_UNSIGNED (itype
)))
4802 if (TYPE_UNSIGNED (itype
))
4804 unsigned HOST_WIDE_INT mh
, ml
;
4805 int pre_shift
, post_shift
;
4806 unsigned HOST_WIDE_INT d
= (TREE_INT_CST_LOW (oprnd1
)
4807 & GET_MODE_MASK (itype_mode
));
4808 tree t1
, t2
, t3
, t4
;
4810 if (d
>= (HOST_WIDE_INT_1U
<< (prec
- 1)))
4811 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
4814 /* Find a suitable multiplier and right shift count
4815 instead of multiplying with D. */
4816 mh
= choose_multiplier (d
, prec
, prec
, &ml
, &post_shift
, &dummy_int
);
4818 /* If the suggested multiplier is more than SIZE bits, we can do better
4819 for even divisors, using an initial right shift. */
4820 if (mh
!= 0 && (d
& 1) == 0)
4822 pre_shift
= ctz_or_zero (d
);
4823 mh
= choose_multiplier (d
>> pre_shift
, prec
, prec
- pre_shift
,
4824 &ml
, &post_shift
, &dummy_int
);
4832 if (post_shift
- 1 >= prec
)
4835 /* t1 = oprnd0 h* ml;
4839 q = t4 >> (post_shift - 1); */
4840 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
4841 def_stmt
= gimple_build_assign (t1
, MULT_HIGHPART_EXPR
, oprnd0
,
4842 build_int_cst (itype
, ml
));
4843 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
4845 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
4847 = gimple_build_assign (t2
, MINUS_EXPR
, oprnd0
, t1
);
4848 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
4850 t3
= vect_recog_temp_ssa_var (itype
, NULL
);
4852 = gimple_build_assign (t3
, RSHIFT_EXPR
, t2
, integer_one_node
);
4853 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
4855 t4
= vect_recog_temp_ssa_var (itype
, NULL
);
4857 = gimple_build_assign (t4
, PLUS_EXPR
, t1
, t3
);
4859 if (post_shift
!= 1)
4861 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
4863 q
= vect_recog_temp_ssa_var (itype
, NULL
);
4865 = gimple_build_assign (q
, RSHIFT_EXPR
, t4
,
4866 build_int_cst (itype
, post_shift
- 1));
4871 pattern_stmt
= def_stmt
;
4876 if (pre_shift
>= prec
|| post_shift
>= prec
)
4879 /* t1 = oprnd0 >> pre_shift;
4881 q = t2 >> post_shift; */
4884 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
4886 = gimple_build_assign (t1
, RSHIFT_EXPR
, oprnd0
,
4887 build_int_cst (NULL
, pre_shift
));
4888 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
4893 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
4894 def_stmt
= gimple_build_assign (t2
, MULT_HIGHPART_EXPR
, t1
,
4895 build_int_cst (itype
, ml
));
4899 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
4901 q
= vect_recog_temp_ssa_var (itype
, NULL
);
4903 = gimple_build_assign (q
, RSHIFT_EXPR
, t2
,
4904 build_int_cst (itype
, post_shift
));
4909 pattern_stmt
= def_stmt
;
4914 unsigned HOST_WIDE_INT ml
;
4916 HOST_WIDE_INT d
= TREE_INT_CST_LOW (oprnd1
);
4917 unsigned HOST_WIDE_INT abs_d
;
4919 tree t1
, t2
, t3
, t4
;
4921 /* Give up for -1. */
4925 /* Since d might be INT_MIN, we have to cast to
4926 unsigned HOST_WIDE_INT before negating to avoid
4927 undefined signed overflow. */
4929 ? (unsigned HOST_WIDE_INT
) d
4930 : - (unsigned HOST_WIDE_INT
) d
);
4932 /* n rem d = n rem -d */
4933 if (rhs_code
== TRUNC_MOD_EXPR
&& d
< 0)
4936 oprnd1
= build_int_cst (itype
, abs_d
);
4938 if (HOST_BITS_PER_WIDE_INT
>= prec
4939 && abs_d
== HOST_WIDE_INT_1U
<< (prec
- 1))
4940 /* This case is not handled correctly below. */
4943 choose_multiplier (abs_d
, prec
, prec
- 1, &ml
, &post_shift
, &dummy_int
);
4944 if (ml
>= HOST_WIDE_INT_1U
<< (prec
- 1))
4947 ml
|= HOST_WIDE_INT_M1U
<< (prec
- 1);
4949 if (post_shift
>= prec
)
4952 /* t1 = oprnd0 h* ml; */
4953 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
4954 def_stmt
= gimple_build_assign (t1
, MULT_HIGHPART_EXPR
, oprnd0
,
4955 build_int_cst (itype
, ml
));
4959 /* t2 = t1 + oprnd0; */
4960 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
4961 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
4962 def_stmt
= gimple_build_assign (t2
, PLUS_EXPR
, t1
, oprnd0
);
4969 /* t3 = t2 >> post_shift; */
4970 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
4971 t3
= vect_recog_temp_ssa_var (itype
, NULL
);
4972 def_stmt
= gimple_build_assign (t3
, RSHIFT_EXPR
, t2
,
4973 build_int_cst (itype
, post_shift
));
4980 get_range_query (cfun
)->range_of_expr (r
, oprnd0
);
4981 if (!r
.varying_p () && !r
.undefined_p ())
4983 if (!wi::neg_p (r
.lower_bound (), TYPE_SIGN (itype
)))
4985 else if (wi::neg_p (r
.upper_bound (), TYPE_SIGN (itype
)))
4989 if (msb
== 0 && d
>= 0)
4993 pattern_stmt
= def_stmt
;
4997 /* t4 = oprnd0 >> (prec - 1);
4998 or if we know from VRP that oprnd0 >= 0
5000 or if we know from VRP that oprnd0 < 0
5002 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
5003 t4
= vect_recog_temp_ssa_var (itype
, NULL
);
5005 def_stmt
= gimple_build_assign (t4
, INTEGER_CST
,
5006 build_int_cst (itype
, msb
));
5008 def_stmt
= gimple_build_assign (t4
, RSHIFT_EXPR
, oprnd0
,
5009 build_int_cst (itype
, prec
- 1));
5010 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
5012 /* q = t3 - t4; or q = t4 - t3; */
5013 q
= vect_recog_temp_ssa_var (itype
, NULL
);
5014 pattern_stmt
= gimple_build_assign (q
, MINUS_EXPR
, d
< 0 ? t4
: t3
,
5019 if (rhs_code
== TRUNC_MOD_EXPR
)
5023 /* We divided. Now finish by:
5026 append_pattern_def_seq (vinfo
, stmt_vinfo
, pattern_stmt
);
5028 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
5029 def_stmt
= gimple_build_assign (t1
, MULT_EXPR
, q
, oprnd1
);
5030 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
5032 r
= vect_recog_temp_ssa_var (itype
, NULL
);
5033 pattern_stmt
= gimple_build_assign (r
, MINUS_EXPR
, oprnd0
, t1
);
5036 /* Pattern detected. */
5037 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt
);
5039 *type_out
= vectype
;
5040 return pattern_stmt
;
5043 /* Function vect_recog_mixed_size_cond_pattern
5045 Try to find the following pattern:
5050 S1 a_T = x_t CMP y_t ? b_T : c_T;
5052 where type 'TYPE' is an integral type which has different size
5053 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
5054 than 'type', the constants need to fit into an integer type
5055 with the same width as 'type') or results of conversion from 'type'.
5059 * STMT_VINFO: The stmt from which the pattern search begins.
5063 * TYPE_OUT: The type of the output of this pattern.
5065 * Return value: A new stmt that will be used to replace the pattern.
5066 Additionally a def_stmt is added.
5068 a_it = x_t CMP y_t ? b_it : c_it;
5069 a_T = (TYPE) a_it;

   Implementation notes, limited to what the visible statements show:
   - The candidate must be a COND_EXPR assignment whose def type is
     vect_internal_def and whose first operand is a comparison
     (COMPARISON_CLASS_P).
   - The comparison operand type must have a vector type; the result type
     must be integral and must not be compatible with the comparison
     operand type.
   - Non-constant arms are checked with type_conversion_p; when both arms
     carry an original type those types must be compatible.
   - The result mode size is compared with the element size of the
     comparison vector mode, and expand_vec_cond_expr_p is queried both
     for the result vector type and for the intermediate one.
   - When no intermediate type was chosen earlier it is built with
     build_nonstandard_integer_type using the comparison element size and
     the signedness of the result type.  If the result type is wider,
     constant arms must fit that intermediate type.
   - The emitted def_stmt is a COND_EXPR in the intermediate type using an
     unshared copy of the condition and fold_convert of both arms; the
     returned pattern statement is a NOP_EXPR to the result type.
     *TYPE_OUT is set to the result vector type.

   NOTE(review): early returns and some assignments are missing from this
   listing; confirm against the upstream source.  */
5072 vect_recog_mixed_size_cond_pattern (vec_info
*vinfo
,
5073 stmt_vec_info stmt_vinfo
, tree
*type_out
)
5075 gimple
*last_stmt
= stmt_vinfo
->stmt
;
5076 tree cond_expr
, then_clause
, else_clause
;
5077 tree type
, vectype
, comp_vectype
, itype
= NULL_TREE
, vecitype
;
5078 gimple
*pattern_stmt
, *def_stmt
;
5079 tree orig_type0
= NULL_TREE
, orig_type1
= NULL_TREE
;
5080 gimple
*def_stmt0
= NULL
, *def_stmt1
= NULL
;
5082 tree comp_scalar_type
;
5084 if (!is_gimple_assign (last_stmt
)
5085 || gimple_assign_rhs_code (last_stmt
) != COND_EXPR
5086 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_internal_def
)
5089 cond_expr
= gimple_assign_rhs1 (last_stmt
);
5090 then_clause
= gimple_assign_rhs2 (last_stmt
);
5091 else_clause
= gimple_assign_rhs3 (last_stmt
);
5093 if (!COMPARISON_CLASS_P (cond_expr
))
5096 comp_scalar_type
= TREE_TYPE (TREE_OPERAND (cond_expr
, 0));
5097 comp_vectype
= get_vectype_for_scalar_type (vinfo
, comp_scalar_type
);
5098 if (comp_vectype
== NULL_TREE
)
5101 type
= TREE_TYPE (gimple_assign_lhs (last_stmt
));
5102 if (types_compatible_p (type
, comp_scalar_type
)
5103 || ((TREE_CODE (then_clause
) != INTEGER_CST
5104 || TREE_CODE (else_clause
) != INTEGER_CST
)
5105 && !INTEGRAL_TYPE_P (comp_scalar_type
))
5106 || !INTEGRAL_TYPE_P (type
))
5109 if ((TREE_CODE (then_clause
) != INTEGER_CST
5110 && !type_conversion_p (vinfo
, then_clause
, false,
5111 &orig_type0
, &def_stmt0
, &promotion
))
5112 || (TREE_CODE (else_clause
) != INTEGER_CST
5113 && !type_conversion_p (vinfo
, else_clause
, false,
5114 &orig_type1
, &def_stmt1
, &promotion
)))
5117 if (orig_type0
&& orig_type1
5118 && !types_compatible_p (orig_type0
, orig_type1
))
5123 if (!types_compatible_p (orig_type0
, comp_scalar_type
))
5125 then_clause
= gimple_assign_rhs1 (def_stmt0
);
5131 if (!types_compatible_p (orig_type1
, comp_scalar_type
))
5133 else_clause
= gimple_assign_rhs1 (def_stmt1
);
5138 HOST_WIDE_INT cmp_mode_size
5139 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype
));
5141 scalar_int_mode type_mode
= SCALAR_INT_TYPE_MODE (type
);
5142 if (GET_MODE_BITSIZE (type_mode
) == cmp_mode_size
)
5145 vectype
= get_vectype_for_scalar_type (vinfo
, type
);
5146 if (vectype
== NULL_TREE
)
5149 if (expand_vec_cond_expr_p (vectype
, comp_vectype
, TREE_CODE (cond_expr
)))
5152 if (itype
== NULL_TREE
)
5153 itype
= build_nonstandard_integer_type (cmp_mode_size
,
5154 TYPE_UNSIGNED (type
));
5156 if (itype
== NULL_TREE
5157 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype
)) != cmp_mode_size
)
5160 vecitype
= get_vectype_for_scalar_type (vinfo
, itype
);
5161 if (vecitype
== NULL_TREE
)
5164 if (!expand_vec_cond_expr_p (vecitype
, comp_vectype
, TREE_CODE (cond_expr
)))
5167 if (GET_MODE_BITSIZE (type_mode
) > cmp_mode_size
)
5169 if ((TREE_CODE (then_clause
) == INTEGER_CST
5170 && !int_fits_type_p (then_clause
, itype
))
5171 || (TREE_CODE (else_clause
) == INTEGER_CST
5172 && !int_fits_type_p (else_clause
, itype
)))
5176 def_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
5177 COND_EXPR
, unshare_expr (cond_expr
),
5178 fold_convert (itype
, then_clause
),
5179 fold_convert (itype
, else_clause
));
5180 pattern_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (type
, NULL
),
5181 NOP_EXPR
, gimple_assign_lhs (def_stmt
));
5183 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
, vecitype
);
5184 *type_out
= vectype
;
5186 vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt
);
5188 return pattern_stmt
;
/* Reviewer notes for check_bool_pattern, limited to what the visible
   statements show:
   - VAR is resolved with vect_get_internal_def and its defining statement
     is taken as a gassign through dyn_cast.
   - STMTS doubles as the visited set: it is queried with contains before
     the statement is examined and the statement is added at the end.
   - The function recurses on the first rhs operand, and for one group of
     codes on the second rhs operand as well.
   - For comparison codes it checks stmt_could_throw_p, requires a vector
     type for the compared operand type, queries expand_vec_cmp_expr_p
     with the mask type, and for operands that are not INTEGER_TYPE builds
     an unsigned integer type of the same mode size before querying
     expand_vec_cond_expr_p.
   NOTE(review): the switch labels, the return statements and the end of
   the original header comment are missing from this listing; confirm the
   exact results against the upstream source.  */
5192 /* Helper function of vect_recog_bool_pattern. Called recursively, return
5193 true if bool VAR can and should be optimized that way. Assume it shouldn't
5194 in case it's a result of a comparison which can be directly vectorized into
5195 a vector comparison. Fills in STMTS with all stmts visited during the
5199 check_bool_pattern (tree var
, vec_info
*vinfo
, hash_set
<gimple
*> &stmts
)
5202 enum tree_code rhs_code
;
5204 stmt_vec_info def_stmt_info
= vect_get_internal_def (vinfo
, var
);
5208 gassign
*def_stmt
= dyn_cast
<gassign
*> (def_stmt_info
->stmt
);
5212 if (stmts
.contains (def_stmt
))
5215 rhs1
= gimple_assign_rhs1 (def_stmt
);
5216 rhs_code
= gimple_assign_rhs_code (def_stmt
);
5220 if (! check_bool_pattern (rhs1
, vinfo
, stmts
))
5225 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1
)))
5227 if (! check_bool_pattern (rhs1
, vinfo
, stmts
))
5232 if (! check_bool_pattern (rhs1
, vinfo
, stmts
))
5239 if (! check_bool_pattern (rhs1
, vinfo
, stmts
)
5240 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt
), vinfo
, stmts
))
5245 if (TREE_CODE_CLASS (rhs_code
) == tcc_comparison
)
5247 tree vecitype
, comp_vectype
;
5249 /* If the comparison can throw, then is_gimple_condexpr will be
5250 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
5251 if (stmt_could_throw_p (cfun
, def_stmt
))
5254 comp_vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (rhs1
));
5255 if (comp_vectype
== NULL_TREE
)
5258 tree mask_type
= get_mask_type_for_scalar_type (vinfo
,
5261 && expand_vec_cmp_expr_p (comp_vectype
, mask_type
, rhs_code
))
5264 if (TREE_CODE (TREE_TYPE (rhs1
)) != INTEGER_TYPE
)
5266 scalar_mode mode
= SCALAR_TYPE_MODE (TREE_TYPE (rhs1
));
5268 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode
), 1);
5269 vecitype
= get_vectype_for_scalar_type (vinfo
, itype
);
5270 if (vecitype
== NULL_TREE
)
5274 vecitype
= comp_vectype
;
5275 if (! expand_vec_cond_expr_p (vecitype
, comp_vectype
, rhs_code
))
5283 bool res
= stmts
.add (def_stmt
);
5284 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
5291 /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
5292 stmt (SSA_NAME_DEF_STMT of VAR) adding a cast to STMT_INFOs
5293 pattern sequence.

   The cast statement assigns to a fresh temporary of TYPE obtained from
   vect_recog_temp_ssa_var.  It is appended to the pattern definition
   sequence of STMT_INFO together with the vector type that
   get_vectype_for_scalar_type returns for TYPE.  The value returned is
   the lhs of that cast statement.

   NOTE(review): the line carrying the rhs of the cast assignment is
   missing from this listing; confirm the expression code upstream.  */
5296 adjust_bool_pattern_cast (vec_info
*vinfo
,
5297 tree type
, tree var
, stmt_vec_info stmt_info
)
5299 gimple
*cast_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (type
, NULL
),
5301 append_pattern_def_seq (vinfo
, stmt_info
, cast_stmt
,
5302 get_vectype_for_scalar_type (vinfo
, type
));
5303 return gimple_assign_lhs (cast_stmt
);
5306 /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
5307 VAR is an SSA_NAME that should be transformed from bool to a wider integer
5308 type, OUT_TYPE is the desired final integer type of the whole pattern.
5309 STMT_INFO is the info of the pattern root and is where pattern stmts should
5310 be associated with. DEFS is a map of pattern defs.

   Implementation notes, limited to what the visible statements show:
   - The defining statement of VAR supplies rhs1, rhs2, the rhs code and
     the source location.
   - Operands that were already transformed are fetched from DEFS with
     get; the result of this call is stored into DEFS under VAR.
   - One visible path builds BIT_XOR_EXPR of the transformed operand with
     the constant 1.
   - For the and-with-comparison optimization described below, the
     precision of the transformed operand is compared with the mode size
     of the comparison operand type before the comparison is reused.
   - When the two transformed operands differ in precision, one of them is
     cast with adjust_bool_pattern_cast to the type whose precision is
     closer to that of OUT_TYPE; on a tie both are cast to OUT_TYPE.
   - Comparisons become a COND_EXPR that selects TRUEVAL (constant 1 when
     none was chosen earlier) or constant 0.  Operand types that are not
     unsigned INTEGER_TYPE with precision equal to their mode size are
     replaced by an unsigned integer type of the mode size.
   - The new statement receives the original location and is appended to
     the pattern definition sequence of STMT_INFO with the vector type
     for its scalar type.

   NOTE(review): the switch labels, else branches and several assignments
   are missing from this listing; confirm against the upstream source.  */
5313 adjust_bool_pattern (vec_info
*vinfo
, tree var
, tree out_type
,
5314 stmt_vec_info stmt_info
, hash_map
<tree
, tree
> &defs
)
5316 gimple
*stmt
= SSA_NAME_DEF_STMT (var
);
5317 enum tree_code rhs_code
, def_rhs_code
;
5318 tree itype
, cond_expr
, rhs1
, rhs2
, irhs1
, irhs2
;
5320 gimple
*pattern_stmt
, *def_stmt
;
5321 tree trueval
= NULL_TREE
;
5323 rhs1
= gimple_assign_rhs1 (stmt
);
5324 rhs2
= gimple_assign_rhs2 (stmt
);
5325 rhs_code
= gimple_assign_rhs_code (stmt
);
5326 loc
= gimple_location (stmt
);
5331 irhs1
= *defs
.get (rhs1
);
5332 itype
= TREE_TYPE (irhs1
);
5334 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
5339 irhs1
= *defs
.get (rhs1
);
5340 itype
= TREE_TYPE (irhs1
);
5342 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
5343 BIT_XOR_EXPR
, irhs1
, build_int_cst (itype
, 1));
5347 /* Try to optimize x = y & (a < b ? 1 : 0); into
5348 x = (a < b ? y : 0);
5354 S1 a_b = x1 CMP1 y1;
5355 S2 b_b = x2 CMP2 y2;
5357 S4 d_T = (TYPE) c_b;
5359 we would normally emit:
5361 S1' a_T = x1 CMP1 y1 ? 1 : 0;
5362 S2' b_T = x2 CMP2 y2 ? 1 : 0;
5363 S3' c_T = a_T & b_T;
5366 but we can save one stmt by using the
5367 result of one of the COND_EXPRs in the other COND_EXPR and leave
5368 BIT_AND_EXPR stmt out:
5370 S1' a_T = x1 CMP1 y1 ? 1 : 0;
5371 S3' c_T = x2 CMP2 y2 ? a_T : 0;
5374 At least when VEC_COND_EXPR is implemented using masks
5375 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
5376 computes the comparison masks and ands it, in one case with
5377 all ones vector, in the other case with a vector register.
5378 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
5379 often more expensive. */
5380 def_stmt
= SSA_NAME_DEF_STMT (rhs2
);
5381 def_rhs_code
= gimple_assign_rhs_code (def_stmt
);
5382 if (TREE_CODE_CLASS (def_rhs_code
) == tcc_comparison
)
5384 irhs1
= *defs
.get (rhs1
);
5385 tree def_rhs1
= gimple_assign_rhs1 (def_stmt
);
5386 if (TYPE_PRECISION (TREE_TYPE (irhs1
))
5387 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1
))))
5389 rhs_code
= def_rhs_code
;
5391 rhs2
= gimple_assign_rhs2 (def_stmt
);
5396 irhs2
= *defs
.get (rhs2
);
5399 def_stmt
= SSA_NAME_DEF_STMT (rhs1
);
5400 def_rhs_code
= gimple_assign_rhs_code (def_stmt
);
5401 if (TREE_CODE_CLASS (def_rhs_code
) == tcc_comparison
)
5403 irhs2
= *defs
.get (rhs2
);
5404 tree def_rhs1
= gimple_assign_rhs1 (def_stmt
);
5405 if (TYPE_PRECISION (TREE_TYPE (irhs2
))
5406 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1
))))
5408 rhs_code
= def_rhs_code
;
5410 rhs2
= gimple_assign_rhs2 (def_stmt
);
5415 irhs1
= *defs
.get (rhs1
);
5421 irhs1
= *defs
.get (rhs1
);
5422 irhs2
= *defs
.get (rhs2
);
5424 if (TYPE_PRECISION (TREE_TYPE (irhs1
))
5425 != TYPE_PRECISION (TREE_TYPE (irhs2
)))
5427 int prec1
= TYPE_PRECISION (TREE_TYPE (irhs1
));
5428 int prec2
= TYPE_PRECISION (TREE_TYPE (irhs2
));
5429 int out_prec
= TYPE_PRECISION (out_type
);
5430 if (absu_hwi (out_prec
- prec1
) < absu_hwi (out_prec
- prec2
))
5431 irhs2
= adjust_bool_pattern_cast (vinfo
, TREE_TYPE (irhs1
), irhs2
,
5433 else if (absu_hwi (out_prec
- prec1
) > absu_hwi (out_prec
- prec2
))
5434 irhs1
= adjust_bool_pattern_cast (vinfo
, TREE_TYPE (irhs2
), irhs1
,
5438 irhs1
= adjust_bool_pattern_cast (vinfo
,
5439 out_type
, irhs1
, stmt_info
);
5440 irhs2
= adjust_bool_pattern_cast (vinfo
,
5441 out_type
, irhs2
, stmt_info
);
5444 itype
= TREE_TYPE (irhs1
);
5446 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
5447 rhs_code
, irhs1
, irhs2
);
5452 gcc_assert (TREE_CODE_CLASS (rhs_code
) == tcc_comparison
);
5453 if (TREE_CODE (TREE_TYPE (rhs1
)) != INTEGER_TYPE
5454 || !TYPE_UNSIGNED (TREE_TYPE (rhs1
))
5455 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1
)),
5456 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1
)))))
5458 scalar_mode mode
= SCALAR_TYPE_MODE (TREE_TYPE (rhs1
));
5460 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode
), 1);
5463 itype
= TREE_TYPE (rhs1
);
5464 cond_expr
= build2_loc (loc
, rhs_code
, itype
, rhs1
, rhs2
);
5465 if (trueval
== NULL_TREE
)
5466 trueval
= build_int_cst (itype
, 1);
5468 gcc_checking_assert (useless_type_conversion_p (itype
,
5469 TREE_TYPE (trueval
)));
5471 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
5472 COND_EXPR
, cond_expr
, trueval
,
5473 build_int_cst (itype
, 0));
5477 gimple_set_location (pattern_stmt
, loc
);
5478 append_pattern_def_seq (vinfo
, stmt_info
, pattern_stmt
,
5479 get_vectype_for_scalar_type (vinfo
, itype
));
5480 defs
.put (var
, gimple_assign_lhs (pattern_stmt
));
5483 /* Comparison function to qsort a vector of gimple stmts after UID. */
5486 sort_after_uid (const void *p1
, const void *p2
)
5488 const gimple
*stmt1
= *(const gimple
* const *)p1
;
5489 const gimple
*stmt2
= *(const gimple
* const *)p2
;
5490 return gimple_uid (stmt1
) - gimple_uid (stmt2
);
5493 /* Create pattern stmts for all stmts participating in the bool pattern
5494 specified by BOOL_STMT_SET and its root STMT_INFO with the desired type
5495 OUT_TYPE. Return the def of the pattern root. */
5498 adjust_bool_stmts (vec_info
*vinfo
, hash_set
<gimple
*> &bool_stmt_set
,
5499 tree out_type
, stmt_vec_info stmt_info
)
5501 /* Gather original stmts in the bool pattern in their order of appearance
5503 auto_vec
<gimple
*> bool_stmts (bool_stmt_set
.elements ());
5504 for (hash_set
<gimple
*>::iterator i
= bool_stmt_set
.begin ();
5505 i
!= bool_stmt_set
.end (); ++i
)
5506 bool_stmts
.quick_push (*i
);
5507 bool_stmts
.qsort (sort_after_uid
);
5509 /* Now process them in that order, producing pattern stmts. */
5510 hash_map
<tree
, tree
> defs
;
5511 for (unsigned i
= 0; i
< bool_stmts
.length (); ++i
)
5512 adjust_bool_pattern (vinfo
, gimple_assign_lhs (bool_stmts
[i
]),
5513 out_type
, stmt_info
, defs
);
5515 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
5516 gimple
*pattern_stmt
5517 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
));
5518 return gimple_assign_lhs (pattern_stmt
);
5521 /* Return the proper type for converting bool VAR into
5522 an integer value or NULL_TREE if no such type exists.
5523 The type is chosen so that the converted value has the
5524 same number of elements as VAR's vector type. */
5527 integer_type_for_mask (tree var
, vec_info
*vinfo
)
5529 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var
)))
5532 stmt_vec_info def_stmt_info
= vect_get_internal_def (vinfo
, var
);
5533 if (!def_stmt_info
|| !vect_use_mask_type_p (def_stmt_info
))
5536 return build_nonstandard_integer_type (def_stmt_info
->mask_precision
, 1);
5539 /* Function vect_recog_bool_pattern
5541 Try to find pattern like following:
5543 bool a_b, b_b, c_b, d_b, e_b;
5546 S1 a_b = x1 CMP1 y1;
5547 S2 b_b = x2 CMP2 y2;
5549 S4 d_b = x3 CMP3 y3;
5551 S6 f_T = (TYPE) e_b;
5553 where type 'TYPE' is an integral type. Or a similar pattern
5556 S6 f_Y = e_b ? r_Y : s_Y;
5558 as results from if-conversion of a complex condition.
5562 * STMT_VINFO: The stmt at the end from which the pattern
5563 search begins, i.e. cast of a bool to
5568 * TYPE_OUT: The type of the output of this pattern.
5570 * Return value: A new stmt that will be used to replace the pattern.
5572 Assuming size of TYPE is the same as size of all comparisons
5573 (otherwise some casts would be added where needed), the above
5574 sequence we create related pattern stmts:
5575 S1' a_T = x1 CMP1 y1 ? 1 : 0;
5576 S3' c_T = x2 CMP2 y2 ? a_T : 0;
5577 S4' d_T = x3 CMP3 y3 ? 1 : 0;
5578 S5' e_T = c_T | d_T;
5581 Instead of the above S3' we could emit:
5582 S2' b_T = x2 CMP2 y2 ? 1 : 0;
5583 S3' c_T = a_T | b_T;
5584 but the above is more efficient. */
5587 vect_recog_bool_pattern (vec_info
*vinfo
,
5588 stmt_vec_info stmt_vinfo
, tree
*type_out
)
5590 gimple
*last_stmt
= stmt_vinfo
->stmt
;
5591 enum tree_code rhs_code
;
5592 tree var
, lhs
, rhs
, vectype
;
5593 gimple
*pattern_stmt
;
5595 if (!is_gimple_assign (last_stmt
))
5598 var
= gimple_assign_rhs1 (last_stmt
);
5599 lhs
= gimple_assign_lhs (last_stmt
);
5600 rhs_code
= gimple_assign_rhs_code (last_stmt
);
5602 if (rhs_code
== VIEW_CONVERT_EXPR
)
5603 var
= TREE_OPERAND (var
, 0);
5605 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var
)))
5608 hash_set
<gimple
*> bool_stmts
;
5610 if (CONVERT_EXPR_CODE_P (rhs_code
)
5611 || rhs_code
== VIEW_CONVERT_EXPR
)
5613 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
5614 || VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs
)))
5616 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (lhs
));
5618 if (check_bool_pattern (var
, vinfo
, bool_stmts
))
5620 rhs
= adjust_bool_stmts (vinfo
, bool_stmts
,
5621 TREE_TYPE (lhs
), stmt_vinfo
);
5622 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
5623 if (useless_type_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (rhs
)))
5624 pattern_stmt
= gimple_build_assign (lhs
, SSA_NAME
, rhs
);
5627 = gimple_build_assign (lhs
, NOP_EXPR
, rhs
);
5631 tree type
= integer_type_for_mask (var
, vinfo
);
5632 tree cst0
, cst1
, tmp
;
5637 /* We may directly use cond with narrowed type to avoid
5638 multiple cond exprs with following result packing and
5639 perform single cond with packed mask instead. In case
5640 of widening we better make cond first and then extract
5642 if (TYPE_MODE (type
) == TYPE_MODE (TREE_TYPE (lhs
)))
5643 type
= TREE_TYPE (lhs
);
5645 cst0
= build_int_cst (type
, 0);
5646 cst1
= build_int_cst (type
, 1);
5647 tmp
= vect_recog_temp_ssa_var (type
, NULL
);
5648 pattern_stmt
= gimple_build_assign (tmp
, COND_EXPR
, var
, cst1
, cst0
);
5650 if (!useless_type_conversion_p (type
, TREE_TYPE (lhs
)))
5652 tree new_vectype
= get_vectype_for_scalar_type (vinfo
, type
);
5653 append_pattern_def_seq (vinfo
, stmt_vinfo
,
5654 pattern_stmt
, new_vectype
);
5656 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
5657 pattern_stmt
= gimple_build_assign (lhs
, CONVERT_EXPR
, tmp
);
5661 *type_out
= vectype
;
5662 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt
);
5664 return pattern_stmt
;
5666 else if (rhs_code
== COND_EXPR
5667 && TREE_CODE (var
) == SSA_NAME
)
5669 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (lhs
));
5670 if (vectype
== NULL_TREE
)
5673 /* Build a scalar type for the boolean result that when
5674 vectorized matches the vector type of the result in
5675 size and number of elements. */
5677 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype
)),
5678 TYPE_VECTOR_SUBPARTS (vectype
));
5681 = build_nonstandard_integer_type (prec
,
5682 TYPE_UNSIGNED (TREE_TYPE (var
)));
5683 if (get_vectype_for_scalar_type (vinfo
, type
) == NULL_TREE
)
5686 if (check_bool_pattern (var
, vinfo
, bool_stmts
))
5687 var
= adjust_bool_stmts (vinfo
, bool_stmts
, type
, stmt_vinfo
);
5688 else if (integer_type_for_mask (var
, vinfo
))
5691 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
5693 = gimple_build_assign (lhs
, COND_EXPR
,
5694 build2 (NE_EXPR
, boolean_type_node
,
5695 var
, build_int_cst (TREE_TYPE (var
), 0)),
5696 gimple_assign_rhs2 (last_stmt
),
5697 gimple_assign_rhs3 (last_stmt
));
5698 *type_out
= vectype
;
5699 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt
);
5701 return pattern_stmt
;
5703 else if (rhs_code
== SSA_NAME
5704 && STMT_VINFO_DATA_REF (stmt_vinfo
))
5706 stmt_vec_info pattern_stmt_info
;
5707 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (lhs
));
5708 if (!vectype
|| !VECTOR_MODE_P (TYPE_MODE (vectype
)))
5711 if (check_bool_pattern (var
, vinfo
, bool_stmts
))
5712 rhs
= adjust_bool_stmts (vinfo
, bool_stmts
,
5713 TREE_TYPE (vectype
), stmt_vinfo
);
5716 tree type
= integer_type_for_mask (var
, vinfo
);
5717 tree cst0
, cst1
, new_vectype
;
5722 if (TYPE_MODE (type
) == TYPE_MODE (TREE_TYPE (vectype
)))
5723 type
= TREE_TYPE (vectype
);
5725 cst0
= build_int_cst (type
, 0);
5726 cst1
= build_int_cst (type
, 1);
5727 new_vectype
= get_vectype_for_scalar_type (vinfo
, type
);
5729 rhs
= vect_recog_temp_ssa_var (type
, NULL
);
5730 pattern_stmt
= gimple_build_assign (rhs
, COND_EXPR
, var
, cst1
, cst0
);
5731 append_pattern_def_seq (vinfo
, stmt_vinfo
, pattern_stmt
, new_vectype
);
5734 lhs
= build1 (VIEW_CONVERT_EXPR
, TREE_TYPE (vectype
), lhs
);
5735 if (!useless_type_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (rhs
)))
5737 tree rhs2
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
5738 gimple
*cast_stmt
= gimple_build_assign (rhs2
, NOP_EXPR
, rhs
);
5739 append_pattern_def_seq (vinfo
, stmt_vinfo
, cast_stmt
);
5742 pattern_stmt
= gimple_build_assign (lhs
, SSA_NAME
, rhs
);
5743 pattern_stmt_info
= vinfo
->add_stmt (pattern_stmt
);
5744 vinfo
->move_dr (pattern_stmt_info
, stmt_vinfo
);
5745 *type_out
= vectype
;
5746 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt
);
5748 return pattern_stmt
;
5755 /* A helper for vect_recog_mask_conversion_pattern. Build
5756 conversion of MASK to a type suitable for masking VECTYPE.
5757 Built statement gets required vectype and is appended to
5758 a pattern sequence of STMT_VINFO.
5760 Return converted mask. */
5763 build_mask_conversion (vec_info
*vinfo
,
5764 tree mask
, tree vectype
, stmt_vec_info stmt_vinfo
)
5769 masktype
= truth_type_for (vectype
);
5770 tmp
= vect_recog_temp_ssa_var (TREE_TYPE (masktype
), NULL
);
5771 stmt
= gimple_build_assign (tmp
, CONVERT_EXPR
, mask
);
5772 append_pattern_def_seq (vinfo
, stmt_vinfo
,
5773 stmt
, masktype
, TREE_TYPE (vectype
));
5779 /* Function vect_recog_mask_conversion_pattern
5781 Try to find statements which require boolean type
5782 conversion. Additional conversion statements are
5783 added to handle such cases. For example:
5793 S4 c_1 = m_3 ? c_2 : c_3;
5795 Will be transformed into:
5799 S3'' m_2' = (_Bool[bitsize=32])m_2
5800 S3' m_3' = m_1 & m_2';
5801 S4'' m_3'' = (_Bool[bitsize=8])m_3'
5802 S4' c_1' = m_3'' ? c_2 : c_3; */
5805 vect_recog_mask_conversion_pattern (vec_info
*vinfo
,
5806 stmt_vec_info stmt_vinfo
, tree
*type_out
)
5808 gimple
*last_stmt
= stmt_vinfo
->stmt
;
5809 enum tree_code rhs_code
;
5810 tree lhs
= NULL_TREE
, rhs1
, rhs2
, tmp
, rhs1_type
, rhs2_type
;
5811 tree vectype1
, vectype2
;
5812 stmt_vec_info pattern_stmt_info
;
5813 tree rhs1_op0
= NULL_TREE
, rhs1_op1
= NULL_TREE
;
5814 tree rhs1_op0_type
= NULL_TREE
, rhs1_op1_type
= NULL_TREE
;
5816 /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion. */
5817 if (is_gimple_call (last_stmt
)
5818 && gimple_call_internal_p (last_stmt
))
5820 gcall
*pattern_stmt
;
5822 internal_fn ifn
= gimple_call_internal_fn (last_stmt
);
5823 int mask_argno
= internal_fn_mask_index (ifn
);
5827 bool store_p
= internal_store_fn_p (ifn
);
5830 int rhs_index
= internal_fn_stored_value_index (ifn
);
5831 tree rhs
= gimple_call_arg (last_stmt
, rhs_index
);
5832 vectype1
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (rhs
));
5836 lhs
= gimple_call_lhs (last_stmt
);
5839 vectype1
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (lhs
));
5842 tree mask_arg
= gimple_call_arg (last_stmt
, mask_argno
);
5843 tree mask_arg_type
= integer_type_for_mask (mask_arg
, vinfo
);
5846 vectype2
= get_mask_type_for_scalar_type (vinfo
, mask_arg_type
);
5848 if (!vectype1
|| !vectype2
5849 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1
),
5850 TYPE_VECTOR_SUBPARTS (vectype2
)))
5853 tmp
= build_mask_conversion (vinfo
, mask_arg
, vectype1
, stmt_vinfo
);
5855 auto_vec
<tree
, 8> args
;
5856 unsigned int nargs
= gimple_call_num_args (last_stmt
);
5857 args
.safe_grow (nargs
, true);
5858 for (unsigned int i
= 0; i
< nargs
; ++i
)
5859 args
[i
] = ((int) i
== mask_argno
5861 : gimple_call_arg (last_stmt
, i
));
5862 pattern_stmt
= gimple_build_call_internal_vec (ifn
, args
);
5866 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
5867 gimple_call_set_lhs (pattern_stmt
, lhs
);
5869 gimple_call_set_nothrow (pattern_stmt
, true);
5871 pattern_stmt_info
= vinfo
->add_stmt (pattern_stmt
);
5872 if (STMT_VINFO_DATA_REF (stmt_vinfo
))
5873 vinfo
->move_dr (pattern_stmt_info
, stmt_vinfo
);
5875 *type_out
= vectype1
;
5876 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt
);
5878 return pattern_stmt
;
5881 if (!is_gimple_assign (last_stmt
))
5884 gimple
*pattern_stmt
;
5885 lhs
= gimple_assign_lhs (last_stmt
);
5886 rhs1
= gimple_assign_rhs1 (last_stmt
);
5887 rhs_code
= gimple_assign_rhs_code (last_stmt
);
5889 /* Check for cond expression requiring mask conversion. */
5890 if (rhs_code
== COND_EXPR
)
5892 vectype1
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (lhs
));
5894 if (TREE_CODE (rhs1
) == SSA_NAME
)
5896 rhs1_type
= integer_type_for_mask (rhs1
, vinfo
);
5900 else if (COMPARISON_CLASS_P (rhs1
))
5902 /* Check whether we're comparing scalar booleans and (if so)
5903 whether a better mask type exists than the mask associated
5904 with boolean-sized elements. This avoids unnecessary packs
5905 and unpacks if the booleans are set from comparisons of
5906 wider types. E.g. in:
5908 int x1, x2, x3, x4, y1, y2;
5910 bool b1 = (x1 == x2);
5911 bool b2 = (x3 == x4);
5912 ... = b1 == b2 ? y1 : y2;
5914 it is better for b1 and b2 to use the mask type associated
5915 with int elements rather than bool (byte) elements. */
5916 rhs1_op0
= TREE_OPERAND (rhs1
, 0);
5917 rhs1_op1
= TREE_OPERAND (rhs1
, 1);
5918 if (!rhs1_op0
|| !rhs1_op1
)
5920 rhs1_op0_type
= integer_type_for_mask (rhs1_op0
, vinfo
);
5921 rhs1_op1_type
= integer_type_for_mask (rhs1_op1
, vinfo
);
5924 rhs1_type
= TREE_TYPE (rhs1_op0
);
5925 else if (!rhs1_op1_type
)
5926 rhs1_type
= TREE_TYPE (rhs1_op1
);
5927 else if (TYPE_PRECISION (rhs1_op0_type
)
5928 != TYPE_PRECISION (rhs1_op1_type
))
5930 int tmp0
= (int) TYPE_PRECISION (rhs1_op0_type
)
5931 - (int) TYPE_PRECISION (TREE_TYPE (lhs
));
5932 int tmp1
= (int) TYPE_PRECISION (rhs1_op1_type
)
5933 - (int) TYPE_PRECISION (TREE_TYPE (lhs
));
5934 if ((tmp0
> 0 && tmp1
> 0) || (tmp0
< 0 && tmp1
< 0))
5936 if (abs (tmp0
) > abs (tmp1
))
5937 rhs1_type
= rhs1_op1_type
;
5939 rhs1_type
= rhs1_op0_type
;
5942 rhs1_type
= build_nonstandard_integer_type
5943 (TYPE_PRECISION (TREE_TYPE (lhs
)), 1);
5946 rhs1_type
= rhs1_op0_type
;
5951 vectype2
= get_mask_type_for_scalar_type (vinfo
, rhs1_type
);
5953 if (!vectype1
|| !vectype2
)
5956 /* Continue if a conversion is needed. Also continue if we have
5957 a comparison whose vector type would normally be different from
5958 VECTYPE2 when considered in isolation. In that case we'll
5959 replace the comparison with an SSA name (so that we can record
5960 its vector type) and behave as though the comparison was an SSA
5961 name from the outset. */
5962 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1
),
5963 TYPE_VECTOR_SUBPARTS (vectype2
))
5968 /* If rhs1 is invariant and we can promote it leave the COND_EXPR
5969 in place, we can handle it in vectorizable_condition. This avoids
5970 unnecessary promotion stmts and increased vectorization factor. */
5971 if (COMPARISON_CLASS_P (rhs1
)
5972 && INTEGRAL_TYPE_P (rhs1_type
)
5973 && known_le (TYPE_VECTOR_SUBPARTS (vectype1
),
5974 TYPE_VECTOR_SUBPARTS (vectype2
)))
5976 enum vect_def_type dt
;
5977 if (vect_is_simple_use (TREE_OPERAND (rhs1
, 0), vinfo
, &dt
)
5978 && dt
== vect_external_def
5979 && vect_is_simple_use (TREE_OPERAND (rhs1
, 1), vinfo
, &dt
)
5980 && (dt
== vect_external_def
5981 || dt
== vect_constant_def
))
5983 tree wide_scalar_type
= build_nonstandard_integer_type
5984 (vector_element_bits (vectype1
), TYPE_UNSIGNED (rhs1_type
));
5985 tree vectype3
= get_vectype_for_scalar_type (vinfo
,
5987 if (expand_vec_cond_expr_p (vectype1
, vectype3
, TREE_CODE (rhs1
)))
5992 /* If rhs1 is a comparison we need to move it into a
5993 separate statement. */
5994 if (TREE_CODE (rhs1
) != SSA_NAME
)
5996 tmp
= vect_recog_temp_ssa_var (TREE_TYPE (rhs1
), NULL
);
5998 && TYPE_PRECISION (rhs1_op0_type
) != TYPE_PRECISION (rhs1_type
))
5999 rhs1_op0
= build_mask_conversion (vinfo
, rhs1_op0
,
6000 vectype2
, stmt_vinfo
);
6002 && TYPE_PRECISION (rhs1_op1_type
) != TYPE_PRECISION (rhs1_type
))
6003 rhs1_op1
= build_mask_conversion (vinfo
, rhs1_op1
,
6004 vectype2
, stmt_vinfo
);
6005 pattern_stmt
= gimple_build_assign (tmp
, TREE_CODE (rhs1
),
6006 rhs1_op0
, rhs1_op1
);
6008 append_pattern_def_seq (vinfo
, stmt_vinfo
, pattern_stmt
, vectype2
,
6012 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
6013 TYPE_VECTOR_SUBPARTS (vectype2
)))
6014 tmp
= build_mask_conversion (vinfo
, rhs1
, vectype1
, stmt_vinfo
);
6018 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
6019 pattern_stmt
= gimple_build_assign (lhs
, COND_EXPR
, tmp
,
6020 gimple_assign_rhs2 (last_stmt
),
6021 gimple_assign_rhs3 (last_stmt
));
6023 *type_out
= vectype1
;
6024 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt
);
6026 return pattern_stmt
;
6029 /* Now check for binary boolean operations requiring conversion for
6031 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs
)))
6034 if (rhs_code
!= BIT_IOR_EXPR
6035 && rhs_code
!= BIT_XOR_EXPR
6036 && rhs_code
!= BIT_AND_EXPR
6037 && TREE_CODE_CLASS (rhs_code
) != tcc_comparison
)
6040 rhs2
= gimple_assign_rhs2 (last_stmt
);
6042 rhs1_type
= integer_type_for_mask (rhs1
, vinfo
);
6043 rhs2_type
= integer_type_for_mask (rhs2
, vinfo
);
6045 if (!rhs1_type
|| !rhs2_type
6046 || TYPE_PRECISION (rhs1_type
) == TYPE_PRECISION (rhs2_type
))
6049 if (TYPE_PRECISION (rhs1_type
) < TYPE_PRECISION (rhs2_type
))
6051 vectype1
= get_mask_type_for_scalar_type (vinfo
, rhs1_type
);
6054 rhs2
= build_mask_conversion (vinfo
, rhs2
, vectype1
, stmt_vinfo
);
6058 vectype1
= get_mask_type_for_scalar_type (vinfo
, rhs2_type
);
6061 rhs1
= build_mask_conversion (vinfo
, rhs1
, vectype1
, stmt_vinfo
);
6064 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
6065 pattern_stmt
= gimple_build_assign (lhs
, rhs_code
, rhs1
, rhs2
);
6067 *type_out
= vectype1
;
6068 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt
);
6070 return pattern_stmt
;
6073 /* STMT_INFO is a load or store. If the load or store is conditional, return
6074 the boolean condition under which it occurs, otherwise return null. */
6077 vect_get_load_store_mask (stmt_vec_info stmt_info
)
6079 if (gassign
*def_assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
6081 gcc_assert (gimple_assign_single_p (def_assign
));
6085 if (gcall
*def_call
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
6087 internal_fn ifn
= gimple_call_internal_fn (def_call
);
6088 int mask_index
= internal_fn_mask_index (ifn
);
6089 return gimple_call_arg (def_call
, mask_index
);
6095 /* Return MASK if MASK is suitable for masking an operation on vectors
6096 of type VECTYPE, otherwise convert it into such a form and return
6097 the result. Associate any conversion statements with STMT_INFO's
6101 vect_convert_mask_for_vectype (tree mask
, tree vectype
,
6102 stmt_vec_info stmt_info
, vec_info
*vinfo
)
6104 tree mask_type
= integer_type_for_mask (mask
, vinfo
);
6107 tree mask_vectype
= get_mask_type_for_scalar_type (vinfo
, mask_type
);
6109 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype
),
6110 TYPE_VECTOR_SUBPARTS (mask_vectype
)))
6111 mask
= build_mask_conversion (vinfo
, mask
, vectype
, stmt_info
);
6116 /* Return the equivalent of:
6118 fold_convert (TYPE, VALUE)
6120 with the expectation that the operation will be vectorized.
6121 If new statements are needed, add them as pattern statements
6125 vect_add_conversion_to_pattern (vec_info
*vinfo
,
6126 tree type
, tree value
, stmt_vec_info stmt_info
)
6128 if (useless_type_conversion_p (type
, TREE_TYPE (value
)))
6131 tree new_value
= vect_recog_temp_ssa_var (type
, NULL
);
6132 gassign
*conversion
= gimple_build_assign (new_value
, CONVERT_EXPR
, value
);
6133 append_pattern_def_seq (vinfo
, stmt_info
, conversion
,
6134 get_vectype_for_scalar_type (vinfo
, type
));
6138 /* Try to convert STMT_INFO into a call to a gather load or scatter store
6139 internal function. Return the final statement on success and set
6140 *TYPE_OUT to the vector type being loaded or stored.
6142 This function only handles gathers and scatters that were recognized
6143 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
6146 vect_recog_gather_scatter_pattern (vec_info
*vinfo
,
6147 stmt_vec_info stmt_info
, tree
*type_out
)
6149 /* Currently we only support this for loop vectorization. */
6150 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
6154 /* Make sure that we're looking at a gather load or scatter store. */
6155 data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
6156 if (!dr
|| !STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
6159 /* Get the boolean that controls whether the load or store happens.
6160 This is null if the operation is unconditional. */
6161 tree mask
= vect_get_load_store_mask (stmt_info
);
6163 /* Make sure that the target supports an appropriate internal
6164 function for the gather/scatter operation. */
6165 gather_scatter_info gs_info
;
6166 if (!vect_check_gather_scatter (stmt_info
, loop_vinfo
, &gs_info
)
6167 || gs_info
.ifn
== IFN_LAST
)
6170 /* Convert the mask to the right form. */
6171 tree gs_vectype
= get_vectype_for_scalar_type (loop_vinfo
,
6172 gs_info
.element_type
);
6174 mask
= vect_convert_mask_for_vectype (mask
, gs_vectype
, stmt_info
,
6176 else if (gs_info
.ifn
== IFN_MASK_SCATTER_STORE
6177 || gs_info
.ifn
== IFN_MASK_GATHER_LOAD
6178 || gs_info
.ifn
== IFN_MASK_LEN_SCATTER_STORE
6179 || gs_info
.ifn
== IFN_MASK_LEN_GATHER_LOAD
)
6180 mask
= build_int_cst (TREE_TYPE (truth_type_for (gs_vectype
)), -1);
6182 /* Get the invariant base and non-invariant offset, converting the
6183 latter to the same width as the vector elements. */
6184 tree base
= gs_info
.base
;
6185 tree offset_type
= TREE_TYPE (gs_info
.offset_vectype
);
6186 tree offset
= vect_add_conversion_to_pattern (vinfo
, offset_type
,
6187 gs_info
.offset
, stmt_info
);
6189 /* Build the new pattern statement. */
6190 tree scale
= size_int (gs_info
.scale
);
6191 gcall
*pattern_stmt
;
6192 if (DR_IS_READ (dr
))
6194 tree zero
= build_zero_cst (gs_info
.element_type
);
6196 pattern_stmt
= gimple_build_call_internal (gs_info
.ifn
, 5, base
,
6197 offset
, scale
, zero
, mask
);
6199 pattern_stmt
= gimple_build_call_internal (gs_info
.ifn
, 4, base
,
6200 offset
, scale
, zero
);
6201 tree load_lhs
= vect_recog_temp_ssa_var (gs_info
.element_type
, NULL
);
6202 gimple_call_set_lhs (pattern_stmt
, load_lhs
);
6206 tree rhs
= vect_get_store_rhs (stmt_info
);
6208 pattern_stmt
= gimple_build_call_internal (gs_info
.ifn
, 5,
6209 base
, offset
, scale
, rhs
,
6212 pattern_stmt
= gimple_build_call_internal (gs_info
.ifn
, 4,
6213 base
, offset
, scale
, rhs
);
6215 gimple_call_set_nothrow (pattern_stmt
, true);
6217 /* Copy across relevant vectorization info and associate DR with the
6218 new pattern statement instead of the original statement. */
6219 stmt_vec_info pattern_stmt_info
= loop_vinfo
->add_stmt (pattern_stmt
);
6220 loop_vinfo
->move_dr (pattern_stmt_info
, stmt_info
);
6222 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
6223 *type_out
= vectype
;
6224 vect_pattern_detected ("gather/scatter pattern", stmt_info
->stmt
);
6226 return pattern_stmt
;
6229 /* Return true if TYPE is a non-boolean integer type. These are the types
6230 that we want to consider for narrowing. */
6233 vect_narrowable_type_p (tree type
)
6235 return INTEGRAL_TYPE_P (type
) && !VECT_SCALAR_BOOLEAN_TYPE_P (type
);
6238 /* Return true if the operation given by CODE can be truncated to N bits
6239 when only N bits of the output are needed. This is only true if bit N+1
6240 of the inputs has no effect on the low N bits of the result. */
6243 vect_truncatable_operation_p (tree_code code
)
6261 /* Record that STMT_INFO could be changed from operating on TYPE to
6262 operating on a type with the precision and sign given by PRECISION
6263 and SIGN respectively. PRECISION is an arbitrary bit precision;
6264 it might not be a whole number of bytes. */
6267 vect_set_operation_type (stmt_vec_info stmt_info
, tree type
,
6268 unsigned int precision
, signop sign
)
6270 /* Round the precision up to a whole number of bytes. */
6271 precision
= vect_element_precision (precision
);
6272 if (precision
< TYPE_PRECISION (type
)
6273 && (!stmt_info
->operation_precision
6274 || stmt_info
->operation_precision
> precision
))
6276 stmt_info
->operation_precision
= precision
;
6277 stmt_info
->operation_sign
= sign
;
6281 /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
6282 non-boolean inputs, all of which have type TYPE. MIN_INPUT_PRECISION
6283 is an arbitrary bit precision; it might not be a whole number of bytes. */
6286 vect_set_min_input_precision (stmt_vec_info stmt_info
, tree type
,
6287 unsigned int min_input_precision
)
6289 /* This operation in isolation only requires the inputs to have
6290 MIN_INPUT_PRECISION of precision, However, that doesn't mean
6291 that MIN_INPUT_PRECISION is a natural precision for the chain
6292 as a whole. E.g. consider something like:
6294 unsigned short *x, *y;
6295 *y = ((*x & 0xf0) >> 4) | (*y << 4);
6297 The right shift can be done on unsigned chars, and only requires the
6298 result of "*x & 0xf0" to be done on unsigned chars. But taking that
6299 approach would mean turning a natural chain of single-vector unsigned
6300 short operations into one that truncates "*x" and then extends
6301 "(*x & 0xf0) >> 4", with two vectors for each unsigned short
6302 operation and one vector for each unsigned char operation.
6303 This would be a significant pessimization.
6305 Instead only propagate the maximum of this precision and the precision
6306 required by the users of the result. This means that we don't pessimize
6307 the case above but continue to optimize things like:
6311 *y = ((*x & 0xf0) >> 4) | (*y << 4);
6313 Here we would truncate two vectors of *x to a single vector of
6314 unsigned chars and use single-vector unsigned char operations for
6315 everything else, rather than doing two unsigned short copies of
6316 "(*x & 0xf0) >> 4" and then truncating the result. */
6317 min_input_precision
= MAX (min_input_precision
,
6318 stmt_info
->min_output_precision
);
6320 if (min_input_precision
< TYPE_PRECISION (type
)
6321 && (!stmt_info
->min_input_precision
6322 || stmt_info
->min_input_precision
> min_input_precision
))
6323 stmt_info
->min_input_precision
= min_input_precision
;
6326 /* Subroutine of vect_determine_min_output_precision. Return true if
6327 we can calculate a reduced number of output bits for STMT_INFO,
6328 whose result is LHS. */
6331 vect_determine_min_output_precision_1 (vec_info
*vinfo
,
6332 stmt_vec_info stmt_info
, tree lhs
)
6334 /* Take the maximum precision required by users of the result. */
6335 unsigned int precision
= 0;
6336 imm_use_iterator iter
;
6338 FOR_EACH_IMM_USE_FAST (use
, iter
, lhs
)
6340 gimple
*use_stmt
= USE_STMT (use
);
6341 if (is_gimple_debug (use_stmt
))
6343 stmt_vec_info use_stmt_info
= vinfo
->lookup_stmt (use_stmt
);
6344 if (!use_stmt_info
|| !use_stmt_info
->min_input_precision
)
6346 /* The input precision recorded for COND_EXPRs applies only to the
6347 "then" and "else" values. */
6348 gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
6350 && gimple_assign_rhs_code (assign
) == COND_EXPR
6351 && use
->use
!= gimple_assign_rhs2_ptr (assign
)
6352 && use
->use
!= gimple_assign_rhs3_ptr (assign
))
6354 precision
= MAX (precision
, use_stmt_info
->min_input_precision
);
6357 if (dump_enabled_p ())
6358 dump_printf_loc (MSG_NOTE
, vect_location
,
6359 "only the low %d bits of %T are significant\n",
6361 stmt_info
->min_output_precision
= precision
;
6365 /* Calculate min_output_precision for STMT_INFO. */
6368 vect_determine_min_output_precision (vec_info
*vinfo
, stmt_vec_info stmt_info
)
6370 /* We're only interested in statements with a narrowable result. */
6371 tree lhs
= gimple_get_lhs (stmt_info
->stmt
);
6373 || TREE_CODE (lhs
) != SSA_NAME
6374 || !vect_narrowable_type_p (TREE_TYPE (lhs
)))
6377 if (!vect_determine_min_output_precision_1 (vinfo
, stmt_info
, lhs
))
6378 stmt_info
->min_output_precision
= TYPE_PRECISION (TREE_TYPE (lhs
));
6381 /* Use range information to decide whether STMT (described by STMT_INFO)
6382 could be done in a narrower type. This is effectively a forward
6383 propagation, since it uses context-independent information that applies
6384 to all users of an SSA name. */
6387 vect_determine_precisions_from_range (stmt_vec_info stmt_info
, gassign
*stmt
)
6389 tree lhs
= gimple_assign_lhs (stmt
);
6390 if (!lhs
|| TREE_CODE (lhs
) != SSA_NAME
)
6393 tree type
= TREE_TYPE (lhs
);
6394 if (!vect_narrowable_type_p (type
))
6397 /* First see whether we have any useful range information for the result. */
6398 unsigned int precision
= TYPE_PRECISION (type
);
6399 signop sign
= TYPE_SIGN (type
);
6400 wide_int min_value
, max_value
;
6401 if (!vect_get_range_info (lhs
, &min_value
, &max_value
))
6404 tree_code code
= gimple_assign_rhs_code (stmt
);
6405 unsigned int nops
= gimple_num_ops (stmt
);
6407 if (!vect_truncatable_operation_p (code
))
6408 /* Check that all relevant input operands are compatible, and update
6409 [MIN_VALUE, MAX_VALUE] to include their ranges. */
6410 for (unsigned int i
= 1; i
< nops
; ++i
)
6412 tree op
= gimple_op (stmt
, i
);
6413 if (TREE_CODE (op
) == INTEGER_CST
)
6415 /* Don't require the integer to have RHS_TYPE (which it might
6416 not for things like shift amounts, etc.), but do require it
6418 if (!int_fits_type_p (op
, type
))
6421 min_value
= wi::min (min_value
, wi::to_wide (op
, precision
), sign
);
6422 max_value
= wi::max (max_value
, wi::to_wide (op
, precision
), sign
);
6424 else if (TREE_CODE (op
) == SSA_NAME
)
6426 /* Ignore codes that don't take uniform arguments. */
6427 if (!types_compatible_p (TREE_TYPE (op
), type
))
6430 wide_int op_min_value
, op_max_value
;
6431 if (!vect_get_range_info (op
, &op_min_value
, &op_max_value
))
6434 min_value
= wi::min (min_value
, op_min_value
, sign
);
6435 max_value
= wi::max (max_value
, op_max_value
, sign
);
6441 /* Try to switch signed types for unsigned types if we can.
6442 This is better for two reasons. First, unsigned ops tend
6443 to be cheaper than signed ops. Second, it means that we can
6447 int res = (int) c & 0xff00; // range [0x0000, 0xff00]
6452 unsigned short res_1 = (unsigned short) c & 0xff00;
6453 int res = (int) res_1;
6455 where the intermediate result res_1 has unsigned rather than
6457 if (sign
== SIGNED
&& !wi::neg_p (min_value
))
6460 /* See what precision is required for MIN_VALUE and MAX_VALUE. */
6461 unsigned int precision1
= wi::min_precision (min_value
, sign
);
6462 unsigned int precision2
= wi::min_precision (max_value
, sign
);
6463 unsigned int value_precision
= MAX (precision1
, precision2
);
6464 if (value_precision
>= precision
)
6467 if (dump_enabled_p ())
6468 dump_printf_loc (MSG_NOTE
, vect_location
, "can narrow to %s:%d"
6469 " without loss of precision: %G",
6470 sign
== SIGNED
? "signed" : "unsigned",
6471 value_precision
, (gimple
*) stmt
);
6473 vect_set_operation_type (stmt_info
, type
, value_precision
, sign
);
6474 vect_set_min_input_precision (stmt_info
, type
, value_precision
);
6477 /* Use information about the users of STMT's result to decide whether
6478 STMT (described by STMT_INFO) could be done in a narrower type.
6479 This is effectively a backward propagation. */
/* STMT_INFO is the vectorizer record for the assignment STMT.  Results are
   recorded through vect_set_operation_type and
   vect_set_min_input_precision.  */
6482 vect_determine_precisions_from_users (stmt_vec_info stmt_info
, gassign
*stmt
)
6484 tree_code code
= gimple_assign_rhs_code (stmt
);
/* The type of interest is taken from operand 2 for a COND_EXPR and from
   operand 1 for every other rhs code.  */
6485 unsigned int opno
= (code
== COND_EXPR
? 2 : 1);
6486 tree type
= TREE_TYPE (gimple_op (stmt
, opno
));
6487 if (!vect_narrowable_type_p (type
))
/* PRECISION is the full width of TYPE; the two values declared after it
   are what this function works out.  */
6490 unsigned int precision
= TYPE_PRECISION (type
);
6491 unsigned int operation_precision
, min_input_precision
;
6495 /* Only the bits that contribute to the output matter. Don't change
6496 the precision of the operation itself. */
6497 operation_precision
= precision
;
6498 min_input_precision
= stmt_info
->min_output_precision
;
/* The test below is true when SHIFT is not an INTEGER_CST or is not
   (unsigned) less than PRECISION.  CONST_SHIFT is the low part of the
   constant.  */
6504 tree shift
= gimple_assign_rhs2 (stmt
);
6505 if (TREE_CODE (shift
) != INTEGER_CST
6506 || !wi::ltu_p (wi::to_widest (shift
), precision
))
6508 unsigned int const_shift
= TREE_INT_CST_LOW (shift
);
6509 if (code
== LSHIFT_EXPR
)
6511 /* Avoid creating an undefined shift.
6513 ??? We could instead use min_output_precision as-is and
6514 optimize out-of-range shifts to zero. However, only
6515 degenerate testcases shift away all their useful input data,
6516 and it isn't natural to drop input operations in the middle
6517 of vectorization. This sort of thing should really be
6518 handled before vectorization. */
6519 operation_precision
= MAX (stmt_info
->min_output_precision
,
6521 /* We need CONST_SHIFT fewer bits of the input. */
6522 min_input_precision
= (MAX (operation_precision
, const_shift
)
6527 /* We need CONST_SHIFT extra bits to do the operation. */
6528 operation_precision
= (stmt_info
->min_output_precision
6530 min_input_precision
= operation_precision
;
/* For codes accepted by vect_truncatable_operation_p, both the operation
   and its inputs are given min_output_precision.  */
6536 if (vect_truncatable_operation_p (code
))
6538 /* Input bit N has no effect on output bits N-1 and lower. */
6539 operation_precision
= stmt_info
->min_output_precision
;
6540 min_input_precision
= operation_precision
;
/* OPERATION_PRECISION below PRECISION means the operation can be carried
   out in fewer bits than TYPE provides; the dump reports the signedness
   of TYPE together with the narrowed width.  */
6546 if (operation_precision
< precision
)
6548 if (dump_enabled_p ())
6549 dump_printf_loc (MSG_NOTE
, vect_location
, "can narrow to %s:%d"
6550 " without affecting users: %G",
6551 TYPE_UNSIGNED (type
) ? "unsigned" : "signed",
6552 operation_precision
, (gimple
*) stmt
);
6553 vect_set_operation_type (stmt_info
, type
, operation_precision
,
6556 vect_set_min_input_precision (stmt_info
, type
, min_input_precision
);
6559 /* Return true if the statement described by STMT_INFO sets a boolean
6560 SSA_NAME and if we know how to vectorize this kind of statement using
6561 vector mask types. */
/* STMT_INFO->stmt supplies the lhs that is tested.  Assignments (gassign)
   and PHIs (gphi) are then examined separately.  */
6564 possible_vector_mask_operation_p (stmt_vec_info stmt_info
)
6566 tree lhs
= gimple_get_lhs (stmt_info
->stmt
);
/* The visible part of this condition is true when LHS is not an SSA_NAME
   or its type is not a scalar boolean type.  */
6568 || TREE_CODE (lhs
) != SSA_NAME
6569 || !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs
)))
6572 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
6574 tree_code rhs_code
= gimple_assign_rhs_code (assign
);
/* The result here is whether RHS_CODE belongs to the tcc_comparison
   class.  */
6586 return TREE_CODE_CLASS (rhs_code
) == tcc_comparison
;
6589 else if (is_a
<gphi
*> (stmt_info
->stmt
))
6594 /* If STMT_INFO sets a boolean SSA_NAME, see whether we should use
6595 a vector mask type instead of a normal vector type. Record the
6596 result in STMT_INFO->mask_precision. */
/* VINFO is used to look up the defining statements of operands and to query
   vector and mask types.  The result is stored in
   STMT_INFO->mask_precision.  */
6599 vect_determine_mask_precision (vec_info
*vinfo
, stmt_vec_info stmt_info
)
6601 if (!possible_vector_mask_operation_p (stmt_info
))
6604 /* If at least one boolean input uses a vector mask type,
6605 pick the mask type with the narrowest elements.
6607 ??? This is the traditional behavior. It should always produce
6608 the smallest number of operations, but isn't necessarily the
6609 optimal choice. For example, if we have:
6615 - the user of a wants it to have a mask type for 16-bit elements (M16)
6617 - c uses a mask type for 8-bit elements (M8)
6619 then picking M8 gives:
6621 - 1 M16->M8 pack for b
6623 - 2 M8->M16 unpacks for the user of a
6625 whereas picking M16 would have given:
6627 - 2 M8->M16 unpacks for c
6630 The number of operations is equal, but M16 would have given
6631 a shorter dependency chain and allowed more ILP. */
/* ~0U is the starting value of PRECISION.  It is compared against again
   before the comparison fallback and in the dump code, where it selects
   the "normal nonmask vectors" message.  */
6632 unsigned int precision
= ~0U;
6633 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
/* Operands are scanned from index 1 (presumably index 0 is the lhs --
   TODO confirm).  Only operands of scalar boolean type are of interest.  */
6635 unsigned int nops
= gimple_num_ops (assign
);
6636 for (unsigned int i
= 1; i
< nops
; ++i
)
6638 tree rhs
= gimple_op (assign
, i
);
6639 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs
)))
6642 stmt_vec_info def_stmt_info
= vinfo
->lookup_def (rhs
);
6644 /* Don't let external or constant operands influence the choice.
6645 We can convert them to whichever vector type we pick. */
/* Keep the smallest nonzero mask_precision seen among the defining
   statements.  */
6648 if (def_stmt_info
->mask_precision
)
6650 if (precision
> def_stmt_info
->mask_precision
)
6651 precision
= def_stmt_info
->mask_precision
;
6655 /* If the statement compares two values that shouldn't use vector masks,
6656 try comparing the values as normal scalars instead. */
6657 tree_code rhs_code
= gimple_assign_rhs_code (assign
);
6658 if (precision
== ~0U
6659 && TREE_CODE_CLASS (rhs_code
) == tcc_comparison
)
/* The precision becomes the bit size of the first operand's scalar mode,
   but only if a vector type and a mask type exist for that scalar type
   and expand_vec_cmp_expr_p accepts the comparison.  */
6661 tree rhs1_type
= TREE_TYPE (gimple_assign_rhs1 (assign
));
6663 tree vectype
, mask_type
;
6664 if (is_a
<scalar_mode
> (TYPE_MODE (rhs1_type
), &mode
)
6665 && (vectype
= get_vectype_for_scalar_type (vinfo
, rhs1_type
))
6666 && (mask_type
= get_mask_type_for_scalar_type (vinfo
, rhs1_type
))
6667 && expand_vec_cmp_expr_p (vectype
, mask_type
, rhs_code
))
6668 precision
= GET_MODE_BITSIZE (mode
);
/* PHI case: the same minimum search, this time over the PHI arguments.  */
6673 gphi
*phi
= as_a
<gphi
*> (stmt_info
->stmt
);
6674 for (unsigned i
= 0; i
< gimple_phi_num_args (phi
); ++i
)
6676 tree rhs
= gimple_phi_arg_def (phi
, i
);
6678 stmt_vec_info def_stmt_info
= vinfo
->lookup_def (rhs
);
6680 /* Don't let external or constant operands influence the choice.
6681 We can convert them to whichever vector type we pick. */
6684 if (def_stmt_info
->mask_precision
)
6686 if (precision
> def_stmt_info
->mask_precision
)
6687 precision
= def_stmt_info
->mask_precision
;
6692 if (dump_enabled_p ())
6694 if (precision
== ~0U)
6695 dump_printf_loc (MSG_NOTE
, vect_location
,
6696 "using normal nonmask vectors for %G",
6699 dump_printf_loc (MSG_NOTE
, vect_location
,
6700 "using boolean precision %d for %G",
6701 precision
, stmt_info
->stmt
);
/* Record the chosen precision in STMT_INFO.  */
6704 stmt_info
->mask_precision
= precision
;
6707 /* Handle vect_determine_precisions for STMT_INFO, given that we
6708 have already done so for the users of its result. */
/* VINFO is only forwarded to vect_determine_min_output_precision.  The
   range-based and user-based analyses take STMT_INFO and the assignment.  */
6711 vect_determine_stmt_precisions (vec_info
*vinfo
, stmt_vec_info stmt_info
)
6713 vect_determine_min_output_precision (vinfo
, stmt_info
);
/* The two analyses below apply to gassign statements only.  The range-based
   one is called before the user-based one.  */
6714 if (gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
6716 vect_determine_precisions_from_range (stmt_info
, stmt
);
6717 vect_determine_precisions_from_users (stmt_info
, stmt
);
6721 /* Walk backwards through the vectorizable region to determine the
6722 values of these fields:
6724 - min_output_precision
6725 - min_input_precision
6726 - operation_precision
6727 - operation_sign. */
/* VINFO is either a loop_vec_info (tested with dyn_cast) or a bb_vec_info
   (obtained with as_a).  Both cases make one pass for mask precisions and
   a second, backwards pass for statement precisions.  */
6730 vect_determine_precisions (vec_info
*vinfo
)
6732 DUMP_VECT_SCOPE ("vect_determine_precisions");
6734 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
6736 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
6737 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
6738 unsigned int nbbs
= loop
->num_nodes
;
/* First pass, blocks in array order: mask precisions for the PHIs of each
   block, then for its non-debug statements.  */
6740 for (unsigned int i
= 0; i
< nbbs
; i
++)
6742 basic_block bb
= bbs
[i
];
6743 for (auto gsi
= gsi_start_phis (bb
);
6744 !gsi_end_p (gsi
); gsi_next (&gsi
))
6746 stmt_vec_info stmt_info
= vinfo
->lookup_stmt (gsi
.phi ());
6748 vect_determine_mask_precision (vinfo
, stmt_info
);
6750 for (auto si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
6751 if (!is_gimple_debug (gsi_stmt (si
)))
6752 vect_determine_mask_precision
6753 (vinfo
, vinfo
->lookup_stmt (gsi_stmt (si
)));
/* Second pass, blocks in reverse array order (bbs[nbbs - i - 1]): statement
   precisions for non-debug statements from the last one backwards, then
   for the PHIs of the block.  */
6755 for (unsigned int i
= 0; i
< nbbs
; i
++)
6757 basic_block bb
= bbs
[nbbs
- i
- 1];
6758 for (gimple_stmt_iterator si
= gsi_last_bb (bb
);
6759 !gsi_end_p (si
); gsi_prev (&si
))
6760 if (!is_gimple_debug (gsi_stmt (si
)))
6761 vect_determine_stmt_precisions
6762 (vinfo
, vinfo
->lookup_stmt (gsi_stmt (si
)));
6763 for (auto gsi
= gsi_start_phis (bb
);
6764 !gsi_end_p (gsi
); gsi_next (&gsi
))
6766 stmt_vec_info stmt_info
= vinfo
->lookup_stmt (gsi
.phi ());
6768 vect_determine_stmt_precisions (vinfo
, stmt_info
);
/* Basic-block case: the same two passes over bb_vinfo->bbs, restricted to
   statements that have a stmt_vec_info and are STMT_VINFO_VECTORIZABLE.  */
6774 bb_vec_info bb_vinfo
= as_a
<bb_vec_info
> (vinfo
);
6775 for (unsigned i
= 0; i
< bb_vinfo
->bbs
.length (); ++i
)
6777 basic_block bb
= bb_vinfo
->bbs
[i
];
6778 for (auto gsi
= gsi_start_phis (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
6780 stmt_vec_info stmt_info
= vinfo
->lookup_stmt (gsi
.phi ());
6781 if (stmt_info
&& STMT_VINFO_VECTORIZABLE (stmt_info
))
6782 vect_determine_mask_precision (vinfo
, stmt_info
);
6784 for (auto gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
6786 stmt_vec_info stmt_info
= vinfo
->lookup_stmt (gsi_stmt (gsi
));
6787 if (stmt_info
&& STMT_VINFO_VECTORIZABLE (stmt_info
))
6788 vect_determine_mask_precision (vinfo
, stmt_info
);
/* The index is a signed int counting down to -1, so the loop ends after
   block 0 has been processed.  */
6791 for (int i
= bb_vinfo
->bbs
.length () - 1; i
!= -1; --i
)
6793 for (gimple_stmt_iterator gsi
= gsi_last_bb (bb_vinfo
->bbs
[i
]);
6794 !gsi_end_p (gsi
); gsi_prev (&gsi
))
6796 stmt_vec_info stmt_info
= vinfo
->lookup_stmt (gsi_stmt (gsi
));
6797 if (stmt_info
&& STMT_VINFO_VECTORIZABLE (stmt_info
))
6798 vect_determine_stmt_precisions (vinfo
, stmt_info
);
6800 for (auto gsi
= gsi_start_phis (bb_vinfo
->bbs
[i
]);
6801 !gsi_end_p (gsi
); gsi_next (&gsi
))
6803 stmt_vec_info stmt_info
= vinfo
->lookup_stmt (gsi
.phi ());
6804 if (stmt_info
&& STMT_VINFO_VECTORIZABLE (stmt_info
))
6805 vect_determine_stmt_precisions (vinfo
, stmt_info
);
/* Signature shared by the pattern recognizers: each takes a vec_info *,
   a stmt_vec_info and a tree * and returns a gimple *.  */
6811 typedef gimple
*(*vect_recog_func_ptr
) (vec_info
*, stmt_vec_info
, tree
*);
/* One entry of the recognizer table.  FN is the recognizer to call.  */
6813 struct vect_recog_func
6815 vect_recog_func_ptr fn
;
6819 /* Note that ordering matters - the first pattern matching on a stmt is
6820 taken which means usually the more complex one needs to precede the
6821 less complex ones (widen_sum only after dot_prod or sad for example). */
/* Each entry pairs a recognizer function with a short name string.  */
6822 static vect_recog_func vect_vect_recog_func_ptrs
[] = {
6823 { vect_recog_bitfield_ref_pattern
, "bitfield_ref" },
6824 { vect_recog_bit_insert_pattern
, "bit_insert" },
6825 { vect_recog_abd_pattern
, "abd" },
6826 { vect_recog_over_widening_pattern
, "over_widening" },
6827 /* Must come after over_widening, which narrows the shift as much as
6828 possible beforehand. */
6829 { vect_recog_average_pattern
, "average" },
6830 { vect_recog_cond_expr_convert_pattern
, "cond_expr_convert" },
6831 { vect_recog_mulhs_pattern
, "mult_high" },
6832 { vect_recog_cast_forwprop_pattern
, "cast_forwprop" },
6833 { vect_recog_widen_mult_pattern
, "widen_mult" },
6834 { vect_recog_dot_prod_pattern
, "dot_prod" },
6835 { vect_recog_sad_pattern
, "sad" },
6836 { vect_recog_widen_sum_pattern
, "widen_sum" },
6837 { vect_recog_pow_pattern
, "pow" },
6838 { vect_recog_popcount_clz_ctz_ffs_pattern
, "popcount_clz_ctz_ffs" },
6839 { vect_recog_ctz_ffs_pattern
, "ctz_ffs" },
6840 { vect_recog_widen_shift_pattern
, "widen_shift" },
6841 { vect_recog_rotate_pattern
, "rotate" },
6842 { vect_recog_vector_vector_shift_pattern
, "vector_vector_shift" },
6843 { vect_recog_divmod_pattern
, "divmod" },
6844 { vect_recog_mult_pattern
, "mult" },
6845 { vect_recog_mixed_size_cond_pattern
, "mixed_size_cond" },
6846 { vect_recog_bool_pattern
, "bool" },
6847 /* This must come before mask conversion, and includes the parts
6848 of mask conversion that are needed for gather and scatter
6849 internal functions. */
6850 { vect_recog_gather_scatter_pattern
, "gather_scatter" },
6851 { vect_recog_mask_conversion_pattern
, "mask_conversion" },
6852 { vect_recog_widen_plus_pattern
, "widen_plus" },
6853 { vect_recog_widen_minus_pattern
, "widen_minus" },
6854 { vect_recog_widen_abd_pattern
, "widen_abd" },
6855 /* These must come after the double widening ones. */
/* Number of entries in the table, computed from the array itself with
   ARRAY_SIZE.  */
6858 const unsigned int NUM_PATTERNS
= ARRAY_SIZE (vect_vect_recog_func_ptrs
);
6860 /* Mark statements that are involved in a pattern. */
/* ORIG_STMT_INFO is the statement being replaced, PATTERN_STMT the main
   pattern statement and PATTERN_VECTYPE the vector type passed on to
   vect_init_pattern_stmt and vect_set_pattern_stmt.  */
6863 vect_mark_pattern_stmts (vec_info
*vinfo
,
6864 stmt_vec_info orig_stmt_info
, gimple
*pattern_stmt
,
6865 tree pattern_vectype
)
/* ORIG_STMT_INFO may be redirected further down to the statement it is
   related to, so keep the incoming value for the reduction-index handling
   at the end.  */
6867 stmt_vec_info orig_stmt_info_saved
= orig_stmt_info
;
6868 gimple
*def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info
);
6870 gimple
*orig_pattern_stmt
= NULL
;
6871 if (is_pattern_stmt_p (orig_stmt_info
))
6873 /* We're replacing a statement in an existing pattern definition
6875 orig_pattern_stmt
= orig_stmt_info
->stmt
;
6876 if (dump_enabled_p ())
6877 dump_printf_loc (MSG_NOTE
, vect_location
,
6878 "replacing earlier pattern %G", orig_pattern_stmt
);
6880 /* To keep the book-keeping simple, just swap the lhs of the
6881 old and new statements, so that the old one has a valid but
6883 tree old_lhs
= gimple_get_lhs (orig_pattern_stmt
);
6884 gimple_set_lhs (orig_pattern_stmt
, gimple_get_lhs (pattern_stmt
));
6885 gimple_set_lhs (pattern_stmt
, old_lhs
);
6887 if (dump_enabled_p ())
6888 dump_printf_loc (MSG_NOTE
, vect_location
, "with %G", pattern_stmt
);
6890 /* Switch to the statement that ORIG replaces. */
6891 orig_stmt_info
= STMT_VINFO_RELATED_STMT (orig_stmt_info
);
6893 /* We shouldn't be replacing the main pattern statement. */
6894 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info
)->stmt
6895 != orig_pattern_stmt
);
/* Initialize every statement of DEF_SEQ as a pattern statement for
   ORIG_STMT_INFO.  */
6899 for (gimple_stmt_iterator si
= gsi_start (def_seq
);
6900 !gsi_end_p (si
); gsi_next (&si
))
6902 if (dump_enabled_p ())
6903 dump_printf_loc (MSG_NOTE
, vect_location
,
6904 "extra pattern stmt: %G", gsi_stmt (si
));
6905 stmt_vec_info pattern_stmt_info
6906 = vect_init_pattern_stmt (vinfo
, gsi_stmt (si
),
6907 orig_stmt_info
, pattern_vectype
);
6908 /* Stmts in the def sequence are not vectorizable cycle or
6909 induction defs, instead they should all be vect_internal_def
6910 feeding the main pattern stmt which retains this def type. */
6911 STMT_VINFO_DEF_TYPE (pattern_stmt_info
) = vect_internal_def
;
/* ORIG_PATTERN_STMT is non-null only when an earlier pattern statement is
   being replaced; it is assigned in the is_pattern_stmt_p case.  */
6914 if (orig_pattern_stmt
)
6916 vect_init_pattern_stmt (vinfo
, pattern_stmt
,
6917 orig_stmt_info
, pattern_vectype
);
6919 /* Insert all the new pattern statements before the original one. */
6920 gimple_seq
*orig_def_seq
= &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info
);
6921 gimple_stmt_iterator gsi
= gsi_for_stmt (orig_pattern_stmt
,
6923 gsi_insert_seq_before_without_update (&gsi
, def_seq
, GSI_SAME_STMT
);
6924 gsi_insert_before_without_update (&gsi
, pattern_stmt
, GSI_SAME_STMT
);
6926 /* Remove the pattern statement that this new pattern replaces. */
6927 gsi_remove (&gsi
, false);
6930 vect_set_pattern_stmt (vinfo
,
6931 pattern_stmt
, orig_stmt_info
, pattern_vectype
);
6933 /* Transfer reduction path info to the pattern. */
6934 if (STMT_VINFO_REDUC_IDX (orig_stmt_info_saved
) != -1)
6937 if (!gimple_extract_op (orig_stmt_info_saved
->stmt
, &op
))
/* LOOKFOR starts as the operand found at the reduction index.  Whenever a
   statement using LOOKFOR is found, its reduction index is set to that
   operand position and LOOKFOR moves on to the statement's lhs.
   NOTE(review): the presence test above and the extraction use
   ORIG_STMT_INFO_SAVED while the index below is read from ORIG_STMT_INFO;
   confirm this is intended when ORIG_STMT_INFO has been redirected.  */
6939 tree lookfor
= op
.ops
[STMT_VINFO_REDUC_IDX (orig_stmt_info
)];
6940 /* Search the pattern def sequence and the main pattern stmt. Note
6941 we may have inserted all into a containing pattern def sequence
6942 so the following is a bit awkward. */
6943 gimple_stmt_iterator si
;
6947 si
= gsi_start (def_seq
);
6959 if (gimple_extract_op (s
, &op
))
6960 for (unsigned i
= 0; i
< op
.num_ops
; ++i
)
6961 if (op
.ops
[i
] == lookfor
)
6963 STMT_VINFO_REDUC_IDX (vinfo
->lookup_stmt (s
)) = i
;
6964 lookfor
= gimple_get_lhs (s
);
6968 if (s
== pattern_stmt
)
6970 if (!found
&& dump_enabled_p ())
6971 dump_printf_loc (MSG_NOTE
, vect_location
,
6972 "failed to update reduction index.\n");
6980 if (s
== pattern_stmt
)
6981 /* Found the end inside a bigger pattern def seq. */
6990 /* Function vect_pattern_recog_1
6993 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
6994 computation pattern.
6995 STMT_INFO: A stmt from which the pattern search should start.
6997 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
6998 a sequence of statements that has the same functionality and can be
6999 used to replace STMT_INFO. It returns the last statement in the sequence
7000 and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
7001 PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
7002 statement, having first checked that the target supports the new operation
7005 This function also does some bookkeeping, as explained in the documentation
7006 for vect_recog_pattern. */
/* RECOG_FUNC supplies both the recognizer (fn) and the name used in the dump
   output.  STMT_INFO is the statement the match is attempted on.  */
7009 vect_pattern_recog_1 (vec_info
*vinfo
,
7010 vect_recog_func
*recog_func
, stmt_vec_info stmt_info
)
7012 gimple
*pattern_stmt
;
7013 loop_vec_info loop_vinfo
;
7014 tree pattern_vectype
;
7016 /* If this statement has already been replaced with pattern statements,
7017 leave the original statement alone, since the first match wins.
7018 Instead try to match against the definition statements that feed
7019 the main pattern statement. */
7020 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
/* Recurse with the same recognizer on every statement of the pattern
   definition sequence.  */
7022 gimple_stmt_iterator gsi
;
7023 for (gsi
= gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
));
7024 !gsi_end_p (gsi
); gsi_next (&gsi
))
7025 vect_pattern_recog_1 (vinfo
, recog_func
,
7026 vinfo
->lookup_stmt (gsi_stmt (gsi
)));
/* The definition sequence must be empty before the recognizer is called.
   The recognizer returns the pattern statement and receives the address of
   PATTERN_VECTYPE as its third argument.  */
7030 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
));
7031 pattern_stmt
= recog_func
->fn (vinfo
, stmt_info
, &pattern_vectype
);
7034 /* Clear any half-formed pattern definition sequence. */
7035 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
) = NULL
;
/* LOOP_VINFO is used only in the reduction-list update at the end of the
   function.  */
7039 loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
7041 /* Found a vectorizable pattern. */
7042 if (dump_enabled_p ())
7043 dump_printf_loc (MSG_NOTE
, vect_location
,
7044 "%s pattern recognized: %G",
7045 recog_func
->name
, pattern_stmt
);
7047 /* Mark the stmts that are involved in the pattern. */
7048 vect_mark_pattern_stmts (vinfo
, stmt_info
, pattern_stmt
, pattern_vectype
);
7050 /* Patterns cannot be vectorized using SLP, because they change the order of
7055 stmt_vec_info
*elem_ptr
;
7056 VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo
), ix
, ix2
,
7057 elem_ptr
, *elem_ptr
== stmt_info
);
7062 /* Function vect_pattern_recog
7065 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
7068 Output - for each computation idiom that is detected we create a new stmt
7069 that provides the same functionality and that can be vectorized. We
7070 also record some information in the struct_stmt_info of the relevant
7071 stmts, as explained below:
7073 At the entry to this function we have the following stmts, with the
7074 following initial value in the STMT_VINFO fields:
7076 stmt in_pattern_p related_stmt vec_stmt
7077 S1: a_i = .... - - -
7078 S2: a_2 = ..use(a_i).. - - -
7079 S3: a_1 = ..use(a_2).. - - -
7080 S4: a_0 = ..use(a_1).. - - -
7081 S5: ... = ..use(a_0).. - - -
7083 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
7084 represented by a single stmt. We then:
7085 - create a new stmt S6 equivalent to the pattern (the stmt is not
7086 inserted into the code)
7087 - fill in the STMT_VINFO fields as follows:
7089 in_pattern_p related_stmt vec_stmt
7090 S1: a_i = .... - - -
7091 S2: a_2 = ..use(a_i).. - - -
7092 S3: a_1 = ..use(a_2).. - - -
7093 S4: a_0 = ..use(a_1).. true S6 -
7094 '---> S6: a_new = .... - S4 -
7095 S5: ... = ..use(a_0).. - - -
7097 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
7098 to each other through the RELATED_STMT field).
7100 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
7101 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
7102 remain irrelevant unless used by stmts other than S4.
7104 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
7105 (because they are marked as irrelevant). It will vectorize S6, and record
7106 a pointer to the new vector stmt VS6 from S6 (as usual).
7107 S4 will be skipped, and S5 will be vectorized as usual:
7109 in_pattern_p related_stmt vec_stmt
7110 S1: a_i = .... - - -
7111 S2: a_2 = ..use(a_i).. - - -
7112 S3: a_1 = ..use(a_2).. - - -
7113 > VS6: va_new = .... - - -
7114 S4: a_0 = ..use(a_1).. true S6 VS6
7115 '---> S6: a_new = .... - S4 VS6
7116 > VS5: ... = ..vuse(va_new).. - - -
7117 S5: ... = ..use(a_0).. - - -
7119 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
7120 elsewhere), and we'll end up with:
7123 VS5: ... = ..vuse(va_new)..
7125 In case of more than one pattern statement, e.g., widen-mult with
7129 S2 a_T = (TYPE) a_t;
7130 '--> S3: a_it = (interm_type) a_t;
7131 S4 prod_T = a_T * CONST;
7132 '--> S5: prod_T' = a_it w* CONST;
7134 there may be other users of a_T outside the pattern. In that case S2 will
7135 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
7136 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
7137 be recorded in S3. */
/* VINFO is either a loop_vec_info (tested with dyn_cast) or a bb_vec_info
   (obtained with as_a).  In both cases every entry of
   vect_vect_recog_func_ptrs is tried, in table order, on each statement
   visited.  */
7140 vect_pattern_recog (vec_info
*vinfo
)
7145 gimple_stmt_iterator si
;
/* Precisions are computed before any recognizer runs.  */
7148 vect_determine_precisions (vinfo
);
7150 DUMP_VECT_SCOPE ("vect_pattern_recog");
7152 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
7154 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
7155 bbs
= LOOP_VINFO_BBS (loop_vinfo
);
7156 nbbs
= loop
->num_nodes
;
7158 /* Scan through the loop stmts, applying the pattern recognition
7159 functions starting at each stmt visited: */
7160 for (i
= 0; i
< nbbs
; i
++)
7162 basic_block bb
= bbs
[i
];
7163 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
7165 if (is_gimple_debug (gsi_stmt (si
)))
7167 stmt_vec_info stmt_info
= vinfo
->lookup_stmt (gsi_stmt (si
));
7168 /* Scan over all generic vect_recog_xxx_pattern functions. */
7169 for (j
= 0; j
< NUM_PATTERNS
; j
++)
7170 vect_pattern_recog_1 (vinfo
, &vect_vect_recog_func_ptrs
[j
],
/* Basic-block vectorization: walk every block in bb_vinfo->bbs from its
   first statement to its last.  */
7177 bb_vec_info bb_vinfo
= as_a
<bb_vec_info
> (vinfo
);
7178 for (unsigned i
= 0; i
< bb_vinfo
->bbs
.length (); ++i
)
7179 for (gimple_stmt_iterator gsi
= gsi_start_bb (bb_vinfo
->bbs
[i
]);
7180 !gsi_end_p (gsi
); gsi_next (&gsi
))
7182 stmt_vec_info stmt_info
= bb_vinfo
->lookup_stmt (gsi_stmt (gsi
));
7183 if (!stmt_info
|| !STMT_VINFO_VECTORIZABLE (stmt_info
))
7186 /* Scan over all generic vect_recog_xxx_pattern functions. */
7187 for (j
= 0; j
< NUM_PATTERNS
; j
++)
7188 vect_pattern_recog_1 (vinfo
,
7189 &vect_vect_recog_func_ptrs
[j
], stmt_info
);
7193 /* After this no more add_stmt calls are allowed. */
7194 vinfo
->stmt_vec_info_ro
= true;
7197 /* Build a GIMPLE_ASSIGN or GIMPLE_CALL with the tree_code,
7198 or internal_fn contained in ch, respectively. */
7200 vect_gimple_build (tree lhs
, code_helper ch
, tree op0
, tree op1
)
7202 gcc_assert (op0
!= NULL_TREE
);
7203 if (ch
.is_tree_code ())
7204 return gimple_build_assign (lhs
, (tree_code
) ch
, op0
, op1
);
7206 gcc_assert (ch
.is_internal_fn ());
7207 gimple
* stmt
= gimple_build_call_internal (as_internal_fn ((combined_fn
) ch
),
7208 op1
== NULL_TREE
? 1 : 2,
7210 gimple_call_set_lhs (stmt
, lhs
);