1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2022 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
30 #include "optabs-tree.h"
31 #include "insn-config.h"
32 #include "recog.h" /* FIXME: for insn_data */
33 #include "fold-const.h"
34 #include "stor-layout.h"
37 #include "gimple-iterator.h"
39 #include "tree-vectorizer.h"
42 #include "internal-fn.h"
43 #include "case-cfn-macros.h"
44 #include "fold-const-call.h"
47 #include "omp-simd-clone.h"
49 #include "tree-vector-builder.h"
50 #include "vec-perm-indices.h"
51 #include "gimple-range.h"
53 /* Return true if we have a useful VR_RANGE range for VAR, storing it
54 in *MIN_VALUE and *MAX_VALUE if so. Note the range in the dump files. */
57 vect_get_range_info (tree var
, wide_int
*min_value
, wide_int
*max_value
)
60 get_range_query (cfun
)->range_of_expr (vr
, var
);
61 if (vr
.undefined_p ())
62 vr
.set_varying (TREE_TYPE (var
));
63 *min_value
= wi::to_wide (vr
.min ());
64 *max_value
= wi::to_wide (vr
.max ());
65 value_range_kind vr_type
= vr
.kind ();
66 wide_int nonzero
= get_nonzero_bits (var
);
67 signop sgn
= TYPE_SIGN (TREE_TYPE (var
));
68 if (intersect_range_with_nonzero_bits (vr_type
, min_value
, max_value
,
69 nonzero
, sgn
) == VR_RANGE
)
71 if (dump_enabled_p ())
73 dump_generic_expr_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, var
);
74 dump_printf (MSG_NOTE
, " has range [");
75 dump_hex (MSG_NOTE
, *min_value
);
76 dump_printf (MSG_NOTE
, ", ");
77 dump_hex (MSG_NOTE
, *max_value
);
78 dump_printf (MSG_NOTE
, "]\n");
84 if (dump_enabled_p ())
86 dump_generic_expr_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, var
);
87 dump_printf (MSG_NOTE
, " has no range info\n");
93 /* Report that we've found an instance of pattern PATTERN in
97 vect_pattern_detected (const char *name
, gimple
*stmt
)
99 if (dump_enabled_p ())
100 dump_printf_loc (MSG_NOTE
, vect_location
, "%s: detected: %G", name
, stmt
);
103 /* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
104 return the pattern statement's stmt_vec_info. Set its vector type to
105 VECTYPE if it doesn't have one already. */
108 vect_init_pattern_stmt (vec_info
*vinfo
, gimple
*pattern_stmt
,
109 stmt_vec_info orig_stmt_info
, tree vectype
)
111 stmt_vec_info pattern_stmt_info
= vinfo
->lookup_stmt (pattern_stmt
);
112 if (pattern_stmt_info
== NULL
)
113 pattern_stmt_info
= vinfo
->add_stmt (pattern_stmt
);
114 gimple_set_bb (pattern_stmt
, gimple_bb (orig_stmt_info
->stmt
));
116 pattern_stmt_info
->pattern_stmt_p
= true;
117 STMT_VINFO_RELATED_STMT (pattern_stmt_info
) = orig_stmt_info
;
118 STMT_VINFO_DEF_TYPE (pattern_stmt_info
)
119 = STMT_VINFO_DEF_TYPE (orig_stmt_info
);
120 if (!STMT_VINFO_VECTYPE (pattern_stmt_info
))
123 || (VECTOR_BOOLEAN_TYPE_P (vectype
)
124 == vect_use_mask_type_p (orig_stmt_info
)));
125 STMT_VINFO_VECTYPE (pattern_stmt_info
) = vectype
;
126 pattern_stmt_info
->mask_precision
= orig_stmt_info
->mask_precision
;
128 return pattern_stmt_info
;
131 /* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
132 Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
136 vect_set_pattern_stmt (vec_info
*vinfo
, gimple
*pattern_stmt
,
137 stmt_vec_info orig_stmt_info
, tree vectype
)
139 STMT_VINFO_IN_PATTERN_P (orig_stmt_info
) = true;
140 STMT_VINFO_RELATED_STMT (orig_stmt_info
)
141 = vect_init_pattern_stmt (vinfo
, pattern_stmt
, orig_stmt_info
, vectype
);
144 /* Add NEW_STMT to STMT_INFO's pattern definition statements. If VECTYPE
145 is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
146 be different from the vector type of the final pattern statement.
147 If VECTYPE is a mask type, SCALAR_TYPE_FOR_MASK is the scalar type
148 from which it was derived. */
151 append_pattern_def_seq (vec_info
*vinfo
,
152 stmt_vec_info stmt_info
, gimple
*new_stmt
,
153 tree vectype
= NULL_TREE
,
154 tree scalar_type_for_mask
= NULL_TREE
)
156 gcc_assert (!scalar_type_for_mask
157 == (!vectype
|| !VECTOR_BOOLEAN_TYPE_P (vectype
)));
160 stmt_vec_info new_stmt_info
= vinfo
->add_stmt (new_stmt
);
161 STMT_VINFO_VECTYPE (new_stmt_info
) = vectype
;
162 if (scalar_type_for_mask
)
163 new_stmt_info
->mask_precision
164 = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (scalar_type_for_mask
));
166 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
),
170 /* The caller wants to perform new operations on vect_external variable
171 VAR, so that the result of the operations would also be vect_external.
172 Return the edge on which the operations can be performed, if one exists.
173 Return null if the operations should instead be treated as part of
174 the pattern that needs them. */
177 vect_get_external_def_edge (vec_info
*vinfo
, tree var
)
180 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
182 e
= loop_preheader_edge (loop_vinfo
->loop
);
183 if (!SSA_NAME_IS_DEFAULT_DEF (var
))
185 basic_block bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
187 || !dominated_by_p (CDI_DOMINATORS
, e
->dest
, bb
))
194 /* Return true if the target supports a vector version of CODE,
195 where CODE is known to map to a direct optab with the given SUBTYPE.
196 ITYPE specifies the type of (some of) the scalar inputs and OTYPE
197 specifies the type of the scalar result.
199 If CODE allows the inputs and outputs to have different type
200 (such as for WIDEN_SUM_EXPR), it is the input mode rather
201 than the output mode that determines the appropriate target pattern.
202 Operand 0 of the target pattern then specifies the mode that the output
205 When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
206 Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
210 vect_supportable_direct_optab_p (vec_info
*vinfo
, tree otype
, tree_code code
,
211 tree itype
, tree
*vecotype_out
,
212 tree
*vecitype_out
= NULL
,
213 enum optab_subtype subtype
= optab_default
)
215 tree vecitype
= get_vectype_for_scalar_type (vinfo
, itype
);
219 tree vecotype
= get_vectype_for_scalar_type (vinfo
, otype
);
223 optab optab
= optab_for_tree_code (code
, vecitype
, subtype
);
227 insn_code icode
= optab_handler (optab
, TYPE_MODE (vecitype
));
228 if (icode
== CODE_FOR_nothing
229 || insn_data
[icode
].operand
[0].mode
!= TYPE_MODE (vecotype
))
232 *vecotype_out
= vecotype
;
234 *vecitype_out
= vecitype
;
238 /* Round bit precision PRECISION up to a full element. */
241 vect_element_precision (unsigned int precision
)
243 precision
= 1 << ceil_log2 (precision
);
244 return MAX (precision
, BITS_PER_UNIT
);
247 /* If OP is defined by a statement that's being considered for vectorization,
248 return information about that statement, otherwise return NULL. */
251 vect_get_internal_def (vec_info
*vinfo
, tree op
)
253 stmt_vec_info def_stmt_info
= vinfo
->lookup_def (op
);
255 && STMT_VINFO_DEF_TYPE (def_stmt_info
) == vect_internal_def
)
256 return def_stmt_info
;
260 /* Check whether NAME, an ssa-name used in STMT_VINFO,
261 is a result of a type promotion, such that:
262 DEF_STMT: NAME = NOP (name0)
263 If CHECK_SIGN is TRUE, check that either both types are signed or both are
267 type_conversion_p (vec_info
*vinfo
, tree name
, bool check_sign
,
268 tree
*orig_type
, gimple
**def_stmt
, bool *promotion
)
270 tree type
= TREE_TYPE (name
);
272 enum vect_def_type dt
;
274 stmt_vec_info def_stmt_info
;
275 if (!vect_is_simple_use (name
, vinfo
, &dt
, &def_stmt_info
, def_stmt
))
278 if (dt
!= vect_internal_def
279 && dt
!= vect_external_def
&& dt
!= vect_constant_def
)
285 if (!is_gimple_assign (*def_stmt
))
288 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt
)))
291 oprnd0
= gimple_assign_rhs1 (*def_stmt
);
293 *orig_type
= TREE_TYPE (oprnd0
);
294 if (!INTEGRAL_TYPE_P (type
) || !INTEGRAL_TYPE_P (*orig_type
)
295 || ((TYPE_UNSIGNED (type
) != TYPE_UNSIGNED (*orig_type
)) && check_sign
))
298 if (TYPE_PRECISION (type
) >= (TYPE_PRECISION (*orig_type
) * 2))
303 if (!vect_is_simple_use (oprnd0
, vinfo
, &dt
))
309 /* Holds information about an input operand after some sign changes
310 and type promotions have been peeled away. */
311 class vect_unpromoted_value
{
313 vect_unpromoted_value ();
315 void set_op (tree
, vect_def_type
, stmt_vec_info
= NULL
);
317 /* The value obtained after peeling away zero or more casts. */
320 /* The type of OP. */
323 /* The definition type of OP. */
326 /* If OP is the result of peeling at least one cast, and if the cast
327 of OP itself is a vectorizable statement, CASTER identifies that
328 statement, otherwise it is null. */
329 stmt_vec_info caster
;
332 inline vect_unpromoted_value::vect_unpromoted_value ()
335 dt (vect_uninitialized_def
),
340 /* Set the operand to OP_IN, its definition type to DT_IN, and the
341 statement that casts it to CASTER_IN. */
344 vect_unpromoted_value::set_op (tree op_in
, vect_def_type dt_in
,
345 stmt_vec_info caster_in
)
348 type
= TREE_TYPE (op
);
353 /* If OP is a vectorizable SSA name, strip a sequence of integer conversions
354 to reach some vectorizable inner operand OP', continuing as long as it
355 is possible to convert OP' back to OP using a possible sign change
356 followed by a possible promotion P. Return this OP', or null if OP is
357 not a vectorizable SSA name. If there is a promotion P, describe its
358 input in UNPROM, otherwise describe OP' in UNPROM. If SINGLE_USE_P
359 is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
360 have more than one user.
362 A successful return means that it is possible to go from OP' to OP
363 via UNPROM. The cast from OP' to UNPROM is at most a sign change,
364 whereas the cast from UNPROM to OP might be a promotion, a sign
369 signed short *ptr = ...;
370 signed short C = *ptr;
371 unsigned short B = (unsigned short) C; // sign change
372 signed int A = (signed int) B; // unsigned promotion
373 ...possible other uses of A...
374 unsigned int OP = (unsigned int) A; // sign change
376 In this case it's possible to go directly from C to OP using:
378 OP = (unsigned int) (unsigned short) C;
379 +------------+ +--------------+
380 promotion sign change
382 so OP' would be C. The input to the promotion is B, so UNPROM
386 vect_look_through_possible_promotion (vec_info
*vinfo
, tree op
,
387 vect_unpromoted_value
*unprom
,
388 bool *single_use_p
= NULL
)
390 tree res
= NULL_TREE
;
391 tree op_type
= TREE_TYPE (op
);
392 unsigned int orig_precision
= TYPE_PRECISION (op_type
);
393 unsigned int min_precision
= orig_precision
;
394 stmt_vec_info caster
= NULL
;
395 while (TREE_CODE (op
) == SSA_NAME
&& INTEGRAL_TYPE_P (op_type
))
397 /* See whether OP is simple enough to vectorize. */
398 stmt_vec_info def_stmt_info
;
401 if (!vect_is_simple_use (op
, vinfo
, &dt
, &def_stmt_info
, &def_stmt
))
404 /* If OP is the input of a demotion, skip over it to see whether
405 OP is itself the result of a promotion. If so, the combined
406 effect of the promotion and the demotion might fit the required
407 pattern, otherwise neither operation fits.
409 This copes with cases such as the result of an arithmetic
410 operation being truncated before being stored, and where that
411 arithmetic operation has been recognized as an over-widened one. */
412 if (TYPE_PRECISION (op_type
) <= min_precision
)
414 /* Use OP as the UNPROM described above if we haven't yet
415 found a promotion, or if using the new input preserves the
416 sign of the previous promotion. */
418 || TYPE_PRECISION (unprom
->type
) == orig_precision
419 || TYPE_SIGN (unprom
->type
) == TYPE_SIGN (op_type
))
421 unprom
->set_op (op
, dt
, caster
);
422 min_precision
= TYPE_PRECISION (op_type
);
424 /* Stop if we've already seen a promotion and if this
425 conversion does more than change the sign. */
426 else if (TYPE_PRECISION (op_type
)
427 != TYPE_PRECISION (unprom
->type
))
430 /* The sequence now extends to OP. */
434 /* See whether OP is defined by a cast. Record it as CASTER if
435 the cast is potentially vectorizable. */
438 caster
= def_stmt_info
;
440 /* Ignore pattern statements, since we don't link uses for them. */
443 && !STMT_VINFO_RELATED_STMT (caster
)
444 && !has_single_use (res
))
445 *single_use_p
= false;
447 gassign
*assign
= dyn_cast
<gassign
*> (def_stmt
);
448 if (!assign
|| !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt
)))
451 /* Continue with the input to the cast. */
452 op
= gimple_assign_rhs1 (def_stmt
);
453 op_type
= TREE_TYPE (op
);
458 /* OP is an integer operand to an operation that returns TYPE, and we
459 want to treat the operation as a widening one. So far we can treat
460 it as widening from *COMMON_TYPE.
462 Return true if OP is suitable for such a widening operation,
463 either widening from *COMMON_TYPE or from some supertype of it.
464 Update *COMMON_TYPE to the supertype in the latter case.
466 SHIFT_P is true if OP is a shift amount. */
469 vect_joust_widened_integer (tree type
, bool shift_p
, tree op
,
472 /* Calculate the minimum precision required by OP, without changing
473 the sign of either operand. */
474 unsigned int precision
;
477 if (!wi::leu_p (wi::to_widest (op
), TYPE_PRECISION (type
) / 2))
479 precision
= TREE_INT_CST_LOW (op
);
483 precision
= wi::min_precision (wi::to_widest (op
),
484 TYPE_SIGN (*common_type
));
485 if (precision
* 2 > TYPE_PRECISION (type
))
489 /* If OP requires a wider type, switch to that type. The checks
490 above ensure that this is still narrower than the result. */
491 precision
= vect_element_precision (precision
);
492 if (TYPE_PRECISION (*common_type
) < precision
)
493 *common_type
= build_nonstandard_integer_type
494 (precision
, TYPE_UNSIGNED (*common_type
));
498 /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
499 is narrower than type, storing the supertype in *COMMON_TYPE if so. */
502 vect_joust_widened_type (tree type
, tree new_type
, tree
*common_type
)
504 if (types_compatible_p (*common_type
, new_type
))
507 /* See if *COMMON_TYPE can hold all values of NEW_TYPE. */
508 if ((TYPE_PRECISION (new_type
) < TYPE_PRECISION (*common_type
))
509 && (TYPE_UNSIGNED (new_type
) || !TYPE_UNSIGNED (*common_type
)))
512 /* See if NEW_TYPE can hold all values of *COMMON_TYPE. */
513 if (TYPE_PRECISION (*common_type
) < TYPE_PRECISION (new_type
)
514 && (TYPE_UNSIGNED (*common_type
) || !TYPE_UNSIGNED (new_type
)))
516 *common_type
= new_type
;
520 /* We have mismatched signs, with the signed type being
521 no wider than the unsigned type. In this case we need
522 a wider signed type. */
523 unsigned int precision
= MAX (TYPE_PRECISION (*common_type
),
524 TYPE_PRECISION (new_type
));
527 if (precision
* 2 > TYPE_PRECISION (type
))
530 *common_type
= build_nonstandard_integer_type (precision
, false);
534 /* Check whether STMT_INFO can be viewed as a tree of integer operations
535 in which each node either performs CODE or WIDENED_CODE, and where
536 each leaf operand is narrower than the result of STMT_INFO. MAX_NOPS
537 specifies the maximum number of leaf operands. SHIFT_P says whether
538 CODE and WIDENED_CODE are some sort of shift.
540 If STMT_INFO is such a tree, return the number of leaf operands
541 and describe them in UNPROM[0] onwards. Also set *COMMON_TYPE
542 to a type that (a) is narrower than the result of STMT_INFO and
543 (b) can hold all leaf operand values.
545 If SUBTYPE then allow that the signs of the operands
546 may differ in signs but not in precision. SUBTYPE is updated to reflect
549 Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
553 vect_widened_op_tree (vec_info
*vinfo
, stmt_vec_info stmt_info
, tree_code code
,
554 tree_code widened_code
, bool shift_p
,
555 unsigned int max_nops
,
556 vect_unpromoted_value
*unprom
, tree
*common_type
,
557 enum optab_subtype
*subtype
= NULL
)
559 /* Check for an integer operation with the right code. */
560 gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
564 tree_code rhs_code
= gimple_assign_rhs_code (assign
);
565 if (rhs_code
!= code
&& rhs_code
!= widened_code
)
568 tree type
= TREE_TYPE (gimple_assign_lhs (assign
));
569 if (!INTEGRAL_TYPE_P (type
))
572 /* Assume that both operands will be leaf operands. */
575 /* Check the operands. */
576 unsigned int next_op
= 0;
577 for (unsigned int i
= 0; i
< 2; ++i
)
579 vect_unpromoted_value
*this_unprom
= &unprom
[next_op
];
580 unsigned int nops
= 1;
581 tree op
= gimple_op (assign
, i
+ 1);
582 if (i
== 1 && TREE_CODE (op
) == INTEGER_CST
)
584 /* We already have a common type from earlier operands.
585 Update it to account for OP. */
586 this_unprom
->set_op (op
, vect_constant_def
);
587 if (!vect_joust_widened_integer (type
, shift_p
, op
, common_type
))
592 /* Only allow shifts by constants. */
593 if (shift_p
&& i
== 1)
596 if (!vect_look_through_possible_promotion (vinfo
, op
, this_unprom
))
599 if (TYPE_PRECISION (this_unprom
->type
) == TYPE_PRECISION (type
))
601 /* The operand isn't widened. If STMT_INFO has the code
602 for an unwidened operation, recursively check whether
603 this operand is a node of the tree. */
606 || this_unprom
->dt
!= vect_internal_def
)
609 /* Give back the leaf slot allocated above now that we're
610 not treating this as a leaf operand. */
613 /* Recursively process the definition of the operand. */
614 stmt_vec_info def_stmt_info
615 = vinfo
->lookup_def (this_unprom
->op
);
616 nops
= vect_widened_op_tree (vinfo
, def_stmt_info
, code
,
617 widened_code
, shift_p
, max_nops
,
618 this_unprom
, common_type
,
627 /* Make sure that the operand is narrower than the result. */
628 if (TYPE_PRECISION (this_unprom
->type
) * 2
629 > TYPE_PRECISION (type
))
632 /* Update COMMON_TYPE for the new operand. */
634 *common_type
= this_unprom
->type
;
635 else if (!vect_joust_widened_type (type
, this_unprom
->type
,
640 /* See if we can sign extend the smaller type. */
641 if (TYPE_PRECISION (this_unprom
->type
)
642 > TYPE_PRECISION (*common_type
))
643 *common_type
= this_unprom
->type
;
644 *subtype
= optab_vector_mixed_sign
;
656 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
657 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
660 vect_recog_temp_ssa_var (tree type
, gimple
*stmt
)
662 return make_temp_ssa_name (type
, stmt
, "patt");
665 /* STMT2_INFO describes a type conversion that could be split into STMT1
666 followed by a version of STMT2_INFO that takes NEW_RHS as its first
667 input. Try to do this using pattern statements, returning true on
671 vect_split_statement (vec_info
*vinfo
, stmt_vec_info stmt2_info
, tree new_rhs
,
672 gimple
*stmt1
, tree vectype
)
674 if (is_pattern_stmt_p (stmt2_info
))
676 /* STMT2_INFO is part of a pattern. Get the statement to which
677 the pattern is attached. */
678 stmt_vec_info orig_stmt2_info
= STMT_VINFO_RELATED_STMT (stmt2_info
);
679 vect_init_pattern_stmt (vinfo
, stmt1
, orig_stmt2_info
, vectype
);
681 if (dump_enabled_p ())
682 dump_printf_loc (MSG_NOTE
, vect_location
,
683 "Splitting pattern statement: %G", stmt2_info
->stmt
);
685 /* Since STMT2_INFO is a pattern statement, we can change it
686 in-situ without worrying about changing the code for the
688 gimple_assign_set_rhs1 (stmt2_info
->stmt
, new_rhs
);
690 if (dump_enabled_p ())
692 dump_printf_loc (MSG_NOTE
, vect_location
, "into: %G", stmt1
);
693 dump_printf_loc (MSG_NOTE
, vect_location
, "and: %G",
697 gimple_seq
*def_seq
= &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info
);
698 if (STMT_VINFO_RELATED_STMT (orig_stmt2_info
) == stmt2_info
)
699 /* STMT2_INFO is the actual pattern statement. Add STMT1
700 to the end of the definition sequence. */
701 gimple_seq_add_stmt_without_update (def_seq
, stmt1
);
704 /* STMT2_INFO belongs to the definition sequence. Insert STMT1
706 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt2_info
->stmt
, def_seq
);
707 gsi_insert_before_without_update (&gsi
, stmt1
, GSI_SAME_STMT
);
713 /* STMT2_INFO doesn't yet have a pattern. Try to create a
714 two-statement pattern now. */
715 gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info
));
716 tree lhs_type
= TREE_TYPE (gimple_get_lhs (stmt2_info
->stmt
));
717 tree lhs_vectype
= get_vectype_for_scalar_type (vinfo
, lhs_type
);
721 if (dump_enabled_p ())
722 dump_printf_loc (MSG_NOTE
, vect_location
,
723 "Splitting statement: %G", stmt2_info
->stmt
);
725 /* Add STMT1 as a singleton pattern definition sequence. */
726 gimple_seq
*def_seq
= &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info
);
727 vect_init_pattern_stmt (vinfo
, stmt1
, stmt2_info
, vectype
);
728 gimple_seq_add_stmt_without_update (def_seq
, stmt1
);
730 /* Build the second of the two pattern statements. */
731 tree new_lhs
= vect_recog_temp_ssa_var (lhs_type
, NULL
);
732 gassign
*new_stmt2
= gimple_build_assign (new_lhs
, NOP_EXPR
, new_rhs
);
733 vect_set_pattern_stmt (vinfo
, new_stmt2
, stmt2_info
, lhs_vectype
);
735 if (dump_enabled_p ())
737 dump_printf_loc (MSG_NOTE
, vect_location
,
738 "into pattern statements: %G", stmt1
);
739 dump_printf_loc (MSG_NOTE
, vect_location
, "and: %G", new_stmt2
);
746 /* Convert UNPROM to TYPE and return the result, adding new statements
747 to STMT_INFO's pattern definition statements if no better way is
748 available. VECTYPE is the vector form of TYPE.
750 If SUBTYPE then convert the type based on the subtype. */
753 vect_convert_input (vec_info
*vinfo
, stmt_vec_info stmt_info
, tree type
,
754 vect_unpromoted_value
*unprom
, tree vectype
,
755 enum optab_subtype subtype
= optab_default
)
758 /* Update the type if the signs differ. */
759 if (subtype
== optab_vector_mixed_sign
760 && TYPE_SIGN (type
) != TYPE_SIGN (TREE_TYPE (unprom
->op
)))
761 type
= build_nonstandard_integer_type (TYPE_PRECISION (type
),
762 TYPE_SIGN (unprom
->type
));
764 /* Check for a no-op conversion. */
765 if (types_compatible_p (type
, TREE_TYPE (unprom
->op
)))
768 /* Allow the caller to create constant vect_unpromoted_values. */
769 if (TREE_CODE (unprom
->op
) == INTEGER_CST
)
770 return wide_int_to_tree (type
, wi::to_widest (unprom
->op
));
772 tree input
= unprom
->op
;
775 tree lhs
= gimple_get_lhs (unprom
->caster
->stmt
);
776 tree lhs_type
= TREE_TYPE (lhs
);
778 /* If the result of the existing cast is the right width, use it
779 instead of the source of the cast. */
780 if (TYPE_PRECISION (lhs_type
) == TYPE_PRECISION (type
))
782 /* If the precision we want is between the source and result
783 precisions of the existing cast, try splitting the cast into
784 two and tapping into a mid-way point. */
785 else if (TYPE_PRECISION (lhs_type
) > TYPE_PRECISION (type
)
786 && TYPE_PRECISION (type
) > TYPE_PRECISION (unprom
->type
))
788 /* In order to preserve the semantics of the original cast,
789 give the mid-way point the same signedness as the input value.
791 It would be possible to use a signed type here instead if
792 TYPE is signed and UNPROM->TYPE is unsigned, but that would
793 make the sign of the midtype sensitive to the order in
794 which we process the statements, since the signedness of
795 TYPE is the signedness required by just one of possibly
796 many users. Also, unsigned promotions are usually as cheap
797 as or cheaper than signed ones, so it's better to keep an
798 unsigned promotion. */
799 tree midtype
= build_nonstandard_integer_type
800 (TYPE_PRECISION (type
), TYPE_UNSIGNED (unprom
->type
));
801 tree vec_midtype
= get_vectype_for_scalar_type (vinfo
, midtype
);
804 input
= vect_recog_temp_ssa_var (midtype
, NULL
);
805 gassign
*new_stmt
= gimple_build_assign (input
, NOP_EXPR
,
807 if (!vect_split_statement (vinfo
, unprom
->caster
, input
, new_stmt
,
809 append_pattern_def_seq (vinfo
, stmt_info
,
810 new_stmt
, vec_midtype
);
814 /* See if we can reuse an existing result. */
815 if (types_compatible_p (type
, TREE_TYPE (input
)))
819 /* We need a new conversion statement. */
820 tree new_op
= vect_recog_temp_ssa_var (type
, NULL
);
821 gassign
*new_stmt
= gimple_build_assign (new_op
, NOP_EXPR
, input
);
823 /* If OP is an external value, see if we can insert the new statement
824 on an incoming edge. */
825 if (input
== unprom
->op
&& unprom
->dt
== vect_external_def
)
826 if (edge e
= vect_get_external_def_edge (vinfo
, input
))
828 basic_block new_bb
= gsi_insert_on_edge_immediate (e
, new_stmt
);
829 gcc_assert (!new_bb
);
833 /* As a (common) last resort, add the statement to the pattern itself. */
834 append_pattern_def_seq (vinfo
, stmt_info
, new_stmt
, vectype
);
838 /* Invoke vect_convert_input for N elements of UNPROM and store the
839 result in the corresponding elements of RESULT.
841 If SUBTYPE then convert the type based on the subtype. */
844 vect_convert_inputs (vec_info
*vinfo
, stmt_vec_info stmt_info
, unsigned int n
,
845 tree
*result
, tree type
, vect_unpromoted_value
*unprom
,
846 tree vectype
, enum optab_subtype subtype
= optab_default
)
848 for (unsigned int i
= 0; i
< n
; ++i
)
851 for (j
= 0; j
< i
; ++j
)
852 if (unprom
[j
].op
== unprom
[i
].op
)
856 result
[i
] = result
[j
];
858 result
[i
] = vect_convert_input (vinfo
, stmt_info
,
859 type
, &unprom
[i
], vectype
, subtype
);
863 /* The caller has created a (possibly empty) sequence of pattern definition
864 statements followed by a single statement PATTERN_STMT. Cast the result
865 of this final statement to TYPE. If a new statement is needed, add
866 PATTERN_STMT to the end of STMT_INFO's pattern definition statements
867 and return the new statement, otherwise return PATTERN_STMT as-is.
868 VECITYPE is the vector form of PATTERN_STMT's result type. */
871 vect_convert_output (vec_info
*vinfo
, stmt_vec_info stmt_info
, tree type
,
872 gimple
*pattern_stmt
, tree vecitype
)
874 tree lhs
= gimple_get_lhs (pattern_stmt
);
875 if (!types_compatible_p (type
, TREE_TYPE (lhs
)))
877 append_pattern_def_seq (vinfo
, stmt_info
, pattern_stmt
, vecitype
);
878 tree cast_var
= vect_recog_temp_ssa_var (type
, NULL
);
879 pattern_stmt
= gimple_build_assign (cast_var
, NOP_EXPR
, lhs
);
884 /* Return true if STMT_VINFO describes a reduction for which reassociation
885 is allowed. If STMT_INFO is part of a group, assume that it's part of
886 a reduction chain and optimistically assume that all statements
887 except the last allow reassociation.
888 Also require it to have code CODE and to be a reduction
889 in the outermost loop. When returning true, store the operands in
890 *OP0_OUT and *OP1_OUT. */
893 vect_reassociating_reduction_p (vec_info
*vinfo
,
894 stmt_vec_info stmt_info
, tree_code code
,
895 tree
*op0_out
, tree
*op1_out
)
897 loop_vec_info loop_info
= dyn_cast
<loop_vec_info
> (vinfo
);
901 gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
902 if (!assign
|| gimple_assign_rhs_code (assign
) != code
)
905 /* We don't allow changing the order of the computation in the inner-loop
906 when doing outer-loop vectorization. */
907 class loop
*loop
= LOOP_VINFO_LOOP (loop_info
);
908 if (loop
&& nested_in_vect_loop_p (loop
, stmt_info
))
911 if (STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
)
913 if (needs_fold_left_reduction_p (TREE_TYPE (gimple_assign_lhs (assign
)),
917 else if (REDUC_GROUP_FIRST_ELEMENT (stmt_info
) == NULL
)
920 *op0_out
= gimple_assign_rhs1 (assign
);
921 *op1_out
= gimple_assign_rhs2 (assign
);
922 if (commutative_tree_code (code
) && STMT_VINFO_REDUC_IDX (stmt_info
) == 0)
923 std::swap (*op0_out
, *op1_out
);
927 /* match.pd function to match
928 (cond (cmp@3 a b) (convert@1 c) (convert@2 d))
930 1) @1, @2, c, d, a, b are all integral type.
931 2) There's single_use for both @1 and @2.
932 3) a, c have same precision.
933 4) c and @1 have different precision.
934 5) c, d are the same type or they can differ in sign when convert is
937 record a and c and d and @3. */
939 extern bool gimple_cond_expr_convert_p (tree
, tree
*, tree (*)(tree
));
941 /* Function vect_recog_cond_expr_convert
943 Try to find the following pattern:
948 TYPE_E op_true = (TYPE_E) A;
949 TYPE_E op_false = (TYPE_E) B;
951 E = C cmp D ? op_true : op_false;
954 TYPE_PRECISION (TYPE_E) != TYPE_PRECISION (TYPE_CD);
955 TYPE_PRECISION (TYPE_AB) == TYPE_PRECISION (TYPE_CD);
956 single_use of op_true and op_false.
957 TYPE_AB could differ in sign when (TYPE_E) A is a truncation.
961 * STMT_VINFO: The stmt from which the pattern search begins.
962 here it starts with E = c cmp D ? op_true : op_false;
966 TYPE1 E' = C cmp D ? A : B;
967 TYPE3 E = (TYPE3) E';
969 There may extra nop_convert for A or B to handle different signness.
971 * TYPE_OUT: The vector type of the output of this pattern.
973 * Return value: A new stmt that will be used to replace the sequence of
974 stmts that constitute the pattern. In this case it will be:
976 E' = C cmp D ? A : B; is recorded in pattern definition statements; */
979 vect_recog_cond_expr_convert_pattern (vec_info
*vinfo
,
980 stmt_vec_info stmt_vinfo
, tree
*type_out
)
982 gassign
*last_stmt
= dyn_cast
<gassign
*> (stmt_vinfo
->stmt
);
983 tree lhs
, match
[4], temp
, type
, new_lhs
, op2
;
985 gimple
*pattern_stmt
;
990 lhs
= gimple_assign_lhs (last_stmt
);
992 /* Find E = C cmp D ? (TYPE3) A ? (TYPE3) B;
993 TYPE_PRECISION (A) == TYPE_PRECISION (C). */
994 if (!gimple_cond_expr_convert_p (lhs
, &match
[0], NULL
))
997 vect_pattern_detected ("vect_recog_cond_expr_convert_pattern", last_stmt
);
1000 type
= TREE_TYPE (match
[1]);
1001 if (TYPE_SIGN (type
) != TYPE_SIGN (TREE_TYPE (match
[2])))
1003 op2
= vect_recog_temp_ssa_var (type
, NULL
);
1004 gimple
* nop_stmt
= gimple_build_assign (op2
, NOP_EXPR
, match
[2]);
1005 append_pattern_def_seq (vinfo
, stmt_vinfo
, nop_stmt
,
1006 get_vectype_for_scalar_type (vinfo
, type
));
1009 temp
= vect_recog_temp_ssa_var (type
, NULL
);
1010 cond_stmt
= gimple_build_assign (temp
, build3 (COND_EXPR
, type
, match
[3],
1012 append_pattern_def_seq (vinfo
, stmt_vinfo
, cond_stmt
,
1013 get_vectype_for_scalar_type (vinfo
, type
));
1014 new_lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
1015 pattern_stmt
= gimple_build_assign (new_lhs
, NOP_EXPR
, temp
);
1016 *type_out
= STMT_VINFO_VECTYPE (stmt_vinfo
);
1018 if (dump_enabled_p ())
1019 dump_printf_loc (MSG_NOTE
, vect_location
,
1020 "created pattern stmt: %G", pattern_stmt
);
1021 return pattern_stmt
;
1024 /* Function vect_recog_dot_prod_pattern
1026 Try to find the following pattern:
1033 sum_0 = phi <init, sum_1>
1036 S3 x_T = (TYPE1) x_t;
1037 S4 y_T = (TYPE1) y_t;
1038 S5 prod = x_T * y_T;
1039 [S6 prod = (TYPE2) prod; #optional]
1040 S7 sum_1 = prod + sum_0;
1042 where 'TYPE1' is exactly double the size of type 'type1a' and 'type1b',
1043 the sign of 'TYPE1' must be one of 'type1a' or 'type1b' but the sign of
1044 'type1a' and 'type1b' can differ.
1048 * STMT_VINFO: The stmt from which the pattern search begins. In the
1049 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
1054 * TYPE_OUT: The type of the output of this pattern.
1056 * Return value: A new stmt that will be used to replace the sequence of
1057 stmts that constitute the pattern. In this case it will be:
1058 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
1060 Note: The dot-prod idiom is a widening reduction pattern that is
1061 vectorized without preserving all the intermediate results. It
1062 produces only N/2 (widened) results (by summing up pairs of
1063 intermediate results) rather than all N results. Therefore, we
1064 cannot allow this pattern when we want to get all the results and in
1065 the correct order (as is the case when this computation is in an
 1066      inner-loop nested in an outer-loop that is being vectorized).  */
1069 vect_recog_dot_prod_pattern (vec_info
*vinfo
,
1070 stmt_vec_info stmt_vinfo
, tree
*type_out
)
1072 tree oprnd0
, oprnd1
;
1073 gimple
*last_stmt
= stmt_vinfo
->stmt
;
1074 tree type
, half_type
;
1075 gimple
*pattern_stmt
;
1078 /* Look for the following pattern
1082 DDPROD = (TYPE2) DPROD;
1083 sum_1 = DDPROD + sum_0;
1085 - DX is double the size of X
1086 - DY is double the size of Y
1087 - DX, DY, DPROD all have the same type but the sign
1088 between X, Y and DPROD can differ.
1089 - sum is the same size of DPROD or bigger
1090 - sum has been recognized as a reduction variable.
1092 This is equivalent to:
1093 DPROD = X w* Y; #widen mult
1094 sum_1 = DPROD w+ sum_0; #widen summation
1096 DPROD = X w* Y; #widen mult
1097 sum_1 = DPROD + sum_0; #summation
1100 /* Starting from LAST_STMT, follow the defs of its uses in search
1101 of the above pattern. */
1103 if (!vect_reassociating_reduction_p (vinfo
, stmt_vinfo
, PLUS_EXPR
,
1107 type
= TREE_TYPE (gimple_get_lhs (last_stmt
));
1109 vect_unpromoted_value unprom_mult
;
1110 oprnd0
= vect_look_through_possible_promotion (vinfo
, oprnd0
, &unprom_mult
);
1112 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1113 we know that oprnd1 is the reduction variable (defined by a loop-header
1114 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1115 Left to check that oprnd0 is defined by a (widen_)mult_expr */
1119 stmt_vec_info mult_vinfo
= vect_get_internal_def (vinfo
, oprnd0
);
1123 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
1124 inside the loop (in case we are analyzing an outer-loop). */
1125 vect_unpromoted_value unprom0
[2];
1126 enum optab_subtype subtype
= optab_vector
;
1127 if (!vect_widened_op_tree (vinfo
, mult_vinfo
, MULT_EXPR
, WIDEN_MULT_EXPR
,
1128 false, 2, unprom0
, &half_type
, &subtype
))
1131 /* If there are two widening operations, make sure they agree on the sign
1132 of the extension. The result of an optab_vector_mixed_sign operation
1133 is signed; otherwise, the result has the same sign as the operands. */
1134 if (TYPE_PRECISION (unprom_mult
.type
) != TYPE_PRECISION (type
)
1135 && (subtype
== optab_vector_mixed_sign
1136 ? TYPE_UNSIGNED (unprom_mult
.type
)
1137 : TYPE_SIGN (unprom_mult
.type
) != TYPE_SIGN (half_type
)))
1140 vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt
);
1143 if (!vect_supportable_direct_optab_p (vinfo
, type
, DOT_PROD_EXPR
, half_type
,
1144 type_out
, &half_vectype
, subtype
))
1147 /* Get the inputs in the appropriate types. */
1149 vect_convert_inputs (vinfo
, stmt_vinfo
, 2, mult_oprnd
, half_type
,
1150 unprom0
, half_vectype
, subtype
);
1152 var
= vect_recog_temp_ssa_var (type
, NULL
);
1153 pattern_stmt
= gimple_build_assign (var
, DOT_PROD_EXPR
,
1154 mult_oprnd
[0], mult_oprnd
[1], oprnd1
);
1156 return pattern_stmt
;
1160 /* Function vect_recog_sad_pattern
1162 Try to find the following Sum of Absolute Difference (SAD) pattern:
1165 signed TYPE1 diff, abs_diff;
1168 sum_0 = phi <init, sum_1>
1171 S3 x_T = (TYPE1) x_t;
1172 S4 y_T = (TYPE1) y_t;
1173 S5 diff = x_T - y_T;
1174 S6 abs_diff = ABS_EXPR <diff>;
1175 [S7 abs_diff = (TYPE2) abs_diff; #optional]
1176 S8 sum_1 = abs_diff + sum_0;
1178 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
1179 same size of 'TYPE1' or bigger. This is a special case of a reduction
1184 * STMT_VINFO: The stmt from which the pattern search begins. In the
1185 example, when this function is called with S8, the pattern
1186 {S3,S4,S5,S6,S7,S8} will be detected.
1190 * TYPE_OUT: The type of the output of this pattern.
1192 * Return value: A new stmt that will be used to replace the sequence of
1193 stmts that constitute the pattern. In this case it will be:
1194 SAD_EXPR <x_t, y_t, sum_0>
1198 vect_recog_sad_pattern (vec_info
*vinfo
,
1199 stmt_vec_info stmt_vinfo
, tree
*type_out
)
1201 gimple
*last_stmt
= stmt_vinfo
->stmt
;
1204 /* Look for the following pattern
1208 DAD = ABS_EXPR <DDIFF>;
1209 DDPROD = (TYPE2) DPROD;
1210 sum_1 = DAD + sum_0;
1212 - DX is at least double the size of X
1213 - DY is at least double the size of Y
1214 - DX, DY, DDIFF, DAD all have the same type
1215 - sum is the same size of DAD or bigger
1216 - sum has been recognized as a reduction variable.
1218 This is equivalent to:
1219 DDIFF = X w- Y; #widen sub
1220 DAD = ABS_EXPR <DDIFF>;
1221 sum_1 = DAD w+ sum_0; #widen summation
1223 DDIFF = X w- Y; #widen sub
1224 DAD = ABS_EXPR <DDIFF>;
1225 sum_1 = DAD + sum_0; #summation
1228 /* Starting from LAST_STMT, follow the defs of its uses in search
1229 of the above pattern. */
1231 tree plus_oprnd0
, plus_oprnd1
;
1232 if (!vect_reassociating_reduction_p (vinfo
, stmt_vinfo
, PLUS_EXPR
,
1233 &plus_oprnd0
, &plus_oprnd1
))
1236 tree sum_type
= TREE_TYPE (gimple_get_lhs (last_stmt
));
1238 /* Any non-truncating sequence of conversions is OK here, since
1239 with a successful match, the result of the ABS(U) is known to fit
1240 within the nonnegative range of the result type. (It cannot be the
1241 negative of the minimum signed value due to the range of the widening
1243 vect_unpromoted_value unprom_abs
;
1244 plus_oprnd0
= vect_look_through_possible_promotion (vinfo
, plus_oprnd0
,
1247 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1248 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
1249 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
1250 Then check that plus_oprnd0 is defined by an abs_expr. */
1255 stmt_vec_info abs_stmt_vinfo
= vect_get_internal_def (vinfo
, plus_oprnd0
);
1256 if (!abs_stmt_vinfo
)
1259 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
1260 inside the loop (in case we are analyzing an outer-loop). */
1261 gassign
*abs_stmt
= dyn_cast
<gassign
*> (abs_stmt_vinfo
->stmt
);
1263 || (gimple_assign_rhs_code (abs_stmt
) != ABS_EXPR
1264 && gimple_assign_rhs_code (abs_stmt
) != ABSU_EXPR
))
1267 tree abs_oprnd
= gimple_assign_rhs1 (abs_stmt
);
1268 tree abs_type
= TREE_TYPE (abs_oprnd
);
1269 if (TYPE_UNSIGNED (abs_type
))
1272 /* Peel off conversions from the ABS input. This can involve sign
1273 changes (e.g. from an unsigned subtraction to a signed ABS input)
1274 or signed promotion, but it can't include unsigned promotion.
1275 (Note that ABS of an unsigned promotion should have been folded
1276 away before now anyway.) */
1277 vect_unpromoted_value unprom_diff
;
1278 abs_oprnd
= vect_look_through_possible_promotion (vinfo
, abs_oprnd
,
1282 if (TYPE_PRECISION (unprom_diff
.type
) != TYPE_PRECISION (abs_type
)
1283 && TYPE_UNSIGNED (unprom_diff
.type
))
1286 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
1287 stmt_vec_info diff_stmt_vinfo
= vect_get_internal_def (vinfo
, abs_oprnd
);
1288 if (!diff_stmt_vinfo
)
1291 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
1292 inside the loop (in case we are analyzing an outer-loop). */
1293 vect_unpromoted_value unprom
[2];
1294 if (!vect_widened_op_tree (vinfo
, diff_stmt_vinfo
, MINUS_EXPR
, WIDEN_MINUS_EXPR
,
1295 false, 2, unprom
, &half_type
))
1298 vect_pattern_detected ("vect_recog_sad_pattern", last_stmt
);
1301 if (!vect_supportable_direct_optab_p (vinfo
, sum_type
, SAD_EXPR
, half_type
,
1302 type_out
, &half_vectype
))
1305 /* Get the inputs to the SAD_EXPR in the appropriate types. */
1307 vect_convert_inputs (vinfo
, stmt_vinfo
, 2, sad_oprnd
, half_type
,
1308 unprom
, half_vectype
);
1310 tree var
= vect_recog_temp_ssa_var (sum_type
, NULL
);
1311 gimple
*pattern_stmt
= gimple_build_assign (var
, SAD_EXPR
, sad_oprnd
[0],
1312 sad_oprnd
[1], plus_oprnd1
);
1314 return pattern_stmt
;
1317 /* Recognize an operation that performs ORIG_CODE on widened inputs,
1318 so that it can be treated as though it had the form:
1322 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1323 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1324 | RES_TYPE a_extend = (RES_TYPE) a_cast; // promotion from HALF_TYPE
1325 | RES_TYPE b_extend = (RES_TYPE) b_cast; // promotion from HALF_TYPE
1326 | RES_TYPE res = a_extend ORIG_CODE b_extend;
1328 Try to replace the pattern with:
1332 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1333 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1334 | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
1335 | RES_TYPE res = (EXT_TYPE) ext; // possible no-op
1337 where EXT_TYPE is wider than HALF_TYPE but has the same signedness.
1339 SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts. NAME is the
1340 name of the pattern being matched, for dump purposes. */
1343 vect_recog_widen_op_pattern (vec_info
*vinfo
,
1344 stmt_vec_info last_stmt_info
, tree
*type_out
,
1345 tree_code orig_code
, tree_code wide_code
,
1346 bool shift_p
, const char *name
)
1348 gimple
*last_stmt
= last_stmt_info
->stmt
;
1350 vect_unpromoted_value unprom
[2];
1352 if (!vect_widened_op_tree (vinfo
, last_stmt_info
, orig_code
, orig_code
,
1353 shift_p
, 2, unprom
, &half_type
))
1356 /* Pattern detected. */
1357 vect_pattern_detected (name
, last_stmt
);
1359 tree type
= TREE_TYPE (gimple_get_lhs (last_stmt
));
1361 if (TYPE_PRECISION (type
) != TYPE_PRECISION (half_type
) * 2
1362 || TYPE_UNSIGNED (type
) != TYPE_UNSIGNED (half_type
))
1363 itype
= build_nonstandard_integer_type (TYPE_PRECISION (half_type
) * 2,
1364 TYPE_UNSIGNED (half_type
));
1366 /* Check target support */
1367 tree vectype
= get_vectype_for_scalar_type (vinfo
, half_type
);
1368 tree vecitype
= get_vectype_for_scalar_type (vinfo
, itype
);
1370 tree vecctype
= vecitype
;
1371 if (orig_code
== MINUS_EXPR
1372 && TYPE_UNSIGNED (itype
)
1373 && TYPE_PRECISION (type
) > TYPE_PRECISION (itype
))
1375 /* Subtraction is special, even if half_type is unsigned and no matter
1376 whether type is signed or unsigned, if type is wider than itype,
1377 we need to sign-extend from the widening operation result to the
1379 Consider half_type unsigned char, operand 1 0xfe, operand 2 0xff,
1380 itype unsigned short and type either int or unsigned int.
1381 Widened (unsigned short) 0xfe - (unsigned short) 0xff is
1382 (unsigned short) 0xffff, but for type int we want the result -1
1383 and for type unsigned int 0xffffffff rather than 0xffff. */
1384 ctype
= build_nonstandard_integer_type (TYPE_PRECISION (itype
), 0);
1385 vecctype
= get_vectype_for_scalar_type (vinfo
, ctype
);
1388 enum tree_code dummy_code
;
1390 auto_vec
<tree
> dummy_vec
;
1394 || !supportable_widening_operation (vinfo
, wide_code
, last_stmt_info
,
1396 &dummy_code
, &dummy_code
,
1397 &dummy_int
, &dummy_vec
))
1400 *type_out
= get_vectype_for_scalar_type (vinfo
, type
);
1405 vect_convert_inputs (vinfo
, last_stmt_info
,
1406 2, oprnd
, half_type
, unprom
, vectype
);
1408 tree var
= vect_recog_temp_ssa_var (itype
, NULL
);
1409 gimple
*pattern_stmt
= gimple_build_assign (var
, wide_code
,
1410 oprnd
[0], oprnd
[1]);
1412 if (vecctype
!= vecitype
)
1413 pattern_stmt
= vect_convert_output (vinfo
, last_stmt_info
, ctype
,
1414 pattern_stmt
, vecitype
);
1416 return vect_convert_output (vinfo
, last_stmt_info
,
1417 type
, pattern_stmt
, vecctype
);
1420 /* Try to detect multiplication on widened inputs, converting MULT_EXPR
1421 to WIDEN_MULT_EXPR. See vect_recog_widen_op_pattern for details. */
1424 vect_recog_widen_mult_pattern (vec_info
*vinfo
, stmt_vec_info last_stmt_info
,
1427 return vect_recog_widen_op_pattern (vinfo
, last_stmt_info
, type_out
,
1428 MULT_EXPR
, WIDEN_MULT_EXPR
, false,
1429 "vect_recog_widen_mult_pattern");
1432 /* Try to detect addition on widened inputs, converting PLUS_EXPR
1433 to WIDEN_PLUS_EXPR. See vect_recog_widen_op_pattern for details. */
1436 vect_recog_widen_plus_pattern (vec_info
*vinfo
, stmt_vec_info last_stmt_info
,
1439 return vect_recog_widen_op_pattern (vinfo
, last_stmt_info
, type_out
,
1440 PLUS_EXPR
, WIDEN_PLUS_EXPR
, false,
1441 "vect_recog_widen_plus_pattern");
1444 /* Try to detect subtraction on widened inputs, converting MINUS_EXPR
1445 to WIDEN_MINUS_EXPR. See vect_recog_widen_op_pattern for details. */
1447 vect_recog_widen_minus_pattern (vec_info
*vinfo
, stmt_vec_info last_stmt_info
,
1450 return vect_recog_widen_op_pattern (vinfo
, last_stmt_info
, type_out
,
1451 MINUS_EXPR
, WIDEN_MINUS_EXPR
, false,
1452 "vect_recog_widen_minus_pattern");
1455 /* Function vect_recog_popcount_pattern
1457 Try to find the following pattern:
1463 temp_in = (UTYPE2)A;
1465 temp_out = __builtin_popcount{,l,ll} (temp_in);
1466 B = (TYPE1) temp_out;
1468 TYPE2 may or may not be equal to TYPE3.
1469 i.e. TYPE2 is equal to TYPE3 for __builtin_popcount
1470 i.e. TYPE2 is not equal to TYPE3 for __builtin_popcountll
1474 * STMT_VINFO: The stmt from which the pattern search begins.
1475 here it starts with B = (TYPE1) temp_out;
1479 * TYPE_OUT: The vector type of the output of this pattern.
1481 * Return value: A new stmt that will be used to replace the sequence of
1482 stmts that constitute the pattern. In this case it will be:
1487 vect_recog_popcount_pattern (vec_info
*vinfo
,
1488 stmt_vec_info stmt_vinfo
, tree
*type_out
)
1490 gassign
*last_stmt
= dyn_cast
<gassign
*> (stmt_vinfo
->stmt
);
1491 gimple
*popcount_stmt
, *pattern_stmt
;
1492 tree rhs_oprnd
, rhs_origin
, lhs_oprnd
, lhs_type
, vec_type
, new_var
;
1493 auto_vec
<tree
> vargs
;
1495 /* Find B = (TYPE1) temp_out. */
1498 tree_code code
= gimple_assign_rhs_code (last_stmt
);
1499 if (!CONVERT_EXPR_CODE_P (code
))
1502 lhs_oprnd
= gimple_assign_lhs (last_stmt
);
1503 lhs_type
= TREE_TYPE (lhs_oprnd
);
1504 if (!INTEGRAL_TYPE_P (lhs_type
))
1507 rhs_oprnd
= gimple_assign_rhs1 (last_stmt
);
1508 if (TREE_CODE (rhs_oprnd
) != SSA_NAME
1509 || !has_single_use (rhs_oprnd
))
1511 popcount_stmt
= SSA_NAME_DEF_STMT (rhs_oprnd
);
1513 /* Find temp_out = __builtin_popcount{,l,ll} (temp_in); */
1514 if (!is_gimple_call (popcount_stmt
))
1516 switch (gimple_call_combined_fn (popcount_stmt
))
1524 if (gimple_call_num_args (popcount_stmt
) != 1)
1527 rhs_oprnd
= gimple_call_arg (popcount_stmt
, 0);
1528 vect_unpromoted_value unprom_diff
;
1529 rhs_origin
= vect_look_through_possible_promotion (vinfo
, rhs_oprnd
,
1535 /* Input and output of .POPCOUNT should be same-precision integer.
1536 Also A should be unsigned or same precision as temp_in,
1537 otherwise there would be sign_extend from A to temp_in. */
1538 if (TYPE_PRECISION (unprom_diff
.type
) != TYPE_PRECISION (lhs_type
)
1539 || (!TYPE_UNSIGNED (unprom_diff
.type
)
1540 && (TYPE_PRECISION (unprom_diff
.type
)
1541 != TYPE_PRECISION (TREE_TYPE (rhs_oprnd
)))))
1543 vargs
.safe_push (unprom_diff
.op
);
1545 vect_pattern_detected ("vec_regcog_popcount_pattern", popcount_stmt
);
1546 vec_type
= get_vectype_for_scalar_type (vinfo
, lhs_type
);
1547 /* Do it only if the backend has popcount<vector_mode>2 pattern. */
1549 || !direct_internal_fn_supported_p (IFN_POPCOUNT
, vec_type
,
1550 OPTIMIZE_FOR_SPEED
))
1553 /* Create B = .POPCOUNT (A). */
1554 new_var
= vect_recog_temp_ssa_var (lhs_type
, NULL
);
1555 pattern_stmt
= gimple_build_call_internal_vec (IFN_POPCOUNT
, vargs
);
1556 gimple_call_set_lhs (pattern_stmt
, new_var
);
1557 gimple_set_location (pattern_stmt
, gimple_location (last_stmt
));
1558 *type_out
= vec_type
;
1560 if (dump_enabled_p ())
1561 dump_printf_loc (MSG_NOTE
, vect_location
,
1562 "created pattern stmt: %G", pattern_stmt
);
1563 return pattern_stmt
;
1566 /* Function vect_recog_pow_pattern
1568 Try to find the following pattern:
1572 with POW being one of pow, powf, powi, powif and N being
1577 * STMT_VINFO: The stmt from which the pattern search begins.
1581 * TYPE_OUT: The type of the output of this pattern.
1583 * Return value: A new stmt that will be used to replace the sequence of
1584 stmts that constitute the pattern. In this case it will be:
1591 vect_recog_pow_pattern (vec_info
*vinfo
,
1592 stmt_vec_info stmt_vinfo
, tree
*type_out
)
1594 gimple
*last_stmt
= stmt_vinfo
->stmt
;
1599 if (!is_gimple_call (last_stmt
) || gimple_call_lhs (last_stmt
) == NULL
)
1602 switch (gimple_call_combined_fn (last_stmt
))
1612 base
= gimple_call_arg (last_stmt
, 0);
1613 exp
= gimple_call_arg (last_stmt
, 1);
1614 if (TREE_CODE (exp
) != REAL_CST
1615 && TREE_CODE (exp
) != INTEGER_CST
)
1617 if (flag_unsafe_math_optimizations
1618 && TREE_CODE (base
) == REAL_CST
1619 && gimple_call_builtin_p (last_stmt
, BUILT_IN_NORMAL
))
1621 combined_fn log_cfn
;
1622 built_in_function exp_bfn
;
1623 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt
)))
1626 log_cfn
= CFN_BUILT_IN_LOG
;
1627 exp_bfn
= BUILT_IN_EXP
;
1630 log_cfn
= CFN_BUILT_IN_LOGF
;
1631 exp_bfn
= BUILT_IN_EXPF
;
1634 log_cfn
= CFN_BUILT_IN_LOGL
;
1635 exp_bfn
= BUILT_IN_EXPL
;
1640 tree logc
= fold_const_call (log_cfn
, TREE_TYPE (base
), base
);
1641 tree exp_decl
= builtin_decl_implicit (exp_bfn
);
1642 /* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd
1643 does that, but if C is a power of 2, we want to use
1644 exp2 (log2 (C) * x) in the non-vectorized version, but for
1645 vectorization we don't have vectorized exp2. */
1647 && TREE_CODE (logc
) == REAL_CST
1649 && lookup_attribute ("omp declare simd",
1650 DECL_ATTRIBUTES (exp_decl
)))
1652 cgraph_node
*node
= cgraph_node::get_create (exp_decl
);
1653 if (node
->simd_clones
== NULL
)
1655 if (targetm
.simd_clone
.compute_vecsize_and_simdlen
== NULL
1656 || node
->definition
)
1658 expand_simd_clones (node
);
1659 if (node
->simd_clones
== NULL
)
1662 *type_out
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (base
));
1665 tree def
= vect_recog_temp_ssa_var (TREE_TYPE (base
), NULL
);
1666 gimple
*g
= gimple_build_assign (def
, MULT_EXPR
, exp
, logc
);
1667 append_pattern_def_seq (vinfo
, stmt_vinfo
, g
);
1668 tree res
= vect_recog_temp_ssa_var (TREE_TYPE (base
), NULL
);
1669 g
= gimple_build_call (exp_decl
, 1, def
);
1670 gimple_call_set_lhs (g
, res
);
1678 /* We now have a pow or powi builtin function call with a constant
1681 /* Catch squaring. */
1682 if ((tree_fits_shwi_p (exp
)
1683 && tree_to_shwi (exp
) == 2)
1684 || (TREE_CODE (exp
) == REAL_CST
1685 && real_equal (&TREE_REAL_CST (exp
), &dconst2
)))
1687 if (!vect_supportable_direct_optab_p (vinfo
, TREE_TYPE (base
), MULT_EXPR
,
1688 TREE_TYPE (base
), type_out
))
1691 var
= vect_recog_temp_ssa_var (TREE_TYPE (base
), NULL
);
1692 stmt
= gimple_build_assign (var
, MULT_EXPR
, base
, base
);
1696 /* Catch square root. */
1697 if (TREE_CODE (exp
) == REAL_CST
1698 && real_equal (&TREE_REAL_CST (exp
), &dconsthalf
))
1700 *type_out
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (base
));
1702 && direct_internal_fn_supported_p (IFN_SQRT
, *type_out
,
1703 OPTIMIZE_FOR_SPEED
))
1705 gcall
*stmt
= gimple_build_call_internal (IFN_SQRT
, 1, base
);
1706 var
= vect_recog_temp_ssa_var (TREE_TYPE (base
), stmt
);
1707 gimple_call_set_lhs (stmt
, var
);
1708 gimple_call_set_nothrow (stmt
, true);
1717 /* Function vect_recog_widen_sum_pattern
1719 Try to find the following pattern:
1722 TYPE x_T, sum = init;
1724 sum_0 = phi <init, sum_1>
1726 S2 x_T = (TYPE) x_t;
1727 S3 sum_1 = x_T + sum_0;
1729 where type 'TYPE' is at least double the size of type 'type', i.e - we're
1730 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1731 a special case of a reduction computation.
1735 * STMT_VINFO: The stmt from which the pattern search begins. In the example,
1736 when this function is called with S3, the pattern {S2,S3} will be detected.
1740 * TYPE_OUT: The type of the output of this pattern.
1742 * Return value: A new stmt that will be used to replace the sequence of
1743 stmts that constitute the pattern. In this case it will be:
1744 WIDEN_SUM <x_t, sum_0>
1746 Note: The widening-sum idiom is a widening reduction pattern that is
1747 vectorized without preserving all the intermediate results. It
1748 produces only N/2 (widened) results (by summing up pairs of
1749 intermediate results) rather than all N results. Therefore, we
1750 cannot allow this pattern when we want to get all the results and in
1751 the correct order (as is the case when this computation is in an
 1752      inner-loop nested in an outer-loop that is being vectorized).  */
1755 vect_recog_widen_sum_pattern (vec_info
*vinfo
,
1756 stmt_vec_info stmt_vinfo
, tree
*type_out
)
1758 gimple
*last_stmt
= stmt_vinfo
->stmt
;
1759 tree oprnd0
, oprnd1
;
1761 gimple
*pattern_stmt
;
1764 /* Look for the following pattern
1767 In which DX is at least double the size of X, and sum_1 has been
1768 recognized as a reduction variable.
1771 /* Starting from LAST_STMT, follow the defs of its uses in search
1772 of the above pattern. */
1774 if (!vect_reassociating_reduction_p (vinfo
, stmt_vinfo
, PLUS_EXPR
,
1778 type
= TREE_TYPE (gimple_get_lhs (last_stmt
));
1780 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1781 we know that oprnd1 is the reduction variable (defined by a loop-header
1782 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1783 Left to check that oprnd0 is defined by a cast from type 'type' to type
1786 vect_unpromoted_value unprom0
;
1787 if (!vect_look_through_possible_promotion (vinfo
, oprnd0
, &unprom0
)
1788 || TYPE_PRECISION (unprom0
.type
) * 2 > TYPE_PRECISION (type
))
1791 vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt
);
1793 if (!vect_supportable_direct_optab_p (vinfo
, type
, WIDEN_SUM_EXPR
,
1794 unprom0
.type
, type_out
))
1797 var
= vect_recog_temp_ssa_var (type
, NULL
);
1798 pattern_stmt
= gimple_build_assign (var
, WIDEN_SUM_EXPR
, unprom0
.op
, oprnd1
);
1800 return pattern_stmt
;
1803 /* Recognize cases in which an operation is performed in one type WTYPE
1804 but could be done more efficiently in a narrower type NTYPE. For example,
1807 ATYPE a; // narrower than NTYPE
1808 BTYPE b; // narrower than NTYPE
1809 WTYPE aw = (WTYPE) a;
1810 WTYPE bw = (WTYPE) b;
1811 WTYPE res = aw + bw; // only uses of aw and bw
1813 then it would be more efficient to do:
1815 NTYPE an = (NTYPE) a;
1816 NTYPE bn = (NTYPE) b;
1817 NTYPE resn = an + bn;
1818 WTYPE res = (WTYPE) resn;
1820 Other situations include things like:
1822 ATYPE a; // NTYPE or narrower
1823 WTYPE aw = (WTYPE) a;
1826 when only "(NTYPE) res" is significant. In that case it's more efficient
1827 to truncate "b" and do the operation on NTYPE instead:
1829 NTYPE an = (NTYPE) a;
1830 NTYPE bn = (NTYPE) b; // truncation
1831 NTYPE resn = an + bn;
1832 WTYPE res = (WTYPE) resn;
1834 All users of "res" should then use "resn" instead, making the final
1835 statement dead (not marked as relevant). The final statement is still
1836 needed to maintain the type correctness of the IR.
1838 vect_determine_precisions has already determined the minimum
 1839    precision of the operation and the minimum precision required
1840 by users of the result. */
1843 vect_recog_over_widening_pattern (vec_info
*vinfo
,
1844 stmt_vec_info last_stmt_info
, tree
*type_out
)
1846 gassign
*last_stmt
= dyn_cast
<gassign
*> (last_stmt_info
->stmt
);
1850 /* See whether we have found that this operation can be done on a
1851 narrower type without changing its semantics. */
1852 unsigned int new_precision
= last_stmt_info
->operation_precision
;
1856 tree lhs
= gimple_assign_lhs (last_stmt
);
1857 tree type
= TREE_TYPE (lhs
);
1858 tree_code code
= gimple_assign_rhs_code (last_stmt
);
1860 /* Punt for reductions where we don't handle the type conversions. */
1861 if (STMT_VINFO_DEF_TYPE (last_stmt_info
) == vect_reduction_def
)
1864 /* Keep the first operand of a COND_EXPR as-is: only the other two
1865 operands are interesting. */
1866 unsigned int first_op
= (code
== COND_EXPR
? 2 : 1);
1868 /* Check the operands. */
1869 unsigned int nops
= gimple_num_ops (last_stmt
) - first_op
;
1870 auto_vec
<vect_unpromoted_value
, 3> unprom (nops
);
1871 unprom
.quick_grow (nops
);
1872 unsigned int min_precision
= 0;
1873 bool single_use_p
= false;
1874 for (unsigned int i
= 0; i
< nops
; ++i
)
1876 tree op
= gimple_op (last_stmt
, first_op
+ i
);
1877 if (TREE_CODE (op
) == INTEGER_CST
)
1878 unprom
[i
].set_op (op
, vect_constant_def
);
1879 else if (TREE_CODE (op
) == SSA_NAME
)
1881 bool op_single_use_p
= true;
1882 if (!vect_look_through_possible_promotion (vinfo
, op
, &unprom
[i
],
1887 (1) N bits of the result are needed;
1888 (2) all inputs are widened from M<N bits; and
1889 (3) one operand OP is a single-use SSA name
1891 we can shift the M->N widening from OP to the output
1892 without changing the number or type of extensions involved.
1893 This then reduces the number of copies of STMT_INFO.
1895 If instead of (3) more than one operand is a single-use SSA name,
1896 shifting the extension to the output is even more of a win.
1900 (1) N bits of the result are needed;
1901 (2) one operand OP2 is widened from M2<N bits;
1902 (3) another operand OP1 is widened from M1<M2 bits; and
1903 (4) both OP1 and OP2 are single-use
1905 the choice is between:
1907 (a) truncating OP2 to M1, doing the operation on M1,
1908 and then widening the result to N
1910 (b) widening OP1 to M2, doing the operation on M2, and then
1911 widening the result to N
1913 Both shift the M2->N widening of the inputs to the output.
1914 (a) additionally shifts the M1->M2 widening to the output;
1915 it requires fewer copies of STMT_INFO but requires an extra
1918 Which is better will depend on the complexity and cost of
1919 STMT_INFO, which is hard to predict at this stage. However,
1920 a clear tie-breaker in favor of (b) is the fact that the
1921 truncation in (a) increases the length of the operation chain.
1923 If instead of (4) only one of OP1 or OP2 is single-use,
1924 (b) is still a win over doing the operation in N bits:
1925 it still shifts the M2->N widening on the single-use operand
1926 to the output and reduces the number of STMT_INFO copies.
1928 If neither operand is single-use then operating on fewer than
1929 N bits might lead to more extensions overall. Whether it does
1930 or not depends on global information about the vectorization
1931 region, and whether that's a good trade-off would again
1932 depend on the complexity and cost of the statements involved,
1933 as well as things like register pressure that are not normally
1934 modelled at this stage. We therefore ignore these cases
1935 and just optimize the clear single-use wins above.
1937 Thus we take the maximum precision of the unpromoted operands
1938 and record whether any operand is single-use. */
1939 if (unprom
[i
].dt
== vect_internal_def
)
1941 min_precision
= MAX (min_precision
,
1942 TYPE_PRECISION (unprom
[i
].type
));
1943 single_use_p
|= op_single_use_p
;
1950 /* Although the operation could be done in operation_precision, we have
1951 to balance that against introducing extra truncations or extensions.
1952 Calculate the minimum precision that can be handled efficiently.
1954 The loop above determined that the operation could be handled
1955 efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
1956 extension from the inputs to the output without introducing more
1957 instructions, and would reduce the number of instructions required
1958 for STMT_INFO itself.
1960 vect_determine_precisions has also determined that the result only
1961 needs min_output_precision bits. Truncating by a factor of N times
1962 requires a tree of N - 1 instructions, so if TYPE is N times wider
1963 than min_output_precision, doing the operation in TYPE and truncating
1964 the result requires N + (N - 1) = 2N - 1 instructions per output vector.
1967 - truncating the input to a unary operation and doing the operation
1968 in the new type requires at most N - 1 + 1 = N instructions per
1971 - doing the same for a binary operation requires at most
1972 (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
1974 Both unary and binary operations require fewer instructions than
1975 this if the operands were extended from a suitable truncated form.
1976 Thus there is usually nothing to lose by doing operations in
1977 min_output_precision bits, but there can be something to gain. */
1979 min_precision
= last_stmt_info
->min_output_precision
;
1981 min_precision
= MIN (min_precision
, last_stmt_info
->min_output_precision
);
1983 /* Apply the minimum efficient precision we just calculated. */
1984 if (new_precision
< min_precision
)
1985 new_precision
= min_precision
;
1986 new_precision
= vect_element_precision (new_precision
);
1987 if (new_precision
>= TYPE_PRECISION (type
))
1990 vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt
);
1992 *type_out
= get_vectype_for_scalar_type (vinfo
, type
);
1996 /* We've found a viable pattern. Get the new type of the operation. */
1997 bool unsigned_p
= (last_stmt_info
->operation_sign
== UNSIGNED
);
1998 tree new_type
= build_nonstandard_integer_type (new_precision
, unsigned_p
);
2000 /* If we're truncating an operation, we need to make sure that we
2001 don't introduce new undefined overflow. The codes tested here are
2002 a subset of those accepted by vect_truncatable_operation_p. */
2003 tree op_type
= new_type
;
2004 if (TYPE_OVERFLOW_UNDEFINED (new_type
)
2005 && (code
== PLUS_EXPR
|| code
== MINUS_EXPR
|| code
== MULT_EXPR
))
2006 op_type
= build_nonstandard_integer_type (new_precision
, true);
2008 /* We specifically don't check here whether the target supports the
2009 new operation, since it might be something that a later pattern
2010 wants to rewrite anyway. If targets have a minimum element size
2011 for some optabs, we should pattern-match smaller ops to larger ops
2012 where beneficial. */
2013 tree new_vectype
= get_vectype_for_scalar_type (vinfo
, new_type
);
2014 tree op_vectype
= get_vectype_for_scalar_type (vinfo
, op_type
);
2015 if (!new_vectype
|| !op_vectype
)
2018 if (dump_enabled_p ())
2019 dump_printf_loc (MSG_NOTE
, vect_location
, "demoting %T to %T\n",
2022 /* Calculate the rhs operands for an operation on OP_TYPE. */
2024 for (unsigned int i
= 1; i
< first_op
; ++i
)
2025 ops
[i
- 1] = gimple_op (last_stmt
, i
);
2026 vect_convert_inputs (vinfo
, last_stmt_info
, nops
, &ops
[first_op
- 1],
2027 op_type
, &unprom
[0], op_vectype
);
2029 /* Use the operation to produce a result of type OP_TYPE. */
2030 tree new_var
= vect_recog_temp_ssa_var (op_type
, NULL
);
2031 gimple
*pattern_stmt
= gimple_build_assign (new_var
, code
,
2032 ops
[0], ops
[1], ops
[2]);
2033 gimple_set_location (pattern_stmt
, gimple_location (last_stmt
));
2035 if (dump_enabled_p ())
2036 dump_printf_loc (MSG_NOTE
, vect_location
,
2037 "created pattern stmt: %G", pattern_stmt
);
2039 /* Convert back to the original signedness, if OP_TYPE is different
2041 if (op_type
!= new_type
)
2042 pattern_stmt
= vect_convert_output (vinfo
, last_stmt_info
, new_type
,
2043 pattern_stmt
, op_vectype
);
2045 /* Promote the result to the original type. */
2046 pattern_stmt
= vect_convert_output (vinfo
, last_stmt_info
, type
,
2047 pattern_stmt
, new_vectype
);
2049 return pattern_stmt
;
2052 /* Recognize the following patterns:
2054 ATYPE a; // narrower than TYPE
2055 BTYPE b; // narrower than TYPE
2057 1) Multiply high with scaling
2058 TYPE res = ((TYPE) a * (TYPE) b) >> c;
2059 Here, c is bitsize (TYPE) / 2 - 1.
2061 2) ... or also with rounding
2062 TYPE res = (((TYPE) a * (TYPE) b) >> d + 1) >> 1;
2063 Here, d is bitsize (TYPE) / 2 - 2.
2065 3) Normal multiply high
2066 TYPE res = ((TYPE) a * (TYPE) b) >> e;
2067 Here, e is bitsize (TYPE) / 2.
2069 where only the bottom half of res is used. */
/* Recognize multiply-high(-with-rounding/scaling) patterns and rewrite them
   to an IFN_MULH* internal-function call on a narrower type.
   Inputs: VINFO (vectorization context), LAST_STMT_INFO (the right-shift
   statement the search starts from).  Output: *TYPE_OUT receives the vector
   type of the original lhs.  Returns the replacement pattern statement.
   NOTE(review): this chunk is mangled by extraction -- each logical line is
   split across several physical lines, the original file's line numbers are
   fused into the text, and some lines (braces, early `return NULL;`) are
   missing.  Code text below is preserved byte-for-byte; comments only.  */
2072 vect_recog_mulhs_pattern (vec_info
*vinfo
,
2073 stmt_vec_info last_stmt_info
, tree
*type_out
)
2075 /* Check for a right shift. */
2076 gassign
*last_stmt
= dyn_cast
<gassign
*> (last_stmt_info
->stmt
);
/* Bail out unless the anchor statement is an assignment whose code is
   RSHIFT_EXPR (the `if (!last_stmt ...` head is missing from this view).  */
2078 || gimple_assign_rhs_code (last_stmt
) != RSHIFT_EXPR
)
2081 /* Check that the shift result is wider than the users of the
2082 result need (i.e. that narrowing would be a natural choice). */
2083 tree lhs_type
= TREE_TYPE (gimple_assign_lhs (last_stmt
));
2084 unsigned int target_precision
2085 = vect_element_precision (last_stmt_info
->min_output_precision
);
2086 if (!INTEGRAL_TYPE_P (lhs_type
)
2087 || target_precision
>= TYPE_PRECISION (lhs_type
))
2090 /* Look through any change in sign on the outer shift input. */
2091 vect_unpromoted_value unprom_rshift_input
;
2092 tree rshift_input
= vect_look_through_possible_promotion
2093 (vinfo
, gimple_assign_rhs1 (last_stmt
), &unprom_rshift_input
);
2095 || TYPE_PRECISION (TREE_TYPE (rshift_input
))
2096 != TYPE_PRECISION (lhs_type
))
2099 /* Get the definition of the shift input. */
2100 stmt_vec_info rshift_input_stmt_info
2101 = vect_get_internal_def (vinfo
, rshift_input
);
2102 if (!rshift_input_stmt_info
)
2104 gassign
*rshift_input_stmt
2105 = dyn_cast
<gassign
*> (rshift_input_stmt_info
->stmt
);
2106 if (!rshift_input_stmt
)
2109 stmt_vec_info mulh_stmt_info
;
2111 bool rounding_p
= false;
2113 /* Check for the presence of the rounding term. */
2114 if (gimple_assign_rhs_code (rshift_input_stmt
) == PLUS_EXPR
)
/* Rounding form (pattern 2): ((a*b >> d) + 1) >> 1.  Both shifts must be
   by 1 and the PLUS must add exactly 1.  */
2116 /* Check that the outer shift was by 1. */
2117 if (!integer_onep (gimple_assign_rhs2 (last_stmt
)))
2120 /* Check that the second operand of the PLUS_EXPR is 1. */
2121 if (!integer_onep (gimple_assign_rhs2 (rshift_input_stmt
)))
2124 /* Look through any change in sign on the addition input. */
2125 vect_unpromoted_value unprom_plus_input
;
2126 tree plus_input
= vect_look_through_possible_promotion
2127 (vinfo
, gimple_assign_rhs1 (rshift_input_stmt
), &unprom_plus_input
);
2129 || TYPE_PRECISION (TREE_TYPE (plus_input
))
2130 != TYPE_PRECISION (TREE_TYPE (rshift_input
)))
2133 /* Get the definition of the multiply-high-scale part. */
2134 stmt_vec_info plus_input_stmt_info
2135 = vect_get_internal_def (vinfo
, plus_input
);
2136 if (!plus_input_stmt_info
)
2138 gassign
*plus_input_stmt
2139 = dyn_cast
<gassign
*> (plus_input_stmt_info
->stmt
);
2140 if (!plus_input_stmt
2141 || gimple_assign_rhs_code (plus_input_stmt
) != RSHIFT_EXPR
)
2144 /* Look through any change in sign on the scaling input. */
2145 vect_unpromoted_value unprom_scale_input
;
2146 tree scale_input
= vect_look_through_possible_promotion
2147 (vinfo
, gimple_assign_rhs1 (plus_input_stmt
), &unprom_scale_input
);
2149 || TYPE_PRECISION (TREE_TYPE (scale_input
))
2150 != TYPE_PRECISION (TREE_TYPE (plus_input
)))
2153 /* Get the definition of the multiply-high part. */
2154 mulh_stmt_info
= vect_get_internal_def (vinfo
, scale_input
);
2155 if (!mulh_stmt_info
)
2158 /* Get the scaling term. */
2159 scale_term
= gimple_assign_rhs2 (plus_input_stmt
);
/* Non-rounding case (patterns 1 and 3): the shift input itself is the
   multiply-high and the scale is the outer shift amount.  */
2164 mulh_stmt_info
= rshift_input_stmt_info
;
2165 scale_term
= gimple_assign_rhs2 (last_stmt
);
2168 /* Check that the scaling factor is constant. */
2169 if (TREE_CODE (scale_term
) != INTEGER_CST
)
2172 /* Check whether the scaling input term can be seen as two widened
2173 inputs multiplied together. */
2174 vect_unpromoted_value unprom_mult
[2];
2177 = vect_widened_op_tree (vinfo
, mulh_stmt_info
, MULT_EXPR
, WIDEN_MULT_EXPR
,
2178 false, 2, unprom_mult
, &new_type
);
2182 /* Adjust output precision. */
2183 if (TYPE_PRECISION (new_type
) < target_precision
)
2184 new_type
= build_nonstandard_integer_type
2185 (target_precision
, TYPE_UNSIGNED (new_type
));
2187 unsigned mult_precision
= TYPE_PRECISION (new_type
);
2189 /* Check that the scaling factor is expected. Instead of
2190 target_precision, we should use the one that we actually
2191 use for internal function. */
2194 /* Check pattern 2). */
2195 if (wi::to_widest (scale_term
) + mult_precision
+ 2
2196 != TYPE_PRECISION (lhs_type
))
2203 /* Check for pattern 1). */
2204 if (wi::to_widest (scale_term
) + mult_precision
+ 1
2205 == TYPE_PRECISION (lhs_type
))
2207 /* Check for pattern 3). */
2208 else if (wi::to_widest (scale_term
) + mult_precision
2209 == TYPE_PRECISION (lhs_type
))
2215 vect_pattern_detected ("vect_recog_mulhs_pattern", last_stmt
);
2217 /* Check for target support. */
2218 tree new_vectype
= get_vectype_for_scalar_type (vinfo
, new_type
);
2220 || !direct_internal_fn_supported_p
2221 (ifn
, new_vectype
, OPTIMIZE_FOR_SPEED
))
2224 /* The IR requires a valid vector type for the cast result, even though
2225 it's likely to be discarded. */
2226 *type_out
= get_vectype_for_scalar_type (vinfo
, lhs_type
);
2230 /* Generate the IFN_MULHRS call. */
2231 tree new_var
= vect_recog_temp_ssa_var (new_type
, NULL
);
2233 vect_convert_inputs (vinfo
, last_stmt_info
, 2, new_ops
, new_type
,
2234 unprom_mult
, new_vectype
);
2236 = gimple_build_call_internal (ifn
, 2, new_ops
[0], new_ops
[1]);
2237 gimple_call_set_lhs (mulhrs_stmt
, new_var
);
2238 gimple_set_location (mulhrs_stmt
, gimple_location (last_stmt
));
2240 if (dump_enabled_p ())
2241 dump_printf_loc (MSG_NOTE
, vect_location
,
2242 "created pattern stmt: %G", mulhrs_stmt
);
/* Cast the IFN result back to the original lhs type for consumers.  */
2244 return vect_convert_output (vinfo
, last_stmt_info
, lhs_type
,
2245 mulhrs_stmt
, new_vectype
);
2248 /* Recognize the patterns:
2250 ATYPE a; // narrower than TYPE
2251 BTYPE b; // narrower than TYPE
2252 (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
2253 or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
2255 where only the bottom half of avg is used. Try to transform them into:
2257 (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
2258 or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
2262 TYPE avg = (TYPE) avg';
2264 where NTYPE is no wider than half of TYPE. Since only the bottom half
2265 of avg is used, all or part of the cast of avg' should become redundant.
2267 If there is no target support available, generate code to distribute rshift
2268 over plus and add a carry. */
/* Recognize (a + b {+ 1}) >> 1 on widened narrow inputs and rewrite it as
   IFN_AVG_FLOOR / IFN_AVG_CEIL on a narrower type, or, failing target
   support, as an open-coded shift/add/carry sequence.
   Inputs: VINFO, LAST_STMT_INFO (the right-shift-by-one statement).
   Output: *TYPE_OUT receives the vector type of the original result type.
   Returns the replacement pattern statement.
   NOTE(review): this chunk is mangled by extraction -- logical lines are
   split across physical lines, original line numbers are fused into the
   text and some lines (braces, `return NULL;`, some operands) are missing.
   Code text below is preserved byte-for-byte; comments only added.  */
2271 vect_recog_average_pattern (vec_info
*vinfo
,
2272 stmt_vec_info last_stmt_info
, tree
*type_out
)
2274 /* Check for a shift right by one bit. */
2275 gassign
*last_stmt
= dyn_cast
<gassign
*> (last_stmt_info
->stmt
);
2277 || gimple_assign_rhs_code (last_stmt
) != RSHIFT_EXPR
2278 || !integer_onep (gimple_assign_rhs2 (last_stmt
)))
2281 /* Check that the shift result is wider than the users of the
2282 result need (i.e. that narrowing would be a natural choice). */
2283 tree lhs
= gimple_assign_lhs (last_stmt
);
2284 tree type
= TREE_TYPE (lhs
);
2285 unsigned int target_precision
2286 = vect_element_precision (last_stmt_info
->min_output_precision
);
2287 if (!INTEGRAL_TYPE_P (type
) || target_precision
>= TYPE_PRECISION (type
))
2290 /* Look through any change in sign on the shift input. */
2291 tree rshift_rhs
= gimple_assign_rhs1 (last_stmt
);
2292 vect_unpromoted_value unprom_plus
;
2293 rshift_rhs
= vect_look_through_possible_promotion (vinfo
, rshift_rhs
,
2296 || TYPE_PRECISION (TREE_TYPE (rshift_rhs
)) != TYPE_PRECISION (type
))
2299 /* Get the definition of the shift input. */
2300 stmt_vec_info plus_stmt_info
= vect_get_internal_def (vinfo
, rshift_rhs
);
2301 if (!plus_stmt_info
)
2304 /* Check whether the shift input can be seen as a tree of additions on
2305 2 or 3 widened inputs.
2307 Note that the pattern should be a win even if the result of one or
2308 more additions is reused elsewhere: if the pattern matches, we'd be
2309 replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s. */
2310 internal_fn ifn
= IFN_AVG_FLOOR
;
2311 vect_unpromoted_value unprom
[3];
2313 unsigned int nops
= vect_widened_op_tree (vinfo
, plus_stmt_info
, PLUS_EXPR
,
2314 WIDEN_PLUS_EXPR
, false, 3,
/* Three operands means the "+ 1" rounding form: find the literal 1,
   switch to IFN_AVG_CEIL (the switch is outside this view) and drop it.  */
2320 /* Check that one operand is 1. */
2322 for (i
= 0; i
< 3; ++i
)
2323 if (integer_onep (unprom
[i
].op
))
2327 /* Throw away the 1 operand and keep the other two. */
2329 unprom
[i
] = unprom
[2];
2333 vect_pattern_detected ("vect_recog_average_pattern", last_stmt
);
2337 (a) the operation can be viewed as:
2339 TYPE widened0 = (TYPE) UNPROM[0];
2340 TYPE widened1 = (TYPE) UNPROM[1];
2341 TYPE tmp1 = widened0 + widened1 {+ 1};
2342 TYPE tmp2 = tmp1 >> 1; // LAST_STMT_INFO
2344 (b) the first two statements are equivalent to:
2346 TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
2347 TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
2349 (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
2352 (d) all the operations can be performed correctly at twice the width of
2353 NEW_TYPE, due to the nature of the average operation; and
2355 (e) users of the result of the right shift need only TARGET_PRECISION
2356 bits, where TARGET_PRECISION is no more than half of TYPE's
2359 Under these circumstances, the only situation in which NEW_TYPE
2360 could be narrower than TARGET_PRECISION is if widened0, widened1
2361 and an addition result are all used more than once. Thus we can
2362 treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
2363 as "free", whereas widening the result of the average instruction
2364 from NEW_TYPE to TARGET_PRECISION would be a new operation. It's
2365 therefore better not to go narrower than TARGET_PRECISION. */
2366 if (TYPE_PRECISION (new_type
) < target_precision
)
2367 new_type
= build_nonstandard_integer_type (target_precision
,
2368 TYPE_UNSIGNED (new_type
));
2370 /* Check for target support. */
2371 tree new_vectype
= get_vectype_for_scalar_type (vinfo
, new_type
);
2375 bool fallback_p
= false;
/* Prefer the direct IFN; otherwise, for unsigned types, fall back to the
   open-coded sequence when all four needed optabs exist.  */
2377 if (direct_internal_fn_supported_p (ifn
, new_vectype
, OPTIMIZE_FOR_SPEED
))
2379 else if (TYPE_UNSIGNED (new_type
)
2380 && optab_for_tree_code (RSHIFT_EXPR
, new_vectype
, optab_scalar
)
2381 && optab_for_tree_code (PLUS_EXPR
, new_vectype
, optab_default
)
2382 && optab_for_tree_code (BIT_IOR_EXPR
, new_vectype
, optab_default
)
2383 && optab_for_tree_code (BIT_AND_EXPR
, new_vectype
, optab_default
))
2388 /* The IR requires a valid vector type for the cast result, even though
2389 it's likely to be discarded. */
2390 *type_out
= get_vectype_for_scalar_type (vinfo
, type
);
2394 tree new_var
= vect_recog_temp_ssa_var (new_type
, NULL
);
2396 vect_convert_inputs (vinfo
, last_stmt_info
, 2, new_ops
, new_type
,
2397 unprom
, new_vectype
);
2401 /* As a fallback, generate code for following sequence:
2403 shifted_op0 = new_ops[0] >> 1;
2404 shifted_op1 = new_ops[1] >> 1;
2405 sum_of_shifted = shifted_op0 + shifted_op1;
2406 unmasked_carry = new_ops[0] and/or new_ops[1];
2407 carry = unmasked_carry & 1;
2408 new_var = sum_of_shifted + carry;
2411 tree one_cst
= build_one_cst (new_type
);
2414 tree shifted_op0
= vect_recog_temp_ssa_var (new_type
, NULL
);
2415 g
= gimple_build_assign (shifted_op0
, RSHIFT_EXPR
, new_ops
[0], one_cst
);
2416 append_pattern_def_seq (vinfo
, last_stmt_info
, g
, new_vectype
);
2418 tree shifted_op1
= vect_recog_temp_ssa_var (new_type
, NULL
);
2419 g
= gimple_build_assign (shifted_op1
, RSHIFT_EXPR
, new_ops
[1], one_cst
);
2420 append_pattern_def_seq (vinfo
, last_stmt_info
, g
, new_vectype
);
2422 tree sum_of_shifted
= vect_recog_temp_ssa_var (new_type
, NULL
);
2423 g
= gimple_build_assign (sum_of_shifted
, PLUS_EXPR
,
2424 shifted_op0
, shifted_op1
);
2425 append_pattern_def_seq (vinfo
, last_stmt_info
, g
, new_vectype
);
2427 tree unmasked_carry
= vect_recog_temp_ssa_var (new_type
, NULL
);
/* CEIL rounds up, so OR collects the carry; FLOOR uses AND.  */
2428 tree_code c
= (ifn
== IFN_AVG_CEIL
) ? BIT_IOR_EXPR
: BIT_AND_EXPR
;
2429 g
= gimple_build_assign (unmasked_carry
, c
, new_ops
[0], new_ops
[1]);
2430 append_pattern_def_seq (vinfo
, last_stmt_info
, g
, new_vectype
);
2432 tree carry
= vect_recog_temp_ssa_var (new_type
, NULL
);
2433 g
= gimple_build_assign (carry
, BIT_AND_EXPR
, unmasked_carry
, one_cst
);
2434 append_pattern_def_seq (vinfo
, last_stmt_info
, g
, new_vectype
);
2436 g
= gimple_build_assign (new_var
, PLUS_EXPR
, sum_of_shifted
, carry
);
2437 return vect_convert_output (vinfo
, last_stmt_info
, type
, g
, new_vectype
);
2440 /* Generate the IFN_AVG* call. */
2441 gcall
*average_stmt
= gimple_build_call_internal (ifn
, 2, new_ops
[0],
2443 gimple_call_set_lhs (average_stmt
, new_var
);
2444 gimple_set_location (average_stmt
, gimple_location (last_stmt
));
2446 if (dump_enabled_p ())
2447 dump_printf_loc (MSG_NOTE
, vect_location
,
2448 "created pattern stmt: %G", average_stmt
);
2450 return vect_convert_output (vinfo
, last_stmt_info
,
2451 type
, average_stmt
, new_vectype
);
2454 /* Recognize cases in which the input to a cast is wider than its
2455 output, and the input is fed by a widening operation. Fold this
2456 by removing the unnecessary intermediate widening. E.g.:
2459 unsigned int b = (unsigned int) a;
2460 unsigned short c = (unsigned short) b;
2464 unsigned short c = (unsigned short) a;
2466 Although this is rare in input IR, it is an expected side-effect
2467 of the over-widening pattern above.
2469 This is beneficial also for integer-to-float conversions, if the
2470 widened integer has more bits than the float, and if the unwidened
/* Fold a narrowing cast whose input was itself widened: cast the original
   unpromoted value directly instead of widening then narrowing.
   Inputs: VINFO, LAST_STMT_INFO (the cast statement).  Output: *TYPE_OUT
   receives the vector type of the cast's lhs.  Returns the new cast stmt.
   NOTE(review): mangled extraction -- logical lines are split and some
   lines (braces, `return NULL;`) are missing from this view.  Code text
   is preserved byte-for-byte; comments only added.  */
2474 vect_recog_cast_forwprop_pattern (vec_info
*vinfo
,
2475 stmt_vec_info last_stmt_info
, tree
*type_out
)
2477 /* Check for a cast, including an integer-to-float conversion. */
2478 gassign
*last_stmt
= dyn_cast
<gassign
*> (last_stmt_info
->stmt
);
2481 tree_code code
= gimple_assign_rhs_code (last_stmt
);
2482 if (!CONVERT_EXPR_CODE_P (code
) && code
!= FLOAT_EXPR
)
2485 /* Make sure that the rhs is a scalar with a natural bitsize. */
2486 tree lhs
= gimple_assign_lhs (last_stmt
);
2489 tree lhs_type
= TREE_TYPE (lhs
);
2490 scalar_mode lhs_mode
;
2491 if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type
)
2492 || !is_a
<scalar_mode
> (TYPE_MODE (lhs_type
), &lhs_mode
))
2495 /* Check for a narrowing operation (from a vector point of view). */
2496 tree rhs
= gimple_assign_rhs1 (last_stmt
);
2497 tree rhs_type
= TREE_TYPE (rhs
);
2498 if (!INTEGRAL_TYPE_P (rhs_type
)
2499 || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type
)
2500 || TYPE_PRECISION (rhs_type
) <= GET_MODE_BITSIZE (lhs_mode
))
2503 /* Try to find an unpromoted input. */
2504 vect_unpromoted_value unprom
;
2505 if (!vect_look_through_possible_promotion (vinfo
, rhs
, &unprom
)
2506 || TYPE_PRECISION (unprom
.type
) >= TYPE_PRECISION (rhs_type
))
2509 /* If the bits above RHS_TYPE matter, make sure that they're the
2510 same when extending from UNPROM as they are when extending from RHS. */
2511 if (!INTEGRAL_TYPE_P (lhs_type
)
2512 && TYPE_SIGN (rhs_type
) != TYPE_SIGN (unprom
.type
))
2515 /* We can get the same result by casting UNPROM directly, to avoid
2516 the unnecessary widening and narrowing. */
2517 vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt
);
2519 *type_out
= get_vectype_for_scalar_type (vinfo
, lhs_type
);
/* Build the single direct cast that replaces widen-then-narrow.  */
2523 tree new_var
= vect_recog_temp_ssa_var (lhs_type
, NULL
);
2524 gimple
*pattern_stmt
= gimple_build_assign (new_var
, code
, unprom
.op
);
2525 gimple_set_location (pattern_stmt
, gimple_location (last_stmt
));
2527 return pattern_stmt
;
2530 /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
2531 to WIDEN_LSHIFT_EXPR. See vect_recog_widen_op_pattern for details. */
/* Thin wrapper: detect a left shift of a widened input by delegating to
   the generic widen-op recognizer, mapping LSHIFT_EXPR to
   WIDEN_LSHIFT_EXPR.  Same interface as the other pattern recognizers:
   VINFO, LAST_STMT_INFO in; *TYPE_OUT and the pattern stmt out.
   NOTE(review): mangled extraction -- the return-type line and braces are
   missing from this view; code text preserved byte-for-byte.  */
2534 vect_recog_widen_shift_pattern (vec_info
*vinfo
,
2535 stmt_vec_info last_stmt_info
, tree
*type_out
)
2537 return vect_recog_widen_op_pattern (vinfo
, last_stmt_info
, type_out
,
2538 LSHIFT_EXPR
, WIDEN_LSHIFT_EXPR
, true,
2539 "vect_recog_widen_shift_pattern");
2542 /* Detect a rotate pattern wouldn't be otherwise vectorized:
2546 S0 a_t = b_t r<< c_t;
2550 * STMT_VINFO: The stmt from which the pattern search begins,
2551 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
2555 S2 e_t = d_t & (B - 1);
2556 S3 f_t = b_t << c_t;
2557 S4 g_t = b_t >> e_t;
2560 where B is element bitsize of type.
2564 * TYPE_OUT: The type of the output of this pattern.
2566 * Return value: A new stmt that will be used to replace the rotate
/* Recognize a rotate (or __builtin_bswap16, which is x r>> 8) that the
   target cannot vectorize directly, and expand it into supported shifts
   plus a BIT_IOR, i.e. (x << c) | (x >> (B - c)).
   Inputs: VINFO, STMT_VINFO (the rotate/bswap16 statement).  Output:
   *TYPE_OUT receives the chosen vector type.  Returns the replacement
   pattern statement (the BIT_IOR, or a rebuilt bswap16/rotate call).
   NOTE(review): mangled extraction -- logical lines are split, original
   line numbers are fused into the text, and many lines (braces, early
   `return NULL;`, some operands such as the shift amounts at 2838/2844)
   are missing from this view.  Code preserved byte-for-byte below.  */
2570 vect_recog_rotate_pattern (vec_info
*vinfo
,
2571 stmt_vec_info stmt_vinfo
, tree
*type_out
)
2573 gimple
*last_stmt
= stmt_vinfo
->stmt
;
2574 tree oprnd0
, oprnd1
, lhs
, var
, var1
, var2
, vectype
, type
, stype
, def
, def2
;
2575 gimple
*pattern_stmt
, *def_stmt
;
2576 enum tree_code rhs_code
;
2577 enum vect_def_type dt
;
2578 optab optab1
, optab2
;
2579 edge ext_def
= NULL
;
2580 bool bswap16_p
= false;
/* Case 1: a plain assignment whose rhs code is a rotate.  */
2582 if (is_gimple_assign (last_stmt
))
2584 rhs_code
= gimple_assign_rhs_code (last_stmt
);
2594 lhs
= gimple_assign_lhs (last_stmt
);
2595 oprnd0
= gimple_assign_rhs1 (last_stmt
);
2596 type
= TREE_TYPE (oprnd0
);
2597 oprnd1
= gimple_assign_rhs2 (last_stmt
);
/* Case 2: a __builtin_bswap16 call, treated as rotate-right by 8.  */
2599 else if (gimple_call_builtin_p (last_stmt
, BUILT_IN_BSWAP16
))
2601 /* __builtin_bswap16 (x) is another form of x r>> 8.
2602 The vectorizer has bswap support, but only if the argument isn't
2604 lhs
= gimple_call_lhs (last_stmt
);
2605 oprnd0
= gimple_call_arg (last_stmt
, 0);
2606 type
= TREE_TYPE (oprnd0
);
2608 || TYPE_PRECISION (TREE_TYPE (lhs
)) != 16
2609 || TYPE_PRECISION (type
) <= 16
2610 || TREE_CODE (oprnd0
) != SSA_NAME
2611 || BITS_PER_UNIT
!= 8
2612 || !TYPE_UNSIGNED (TREE_TYPE (lhs
)))
2615 stmt_vec_info def_stmt_info
;
2616 if (!vect_is_simple_use (oprnd0
, vinfo
, &dt
, &def_stmt_info
, &def_stmt
))
2619 if (dt
!= vect_internal_def
)
/* Look for a 16-bit-precision source feeding the promoted argument.  */
2622 if (gimple_assign_cast_p (def_stmt
))
2624 def
= gimple_assign_rhs1 (def_stmt
);
2625 if (INTEGRAL_TYPE_P (TREE_TYPE (def
))
2626 && TYPE_PRECISION (TREE_TYPE (def
)) == 16)
2630 type
= TREE_TYPE (lhs
);
2631 vectype
= get_vectype_for_scalar_type (vinfo
, type
);
2632 if (vectype
== NULL_TREE
)
/* If the target can byte-swap via a constant permute on char vectors,
   keep the bswap16 call and just undo the argument promotion.  */
2635 if (tree char_vectype
= get_same_sized_vectype (char_type_node
, vectype
))
2637 /* The encoding uses one stepped pattern for each byte in the
2639 vec_perm_builder
elts (TYPE_VECTOR_SUBPARTS (char_vectype
), 2, 3);
2640 for (unsigned i
= 0; i
< 3; ++i
)
2641 for (unsigned j
= 0; j
< 2; ++j
)
2642 elts
.quick_push ((i
+ 1) * 2 - j
- 1);
2644 vec_perm_indices
indices (elts
, 1,
2645 TYPE_VECTOR_SUBPARTS (char_vectype
));
2646 if (can_vec_perm_const_p (TYPE_MODE (char_vectype
), indices
))
2648 /* vectorizable_bswap can handle the __builtin_bswap16 if we
2649 undo the argument promotion. */
2650 if (!useless_type_conversion_p (type
, TREE_TYPE (oprnd0
)))
2652 def
= vect_recog_temp_ssa_var (type
, NULL
);
2653 def_stmt
= gimple_build_assign (def
, NOP_EXPR
, oprnd0
);
2654 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
2658 /* Pattern detected. */
2659 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt
);
2661 *type_out
= vectype
;
2663 /* Pattern supported. Create a stmt to be used to replace the
2664 pattern, with the unpromoted argument. */
2665 var
= vect_recog_temp_ssa_var (type
, NULL
);
2666 pattern_stmt
= gimple_build_call (gimple_call_fndecl (last_stmt
),
2668 gimple_call_set_lhs (pattern_stmt
, var
);
2669 gimple_call_set_fntype (as_a
<gcall
*> (pattern_stmt
),
2670 gimple_call_fntype (last_stmt
));
2671 return pattern_stmt
;
/* Otherwise model bswap16 as a left-rotate by 8 and fall through to the
   generic rotate lowering below.  */
2675 oprnd1
= build_int_cst (integer_type_node
, 8);
2676 rhs_code
= LROTATE_EXPR
;
2682 if (TREE_CODE (oprnd0
) != SSA_NAME
2683 || TYPE_PRECISION (TREE_TYPE (lhs
)) != TYPE_PRECISION (type
)
2684 || !INTEGRAL_TYPE_P (type
)
2685 || !TYPE_UNSIGNED (type
))
2688 stmt_vec_info def_stmt_info
;
2689 if (!vect_is_simple_use (oprnd1
, vinfo
, &dt
, &def_stmt_info
, &def_stmt
))
2692 if (dt
!= vect_internal_def
2693 && dt
!= vect_constant_def
2694 && dt
!= vect_external_def
)
2697 vectype
= get_vectype_for_scalar_type (vinfo
, type
);
2698 if (vectype
== NULL_TREE
)
2701 /* If vector/vector or vector/scalar rotate is supported by the target,
2702 don't do anything here. */
2703 optab1
= optab_for_tree_code (rhs_code
, vectype
, optab_vector
);
2705 && optab_handler (optab1
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
/* Native rotate exists: for the bswap16 case, just normalise the
   argument type and emit the rotate directly.  */
2710 if (!useless_type_conversion_p (type
, TREE_TYPE (oprnd0
)))
2712 def
= vect_recog_temp_ssa_var (type
, NULL
);
2713 def_stmt
= gimple_build_assign (def
, NOP_EXPR
, oprnd0
);
2714 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
2718 /* Pattern detected. */
2719 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt
);
2721 *type_out
= vectype
;
2723 /* Pattern supported. Create a stmt to be used to replace the
2725 var
= vect_recog_temp_ssa_var (type
, NULL
);
2726 pattern_stmt
= gimple_build_assign (var
, LROTATE_EXPR
, oprnd0
,
2728 return pattern_stmt
;
2733 if (is_a
<bb_vec_info
> (vinfo
) || dt
!= vect_internal_def
)
2735 optab2
= optab_for_tree_code (rhs_code
, vectype
, optab_scalar
);
2737 && optab_handler (optab2
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
2741 /* If vector/vector or vector/scalar shifts aren't supported by the target,
2742 don't do anything here either. */
2743 optab1
= optab_for_tree_code (LSHIFT_EXPR
, vectype
, optab_vector
);
2744 optab2
= optab_for_tree_code (RSHIFT_EXPR
, vectype
, optab_vector
);
2746 || optab_handler (optab1
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
2748 || optab_handler (optab2
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
2750 if (! is_a
<bb_vec_info
> (vinfo
) && dt
== vect_internal_def
)
2752 optab1
= optab_for_tree_code (LSHIFT_EXPR
, vectype
, optab_scalar
);
2753 optab2
= optab_for_tree_code (RSHIFT_EXPR
, vectype
, optab_scalar
);
2755 || optab_handler (optab1
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
2757 || optab_handler (optab2
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
2761 *type_out
= vectype
;
2763 if (bswap16_p
&& !useless_type_conversion_p (type
, TREE_TYPE (oprnd0
)))
2765 def
= vect_recog_temp_ssa_var (type
, NULL
);
2766 def_stmt
= gimple_build_assign (def
, NOP_EXPR
, oprnd0
);
2767 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
2771 if (dt
== vect_external_def
&& TREE_CODE (oprnd1
) == SSA_NAME
)
2772 ext_def
= vect_get_external_def_edge (vinfo
, oprnd1
);
2775 scalar_int_mode mode
= SCALAR_INT_TYPE_MODE (type
);
2776 if (dt
!= vect_internal_def
|| TYPE_MODE (TREE_TYPE (oprnd1
)) == mode
)
2778 else if (def_stmt
&& gimple_assign_cast_p (def_stmt
))
2780 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
2781 if (TYPE_MODE (TREE_TYPE (rhs1
)) == mode
2782 && TYPE_PRECISION (TREE_TYPE (rhs1
))
2783 == TYPE_PRECISION (type
))
2787 if (def
== NULL_TREE
)
2789 def
= vect_recog_temp_ssa_var (type
, NULL
);
2790 def_stmt
= gimple_build_assign (def
, NOP_EXPR
, oprnd1
);
2791 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
2793 stype
= TREE_TYPE (def
);
/* Compute the complementary shift amount B - c: as a constant when the
   rotate count is constant, otherwise as NEGATE then mask with B-1.  */
2795 if (TREE_CODE (def
) == INTEGER_CST
)
2797 if (!tree_fits_uhwi_p (def
)
2798 || tree_to_uhwi (def
) >= GET_MODE_PRECISION (mode
)
2799 || integer_zerop (def
))
2801 def2
= build_int_cst (stype
,
2802 GET_MODE_PRECISION (mode
) - tree_to_uhwi (def
));
2806 tree vecstype
= get_vectype_for_scalar_type (vinfo
, stype
);
2808 if (vecstype
== NULL_TREE
)
2810 def2
= vect_recog_temp_ssa_var (stype
, NULL
);
2811 def_stmt
= gimple_build_assign (def2
, NEGATE_EXPR
, def
);
/* For external defs the helper stmts go on the preheader edge instead
   of the pattern def sequence.  */
2815 = gsi_insert_on_edge_immediate (ext_def
, def_stmt
);
2816 gcc_assert (!new_bb
);
2819 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
, vecstype
);
2821 def2
= vect_recog_temp_ssa_var (stype
, NULL
);
2822 tree mask
= build_int_cst (stype
, GET_MODE_PRECISION (mode
) - 1);
2823 def_stmt
= gimple_build_assign (def2
, BIT_AND_EXPR
,
2824 gimple_assign_lhs (def_stmt
), mask
);
2828 = gsi_insert_on_edge_immediate (ext_def
, def_stmt
);
2829 gcc_assert (!new_bb
);
2832 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
, vecstype
);
/* Emit the two shifts whose IOR reconstitutes the rotate.  */
2835 var1
= vect_recog_temp_ssa_var (type
, NULL
);
2836 def_stmt
= gimple_build_assign (var1
, rhs_code
== LROTATE_EXPR
2837 ? LSHIFT_EXPR
: RSHIFT_EXPR
,
2839 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
2841 var2
= vect_recog_temp_ssa_var (type
, NULL
);
2842 def_stmt
= gimple_build_assign (var2
, rhs_code
== LROTATE_EXPR
2843 ? RSHIFT_EXPR
: LSHIFT_EXPR
,
2845 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
2847 /* Pattern detected. */
2848 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt
);
2850 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2851 var
= vect_recog_temp_ssa_var (type
, NULL
);
2852 pattern_stmt
= gimple_build_assign (var
, BIT_IOR_EXPR
, var1
, var2
);
2854 return pattern_stmt
;
2857 /* Detect a vector by vector shift pattern that wouldn't be otherwise
2865 S3 res_T = b_T op a_t;
2867 where type 'TYPE' is a type with different size than 'type',
2868 and op is <<, >> or rotate.
2873 TYPE b_T, c_T, res_T;
2876 S1 a_t = (type) c_T;
2878 S3 res_T = b_T op a_t;
2882 * STMT_VINFO: The stmt from which the pattern search begins,
2883 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2884 with a shift/rotate which has same type on both operands, in the
2885 second case just b_T op c_T, in the first case with added cast
2886 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2890 * TYPE_OUT: The type of the output of this pattern.
2892 * Return value: A new stmt that will be used to replace the shift/rotate
/* Recognize a shift/rotate whose count operand has a different-sized type
   than the shifted value, and rebuild it with both operands in the same
   type (inserting a masked cast of the count when needed) so a
   vector-by-vector shift can be used.
   Inputs: VINFO, STMT_VINFO (the shift statement).  Output: *TYPE_OUT
   receives the vector type of OPRND0's type.  Returns the new shift stmt.
   NOTE(review): mangled extraction -- logical lines are split and some
   lines (braces, `return NULL;`, the *type_out parameter line) are
   missing from this view.  Code preserved byte-for-byte below.  */
2896 vect_recog_vector_vector_shift_pattern (vec_info
*vinfo
,
2897 stmt_vec_info stmt_vinfo
,
2900 gimple
*last_stmt
= stmt_vinfo
->stmt
;
2901 tree oprnd0
, oprnd1
, lhs
, var
;
2902 gimple
*pattern_stmt
;
2903 enum tree_code rhs_code
;
2905 if (!is_gimple_assign (last_stmt
))
2908 rhs_code
= gimple_assign_rhs_code (last_stmt
);
2920 lhs
= gimple_assign_lhs (last_stmt
);
2921 oprnd0
= gimple_assign_rhs1 (last_stmt
);
2922 oprnd1
= gimple_assign_rhs2 (last_stmt
);
/* Only interesting when the count's mode differs from the value's.  */
2923 if (TREE_CODE (oprnd0
) != SSA_NAME
2924 || TREE_CODE (oprnd1
) != SSA_NAME
2925 || TYPE_MODE (TREE_TYPE (oprnd0
)) == TYPE_MODE (TREE_TYPE (oprnd1
))
2926 || !type_has_mode_precision_p (TREE_TYPE (oprnd1
))
2927 || TYPE_PRECISION (TREE_TYPE (lhs
))
2928 != TYPE_PRECISION (TREE_TYPE (oprnd0
)))
2931 stmt_vec_info def_vinfo
= vect_get_internal_def (vinfo
, oprnd1
);
2935 *type_out
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (oprnd0
));
2936 if (*type_out
== NULL_TREE
)
2939 tree def
= NULL_TREE
;
2940 gassign
*def_stmt
= dyn_cast
<gassign
*> (def_vinfo
->stmt
);
/* If the count came from a cast of a same-mode value, reuse that value,
   masking it down to the narrower count type's precision first.  */
2941 if (def_stmt
&& gimple_assign_cast_p (def_stmt
))
2943 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
2944 if (TYPE_MODE (TREE_TYPE (rhs1
)) == TYPE_MODE (TREE_TYPE (oprnd0
))
2945 && TYPE_PRECISION (TREE_TYPE (rhs1
))
2946 == TYPE_PRECISION (TREE_TYPE (oprnd0
)))
2948 if (TYPE_PRECISION (TREE_TYPE (oprnd1
))
2949 >= TYPE_PRECISION (TREE_TYPE (rhs1
)))
2954 = build_low_bits_mask (TREE_TYPE (rhs1
),
2955 TYPE_PRECISION (TREE_TYPE (oprnd1
)));
2956 def
= vect_recog_temp_ssa_var (TREE_TYPE (rhs1
), NULL
);
2957 def_stmt
= gimple_build_assign (def
, BIT_AND_EXPR
, rhs1
, mask
);
2958 tree vecstype
= get_vectype_for_scalar_type (vinfo
,
2960 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
, vecstype
);
/* Otherwise just convert the count to OPRND0's type.  */
2965 if (def
== NULL_TREE
)
2967 def
= vect_recog_temp_ssa_var (TREE_TYPE (oprnd0
), NULL
);
2968 def_stmt
= gimple_build_assign (def
, NOP_EXPR
, oprnd1
);
2969 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
2972 /* Pattern detected. */
2973 vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt
);
2975 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2976 var
= vect_recog_temp_ssa_var (TREE_TYPE (oprnd0
), NULL
);
2977 pattern_stmt
= gimple_build_assign (var
, rhs_code
, oprnd0
, def
);
2979 return pattern_stmt
;
2982 /* Return true iff the target has a vector optab implementing the operation
2983 CODE on type VECTYPE. */
/* Return true iff the target has a usable vector optab implementing tree
   code CODE on vector type VECTYPE (optab exists and its handler for
   VECTYPE's mode is not CODE_FOR_nothing).
   NOTE(review): mangled extraction -- the `return voptab` head of the
   final expression is missing from this view; code preserved
   byte-for-byte, comments only added.  */
2986 target_has_vecop_for_code (tree_code code
, tree vectype
)
2988 optab voptab
= optab_for_tree_code (code
, vectype
, optab_vector
);
2990 && optab_handler (voptab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
;
2993 /* Verify that the target has optabs of VECTYPE to perform all the steps
2994 needed by the multiplication-by-immediate synthesis algorithm described by
2995 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
2996 present. Return true iff the target supports all the steps. */
/* Check that the target has the vector optabs needed for every step of
   the multiply-by-constant synthesis described by ALG and VAR on
   VECTYPE.  SYNTH_SHIFT_P means shifts will themselves be synthesized
   from additions, so vector PLUS must be available.  Returns true iff
   all steps are supported.
   NOTE(review): mangled extraction -- the switch head, several case
   labels and the trailing returns are missing from this view.  Code
   preserved byte-for-byte, comments only added.  */
2999 target_supports_mult_synth_alg (struct algorithm
*alg
, mult_variant var
,
3000 tree vectype
, bool synth_shift_p
)
3002 if (alg
->op
[0] != alg_zero
&& alg
->op
[0] != alg_m
)
3005 bool supports_vminus
= target_has_vecop_for_code (MINUS_EXPR
, vectype
);
3006 bool supports_vplus
= target_has_vecop_for_code (PLUS_EXPR
, vectype
);
3008 if (var
== negate_variant
3009 && !target_has_vecop_for_code (NEGATE_EXPR
, vectype
))
3012 /* If we must synthesize shifts with additions make sure that vector
3013 addition is available. */
3014 if ((var
== add_variant
|| synth_shift_p
) && !supports_vplus
)
/* Walk the remaining synthesis steps and require the matching optab.  */
3017 for (int i
= 1; i
< alg
->ops
; i
++)
3025 case alg_add_factor
:
3026 if (!supports_vplus
)
3031 case alg_sub_factor
:
3032 if (!supports_vminus
)
3038 case alg_impossible
:
3048 /* Synthesize a left shift of OP by AMNT bits using a series of additions and
3049 putting the final result in DEST. Append all statements but the last into
3050 VINFO. Return the last statement. */
/* Synthesize DEST = OP << AMNT as AMNT successive doublings
   (x = x + x), appending all but the last statement to STMT_INFO's
   pattern def sequence via VINFO.  Returns the last statement (per the
   preceding file comment; the final return is outside this view).
   NOTE(review): mangled extraction -- the loop body's braces, the
   `dest` selection arm of the conditional and the trailing return are
   missing from this view.  Code preserved byte-for-byte.  */
3053 synth_lshift_by_additions (vec_info
*vinfo
,
3054 tree dest
, tree op
, HOST_WIDE_INT amnt
,
3055 stmt_vec_info stmt_info
)
3058 tree itype
= TREE_TYPE (op
);
3060 gcc_assert (amnt
>= 0);
3061 for (i
= 0; i
< amnt
; i
++)
/* Intermediate iterations get a fresh temporary; the last one targets
   DEST (the DEST arm of this conditional is missing from this view).  */
3063 tree tmp_var
= (i
< amnt
- 1) ? vect_recog_temp_ssa_var (itype
, NULL
)
3066 = gimple_build_assign (tmp_var
, PLUS_EXPR
, prev_res
, prev_res
);
3069 append_pattern_def_seq (vinfo
, stmt_info
, stmt
);
3077 /* Helper for vect_synth_mult_by_constant. Apply a binary operation
3078 CODE to operands OP1 and OP2, creating a new temporary SSA var in
3079 the process if necessary. Append the resulting assignment statements
3080 to the sequence in STMT_VINFO. Return the SSA variable that holds the
3081 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
3082 left shifts using additions. */
/* Apply binary CODE to OP1 and OP2 into a fresh temporary SSA name,
   appending the resulting statement(s) to STMT_VINFO's pattern def
   sequence.  Shift-by-zero / add-zero is a no-op and returns OP1
   unchanged (return lines are outside this view).  When SYNTH_SHIFT_P,
   left shifts are synthesized as repeated additions.
   NOTE(review): mangled extraction -- braces, the returns and part of
   the LSHIFT condition are missing from this view.  Code preserved
   byte-for-byte, comments only added.  */
3085 apply_binop_and_append_stmt (vec_info
*vinfo
,
3086 tree_code code
, tree op1
, tree op2
,
3087 stmt_vec_info stmt_vinfo
, bool synth_shift_p
)
3089 if (integer_zerop (op2
)
3090 && (code
== LSHIFT_EXPR
3091 || code
== PLUS_EXPR
))
3093 gcc_assert (TREE_CODE (op1
) == SSA_NAME
);
3098 tree itype
= TREE_TYPE (op1
);
3099 tree tmp_var
= vect_recog_temp_ssa_var (itype
, NULL
);
/* Shift path: either synthesize via additions or emit the shift.  */
3101 if (code
== LSHIFT_EXPR
3104 stmt
= synth_lshift_by_additions (vinfo
, tmp_var
, op1
,
3105 TREE_INT_CST_LOW (op2
), stmt_vinfo
);
3106 append_pattern_def_seq (vinfo
, stmt_vinfo
, stmt
);
3110 stmt
= gimple_build_assign (tmp_var
, code
, op1
, op2
);
3111 append_pattern_def_seq (vinfo
, stmt_vinfo
, stmt
);
3115 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
3116 and simple arithmetic operations to be vectorized. Record the statements
3117 produced in STMT_VINFO and return the last statement in the sequence or
3118 NULL if it's not possible to synthesize such a multiplication.
3119 This function mirrors the behavior of expand_mult_const in expmed.cc but
3120 works on tree-ssa form. */
3123 vect_synth_mult_by_constant (vec_info
*vinfo
, tree op
, tree val
,
3124 stmt_vec_info stmt_vinfo
)
3126 tree itype
= TREE_TYPE (op
);
3127 machine_mode mode
= TYPE_MODE (itype
);
3128 struct algorithm alg
;
3129 mult_variant variant
;
3130 if (!tree_fits_shwi_p (val
))
3133 /* Multiplication synthesis by shifts, adds and subs can introduce
3134 signed overflow where the original operation didn't. Perform the
3135 operations on an unsigned type and cast back to avoid this.
3136 In the future we may want to relax this for synthesis algorithms
3137 that we can prove do not cause unexpected overflow. */
3138 bool cast_to_unsigned_p
= !TYPE_OVERFLOW_WRAPS (itype
);
3140 tree multtype
= cast_to_unsigned_p
? unsigned_type_for (itype
) : itype
;
3141 tree vectype
= get_vectype_for_scalar_type (vinfo
, multtype
);
3145 /* Targets that don't support vector shifts but support vector additions
3146 can synthesize shifts that way. */
3147 bool synth_shift_p
= !vect_supportable_shift (vinfo
, LSHIFT_EXPR
, multtype
);
3149 HOST_WIDE_INT hwval
= tree_to_shwi (val
);
3150 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
3151 The vectorizer's benefit analysis will decide whether it's beneficial
3153 bool possible
= choose_mult_variant (VECTOR_MODE_P (TYPE_MODE (vectype
))
3154 ? TYPE_MODE (vectype
) : mode
,
3155 hwval
, &alg
, &variant
, MAX_COST
);
3159 if (!target_supports_mult_synth_alg (&alg
, variant
, vectype
, synth_shift_p
))
3164 /* Clear out the sequence of statements so we can populate it below. */
3165 gimple
*stmt
= NULL
;
3167 if (cast_to_unsigned_p
)
3169 tree tmp_op
= vect_recog_temp_ssa_var (multtype
, NULL
);
3170 stmt
= gimple_build_assign (tmp_op
, CONVERT_EXPR
, op
);
3171 append_pattern_def_seq (vinfo
, stmt_vinfo
, stmt
);
3175 if (alg
.op
[0] == alg_zero
)
3176 accumulator
= build_int_cst (multtype
, 0);
3180 bool needs_fixup
= (variant
== negate_variant
)
3181 || (variant
== add_variant
);
3183 for (int i
= 1; i
< alg
.ops
; i
++)
3185 tree shft_log
= build_int_cst (multtype
, alg
.log
[i
]);
3186 tree accum_tmp
= vect_recog_temp_ssa_var (multtype
, NULL
);
3187 tree tmp_var
= NULL_TREE
;
3194 = synth_lshift_by_additions (vinfo
, accum_tmp
, accumulator
,
3195 alg
.log
[i
], stmt_vinfo
);
3197 stmt
= gimple_build_assign (accum_tmp
, LSHIFT_EXPR
, accumulator
,
3202 = apply_binop_and_append_stmt (vinfo
, LSHIFT_EXPR
, op
, shft_log
,
3203 stmt_vinfo
, synth_shift_p
);
3204 stmt
= gimple_build_assign (accum_tmp
, PLUS_EXPR
, accumulator
,
3208 tmp_var
= apply_binop_and_append_stmt (vinfo
, LSHIFT_EXPR
, op
,
3209 shft_log
, stmt_vinfo
,
3211 /* In some algorithms the first step involves zeroing the
3212 accumulator. If subtracting from such an accumulator
3213 just emit the negation directly. */
3214 if (integer_zerop (accumulator
))
3215 stmt
= gimple_build_assign (accum_tmp
, NEGATE_EXPR
, tmp_var
);
3217 stmt
= gimple_build_assign (accum_tmp
, MINUS_EXPR
, accumulator
,
3222 = apply_binop_and_append_stmt (vinfo
, LSHIFT_EXPR
, accumulator
,
3223 shft_log
, stmt_vinfo
, synth_shift_p
);
3224 stmt
= gimple_build_assign (accum_tmp
, PLUS_EXPR
, tmp_var
, op
);
3228 = apply_binop_and_append_stmt (vinfo
, LSHIFT_EXPR
, accumulator
,
3229 shft_log
, stmt_vinfo
, synth_shift_p
);
3230 stmt
= gimple_build_assign (accum_tmp
, MINUS_EXPR
, tmp_var
, op
);
3232 case alg_add_factor
:
3234 = apply_binop_and_append_stmt (vinfo
, LSHIFT_EXPR
, accumulator
,
3235 shft_log
, stmt_vinfo
, synth_shift_p
);
3236 stmt
= gimple_build_assign (accum_tmp
, PLUS_EXPR
, accumulator
,
3239 case alg_sub_factor
:
3241 = apply_binop_and_append_stmt (vinfo
, LSHIFT_EXPR
, accumulator
,
3242 shft_log
, stmt_vinfo
, synth_shift_p
);
3243 stmt
= gimple_build_assign (accum_tmp
, MINUS_EXPR
, tmp_var
,
3249 /* We don't want to append the last stmt in the sequence to stmt_vinfo
3250 but rather return it directly. */
3252 if ((i
< alg
.ops
- 1) || needs_fixup
|| cast_to_unsigned_p
)
3253 append_pattern_def_seq (vinfo
, stmt_vinfo
, stmt
);
3254 accumulator
= accum_tmp
;
3256 if (variant
== negate_variant
)
3258 tree accum_tmp
= vect_recog_temp_ssa_var (multtype
, NULL
);
3259 stmt
= gimple_build_assign (accum_tmp
, NEGATE_EXPR
, accumulator
);
3260 accumulator
= accum_tmp
;
3261 if (cast_to_unsigned_p
)
3262 append_pattern_def_seq (vinfo
, stmt_vinfo
, stmt
);
3264 else if (variant
== add_variant
)
3266 tree accum_tmp
= vect_recog_temp_ssa_var (multtype
, NULL
);
3267 stmt
= gimple_build_assign (accum_tmp
, PLUS_EXPR
, accumulator
, op
);
3268 accumulator
= accum_tmp
;
3269 if (cast_to_unsigned_p
)
3270 append_pattern_def_seq (vinfo
, stmt_vinfo
, stmt
);
3272 /* Move back to a signed if needed. */
3273 if (cast_to_unsigned_p
)
3275 tree accum_tmp
= vect_recog_temp_ssa_var (itype
, NULL
);
3276 stmt
= gimple_build_assign (accum_tmp
, CONVERT_EXPR
, accumulator
);
3282 /* Detect multiplication by constant and convert it into a sequence of
3283 shifts and additions, subtractions, negations. We reuse the
3284 choose_mult_variant algorithms from expmed.cc
3288 STMT_VINFO: The stmt from which the pattern search begins,
3293 * TYPE_OUT: The type of the output of this pattern.
3295 * Return value: A new stmt that will be used to replace
3296 the multiplication. */
3299 vect_recog_mult_pattern (vec_info
*vinfo
,
3300 stmt_vec_info stmt_vinfo
, tree
*type_out
)
3302 gimple
*last_stmt
= stmt_vinfo
->stmt
;
3303 tree oprnd0
, oprnd1
, vectype
, itype
;
3304 gimple
*pattern_stmt
;
3306 if (!is_gimple_assign (last_stmt
))
3309 if (gimple_assign_rhs_code (last_stmt
) != MULT_EXPR
)
3312 oprnd0
= gimple_assign_rhs1 (last_stmt
);
3313 oprnd1
= gimple_assign_rhs2 (last_stmt
);
3314 itype
= TREE_TYPE (oprnd0
);
3316 if (TREE_CODE (oprnd0
) != SSA_NAME
3317 || TREE_CODE (oprnd1
) != INTEGER_CST
3318 || !INTEGRAL_TYPE_P (itype
)
3319 || !type_has_mode_precision_p (itype
))
3322 vectype
= get_vectype_for_scalar_type (vinfo
, itype
);
3323 if (vectype
== NULL_TREE
)
3326 /* If the target can handle vectorized multiplication natively,
3327 don't attempt to optimize this. */
3328 optab mul_optab
= optab_for_tree_code (MULT_EXPR
, vectype
, optab_default
);
3329 if (mul_optab
!= unknown_optab
)
3331 machine_mode vec_mode
= TYPE_MODE (vectype
);
3332 int icode
= (int) optab_handler (mul_optab
, vec_mode
);
3333 if (icode
!= CODE_FOR_nothing
)
3337 pattern_stmt
= vect_synth_mult_by_constant (vinfo
,
3338 oprnd0
, oprnd1
, stmt_vinfo
);
3342 /* Pattern detected. */
3343 vect_pattern_detected ("vect_recog_mult_pattern", last_stmt
);
3345 *type_out
= vectype
;
3347 return pattern_stmt
;
3350 /* Detect a signed division by a constant that wouldn't be
3351 otherwise vectorized:
3357 where type 'type' is an integral type and N is a constant.
3359 Similarly handle modulo by a constant:
3365 * STMT_VINFO: The stmt from which the pattern search begins,
3366 i.e. the division stmt. S1 is replaced by if N is a power
3367 of two constant and type is signed:
3368 S3 y_t = b_t < 0 ? N - 1 : 0;
3370 S1' a_t = x_t >> log2 (N);
3372 S4 is replaced if N is a power of two constant and
3373 type is signed by (where *_T temporaries have unsigned type):
3374 S9 y_T = b_t < 0 ? -1U : 0U;
3375 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
3376 S7 z_t = (type) z_T;
3378 S5 x_t = w_t & (N - 1);
3379 S4' a_t = x_t - z_t;
3383 * TYPE_OUT: The type of the output of this pattern.
3385 * Return value: A new stmt that will be used to replace the division
3386 S1 or modulo S4 stmt. */
3389 vect_recog_divmod_pattern (vec_info
*vinfo
,
3390 stmt_vec_info stmt_vinfo
, tree
*type_out
)
3392 gimple
*last_stmt
= stmt_vinfo
->stmt
;
3393 tree oprnd0
, oprnd1
, vectype
, itype
, cond
;
3394 gimple
*pattern_stmt
, *def_stmt
;
3395 enum tree_code rhs_code
;
3398 int dummy_int
, prec
;
3400 if (!is_gimple_assign (last_stmt
))
3403 rhs_code
= gimple_assign_rhs_code (last_stmt
);
3406 case TRUNC_DIV_EXPR
:
3407 case EXACT_DIV_EXPR
:
3408 case TRUNC_MOD_EXPR
:
3414 oprnd0
= gimple_assign_rhs1 (last_stmt
);
3415 oprnd1
= gimple_assign_rhs2 (last_stmt
);
3416 itype
= TREE_TYPE (oprnd0
);
3417 if (TREE_CODE (oprnd0
) != SSA_NAME
3418 || TREE_CODE (oprnd1
) != INTEGER_CST
3419 || TREE_CODE (itype
) != INTEGER_TYPE
3420 || !type_has_mode_precision_p (itype
))
3423 scalar_int_mode itype_mode
= SCALAR_INT_TYPE_MODE (itype
);
3424 vectype
= get_vectype_for_scalar_type (vinfo
, itype
);
3425 if (vectype
== NULL_TREE
)
3428 if (optimize_bb_for_size_p (gimple_bb (last_stmt
)))
3430 /* If the target can handle vectorized division or modulo natively,
3431 don't attempt to optimize this, since native division is likely
3432 to give smaller code. */
3433 optab
= optab_for_tree_code (rhs_code
, vectype
, optab_default
);
3434 if (optab
!= unknown_optab
)
3436 machine_mode vec_mode
= TYPE_MODE (vectype
);
3437 int icode
= (int) optab_handler (optab
, vec_mode
);
3438 if (icode
!= CODE_FOR_nothing
)
3443 prec
= TYPE_PRECISION (itype
);
3444 if (integer_pow2p (oprnd1
))
3446 if (TYPE_UNSIGNED (itype
) || tree_int_cst_sgn (oprnd1
) != 1)
3449 /* Pattern detected. */
3450 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt
);
3452 *type_out
= vectype
;
3454 /* Check if the target supports this internal function. */
3455 internal_fn ifn
= IFN_DIV_POW2
;
3456 if (direct_internal_fn_supported_p (ifn
, vectype
, OPTIMIZE_FOR_SPEED
))
3458 tree shift
= build_int_cst (itype
, tree_log2 (oprnd1
));
3460 tree var_div
= vect_recog_temp_ssa_var (itype
, NULL
);
3461 gimple
*div_stmt
= gimple_build_call_internal (ifn
, 2, oprnd0
, shift
);
3462 gimple_call_set_lhs (div_stmt
, var_div
);
3464 if (rhs_code
== TRUNC_MOD_EXPR
)
3466 append_pattern_def_seq (vinfo
, stmt_vinfo
, div_stmt
);
3468 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
3469 LSHIFT_EXPR
, var_div
, shift
);
3470 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
3472 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
3474 gimple_assign_lhs (def_stmt
));
3477 pattern_stmt
= div_stmt
;
3478 gimple_set_location (pattern_stmt
, gimple_location (last_stmt
));
3480 return pattern_stmt
;
3483 cond
= build2 (LT_EXPR
, boolean_type_node
, oprnd0
,
3484 build_int_cst (itype
, 0));
3485 if (rhs_code
== TRUNC_DIV_EXPR
3486 || rhs_code
== EXACT_DIV_EXPR
)
3488 tree var
= vect_recog_temp_ssa_var (itype
, NULL
);
3491 = gimple_build_assign (var
, COND_EXPR
, cond
,
3492 fold_build2 (MINUS_EXPR
, itype
, oprnd1
,
3493 build_int_cst (itype
, 1)),
3494 build_int_cst (itype
, 0));
3495 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
3496 var
= vect_recog_temp_ssa_var (itype
, NULL
);
3498 = gimple_build_assign (var
, PLUS_EXPR
, oprnd0
,
3499 gimple_assign_lhs (def_stmt
));
3500 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
3502 shift
= build_int_cst (itype
, tree_log2 (oprnd1
));
3504 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
3505 RSHIFT_EXPR
, var
, shift
);
3510 if (compare_tree_int (oprnd1
, 2) == 0)
3512 signmask
= vect_recog_temp_ssa_var (itype
, NULL
);
3513 def_stmt
= gimple_build_assign (signmask
, COND_EXPR
, cond
,
3514 build_int_cst (itype
, 1),
3515 build_int_cst (itype
, 0));
3516 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
3521 = build_nonstandard_integer_type (prec
, 1);
3522 tree vecutype
= get_vectype_for_scalar_type (vinfo
, utype
);
3524 = build_int_cst (utype
, GET_MODE_BITSIZE (itype_mode
)
3525 - tree_log2 (oprnd1
));
3526 tree var
= vect_recog_temp_ssa_var (utype
, NULL
);
3528 def_stmt
= gimple_build_assign (var
, COND_EXPR
, cond
,
3529 build_int_cst (utype
, -1),
3530 build_int_cst (utype
, 0));
3531 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
, vecutype
);
3532 var
= vect_recog_temp_ssa_var (utype
, NULL
);
3533 def_stmt
= gimple_build_assign (var
, RSHIFT_EXPR
,
3534 gimple_assign_lhs (def_stmt
),
3536 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
, vecutype
);
3537 signmask
= vect_recog_temp_ssa_var (itype
, NULL
);
3539 = gimple_build_assign (signmask
, NOP_EXPR
, var
);
3540 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
3543 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
3544 PLUS_EXPR
, oprnd0
, signmask
);
3545 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
3547 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
3548 BIT_AND_EXPR
, gimple_assign_lhs (def_stmt
),
3549 fold_build2 (MINUS_EXPR
, itype
, oprnd1
,
3550 build_int_cst (itype
, 1)));
3551 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
3554 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
3555 MINUS_EXPR
, gimple_assign_lhs (def_stmt
),
3559 return pattern_stmt
;
3562 if (prec
> HOST_BITS_PER_WIDE_INT
3563 || integer_zerop (oprnd1
))
3566 if (!can_mult_highpart_p (TYPE_MODE (vectype
), TYPE_UNSIGNED (itype
)))
3569 if (TYPE_UNSIGNED (itype
))
3571 unsigned HOST_WIDE_INT mh
, ml
;
3572 int pre_shift
, post_shift
;
3573 unsigned HOST_WIDE_INT d
= (TREE_INT_CST_LOW (oprnd1
)
3574 & GET_MODE_MASK (itype_mode
));
3575 tree t1
, t2
, t3
, t4
;
3577 if (d
>= (HOST_WIDE_INT_1U
<< (prec
- 1)))
3578 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
3581 /* Find a suitable multiplier and right shift count
3582 instead of multiplying with D. */
3583 mh
= choose_multiplier (d
, prec
, prec
, &ml
, &post_shift
, &dummy_int
);
3585 /* If the suggested multiplier is more than SIZE bits, we can do better
3586 for even divisors, using an initial right shift. */
3587 if (mh
!= 0 && (d
& 1) == 0)
3589 pre_shift
= ctz_or_zero (d
);
3590 mh
= choose_multiplier (d
>> pre_shift
, prec
, prec
- pre_shift
,
3591 &ml
, &post_shift
, &dummy_int
);
3599 if (post_shift
- 1 >= prec
)
3602 /* t1 = oprnd0 h* ml;
3606 q = t4 >> (post_shift - 1); */
3607 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
3608 def_stmt
= gimple_build_assign (t1
, MULT_HIGHPART_EXPR
, oprnd0
,
3609 build_int_cst (itype
, ml
));
3610 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
3612 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
3614 = gimple_build_assign (t2
, MINUS_EXPR
, oprnd0
, t1
);
3615 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
3617 t3
= vect_recog_temp_ssa_var (itype
, NULL
);
3619 = gimple_build_assign (t3
, RSHIFT_EXPR
, t2
, integer_one_node
);
3620 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
3622 t4
= vect_recog_temp_ssa_var (itype
, NULL
);
3624 = gimple_build_assign (t4
, PLUS_EXPR
, t1
, t3
);
3626 if (post_shift
!= 1)
3628 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
3630 q
= vect_recog_temp_ssa_var (itype
, NULL
);
3632 = gimple_build_assign (q
, RSHIFT_EXPR
, t4
,
3633 build_int_cst (itype
, post_shift
- 1));
3638 pattern_stmt
= def_stmt
;
3643 if (pre_shift
>= prec
|| post_shift
>= prec
)
3646 /* t1 = oprnd0 >> pre_shift;
3648 q = t2 >> post_shift; */
3651 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
3653 = gimple_build_assign (t1
, RSHIFT_EXPR
, oprnd0
,
3654 build_int_cst (NULL
, pre_shift
));
3655 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
3660 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
3661 def_stmt
= gimple_build_assign (t2
, MULT_HIGHPART_EXPR
, t1
,
3662 build_int_cst (itype
, ml
));
3666 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
3668 q
= vect_recog_temp_ssa_var (itype
, NULL
);
3670 = gimple_build_assign (q
, RSHIFT_EXPR
, t2
,
3671 build_int_cst (itype
, post_shift
));
3676 pattern_stmt
= def_stmt
;
3681 unsigned HOST_WIDE_INT ml
;
3683 HOST_WIDE_INT d
= TREE_INT_CST_LOW (oprnd1
);
3684 unsigned HOST_WIDE_INT abs_d
;
3686 tree t1
, t2
, t3
, t4
;
3688 /* Give up for -1. */
3692 /* Since d might be INT_MIN, we have to cast to
3693 unsigned HOST_WIDE_INT before negating to avoid
3694 undefined signed overflow. */
3696 ? (unsigned HOST_WIDE_INT
) d
3697 : - (unsigned HOST_WIDE_INT
) d
);
3699 /* n rem d = n rem -d */
3700 if (rhs_code
== TRUNC_MOD_EXPR
&& d
< 0)
3703 oprnd1
= build_int_cst (itype
, abs_d
);
3705 if (HOST_BITS_PER_WIDE_INT
>= prec
3706 && abs_d
== HOST_WIDE_INT_1U
<< (prec
- 1))
3707 /* This case is not handled correctly below. */
3710 choose_multiplier (abs_d
, prec
, prec
- 1, &ml
, &post_shift
, &dummy_int
);
3711 if (ml
>= HOST_WIDE_INT_1U
<< (prec
- 1))
3714 ml
|= HOST_WIDE_INT_M1U
<< (prec
- 1);
3716 if (post_shift
>= prec
)
3719 /* t1 = oprnd0 h* ml; */
3720 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
3721 def_stmt
= gimple_build_assign (t1
, MULT_HIGHPART_EXPR
, oprnd0
,
3722 build_int_cst (itype
, ml
));
3726 /* t2 = t1 + oprnd0; */
3727 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
3728 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
3729 def_stmt
= gimple_build_assign (t2
, PLUS_EXPR
, t1
, oprnd0
);
3736 /* t3 = t2 >> post_shift; */
3737 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
3738 t3
= vect_recog_temp_ssa_var (itype
, NULL
);
3739 def_stmt
= gimple_build_assign (t3
, RSHIFT_EXPR
, t2
,
3740 build_int_cst (itype
, post_shift
));
3747 get_range_query (cfun
)->range_of_expr (r
, oprnd0
);
3748 if (r
.kind () == VR_RANGE
)
3750 if (!wi::neg_p (r
.lower_bound (), TYPE_SIGN (itype
)))
3752 else if (wi::neg_p (r
.upper_bound (), TYPE_SIGN (itype
)))
3756 if (msb
== 0 && d
>= 0)
3760 pattern_stmt
= def_stmt
;
3764 /* t4 = oprnd0 >> (prec - 1);
3765 or if we know from VRP that oprnd0 >= 0
3767 or if we know from VRP that oprnd0 < 0
3769 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
3770 t4
= vect_recog_temp_ssa_var (itype
, NULL
);
3772 def_stmt
= gimple_build_assign (t4
, INTEGER_CST
,
3773 build_int_cst (itype
, msb
));
3775 def_stmt
= gimple_build_assign (t4
, RSHIFT_EXPR
, oprnd0
,
3776 build_int_cst (itype
, prec
- 1));
3777 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
3779 /* q = t3 - t4; or q = t4 - t3; */
3780 q
= vect_recog_temp_ssa_var (itype
, NULL
);
3781 pattern_stmt
= gimple_build_assign (q
, MINUS_EXPR
, d
< 0 ? t4
: t3
,
3786 if (rhs_code
== TRUNC_MOD_EXPR
)
3790 /* We divided. Now finish by:
3793 append_pattern_def_seq (vinfo
, stmt_vinfo
, pattern_stmt
);
3795 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
3796 def_stmt
= gimple_build_assign (t1
, MULT_EXPR
, q
, oprnd1
);
3797 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
3799 r
= vect_recog_temp_ssa_var (itype
, NULL
);
3800 pattern_stmt
= gimple_build_assign (r
, MINUS_EXPR
, oprnd0
, t1
);
3803 /* Pattern detected. */
3804 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt
);
3806 *type_out
= vectype
;
3807 return pattern_stmt
;
3810 /* Function vect_recog_mixed_size_cond_pattern
3812 Try to find the following pattern:
3817 S1 a_T = x_t CMP y_t ? b_T : c_T;
3819 where type 'TYPE' is an integral type which has different size
3820 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
3821 than 'type', the constants need to fit into an integer type
3822 with the same width as 'type') or results of conversion from 'type'.
3826 * STMT_VINFO: The stmt from which the pattern search begins.
3830 * TYPE_OUT: The type of the output of this pattern.
3832 * Return value: A new stmt that will be used to replace the pattern.
3833 Additionally a def_stmt is added.
3835 a_it = x_t CMP y_t ? b_it : c_it;
3836 a_T = (TYPE) a_it; */
3839 vect_recog_mixed_size_cond_pattern (vec_info
*vinfo
,
3840 stmt_vec_info stmt_vinfo
, tree
*type_out
)
3842 gimple
*last_stmt
= stmt_vinfo
->stmt
;
3843 tree cond_expr
, then_clause
, else_clause
;
3844 tree type
, vectype
, comp_vectype
, itype
= NULL_TREE
, vecitype
;
3845 gimple
*pattern_stmt
, *def_stmt
;
3846 tree orig_type0
= NULL_TREE
, orig_type1
= NULL_TREE
;
3847 gimple
*def_stmt0
= NULL
, *def_stmt1
= NULL
;
3849 tree comp_scalar_type
;
3851 if (!is_gimple_assign (last_stmt
)
3852 || gimple_assign_rhs_code (last_stmt
) != COND_EXPR
3853 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_internal_def
)
3856 cond_expr
= gimple_assign_rhs1 (last_stmt
);
3857 then_clause
= gimple_assign_rhs2 (last_stmt
);
3858 else_clause
= gimple_assign_rhs3 (last_stmt
);
3860 if (!COMPARISON_CLASS_P (cond_expr
))
3863 comp_scalar_type
= TREE_TYPE (TREE_OPERAND (cond_expr
, 0));
3864 comp_vectype
= get_vectype_for_scalar_type (vinfo
, comp_scalar_type
);
3865 if (comp_vectype
== NULL_TREE
)
3868 type
= TREE_TYPE (gimple_assign_lhs (last_stmt
));
3869 if (types_compatible_p (type
, comp_scalar_type
)
3870 || ((TREE_CODE (then_clause
) != INTEGER_CST
3871 || TREE_CODE (else_clause
) != INTEGER_CST
)
3872 && !INTEGRAL_TYPE_P (comp_scalar_type
))
3873 || !INTEGRAL_TYPE_P (type
))
3876 if ((TREE_CODE (then_clause
) != INTEGER_CST
3877 && !type_conversion_p (vinfo
, then_clause
, false,
3878 &orig_type0
, &def_stmt0
, &promotion
))
3879 || (TREE_CODE (else_clause
) != INTEGER_CST
3880 && !type_conversion_p (vinfo
, else_clause
, false,
3881 &orig_type1
, &def_stmt1
, &promotion
)))
3884 if (orig_type0
&& orig_type1
3885 && !types_compatible_p (orig_type0
, orig_type1
))
3890 if (!types_compatible_p (orig_type0
, comp_scalar_type
))
3892 then_clause
= gimple_assign_rhs1 (def_stmt0
);
3898 if (!types_compatible_p (orig_type1
, comp_scalar_type
))
3900 else_clause
= gimple_assign_rhs1 (def_stmt1
);
3905 HOST_WIDE_INT cmp_mode_size
3906 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype
));
3908 scalar_int_mode type_mode
= SCALAR_INT_TYPE_MODE (type
);
3909 if (GET_MODE_BITSIZE (type_mode
) == cmp_mode_size
)
3912 vectype
= get_vectype_for_scalar_type (vinfo
, type
);
3913 if (vectype
== NULL_TREE
)
3916 if (expand_vec_cond_expr_p (vectype
, comp_vectype
, TREE_CODE (cond_expr
)))
3919 if (itype
== NULL_TREE
)
3920 itype
= build_nonstandard_integer_type (cmp_mode_size
,
3921 TYPE_UNSIGNED (type
));
3923 if (itype
== NULL_TREE
3924 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype
)) != cmp_mode_size
)
3927 vecitype
= get_vectype_for_scalar_type (vinfo
, itype
);
3928 if (vecitype
== NULL_TREE
)
3931 if (!expand_vec_cond_expr_p (vecitype
, comp_vectype
, TREE_CODE (cond_expr
)))
3934 if (GET_MODE_BITSIZE (type_mode
) > cmp_mode_size
)
3936 if ((TREE_CODE (then_clause
) == INTEGER_CST
3937 && !int_fits_type_p (then_clause
, itype
))
3938 || (TREE_CODE (else_clause
) == INTEGER_CST
3939 && !int_fits_type_p (else_clause
, itype
)))
3943 def_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
3944 COND_EXPR
, unshare_expr (cond_expr
),
3945 fold_convert (itype
, then_clause
),
3946 fold_convert (itype
, else_clause
));
3947 pattern_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (type
, NULL
),
3948 NOP_EXPR
, gimple_assign_lhs (def_stmt
));
3950 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
, vecitype
);
3951 *type_out
= vectype
;
3953 vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt
);
3955 return pattern_stmt
;
3959 /* Helper function of vect_recog_bool_pattern. Called recursively, return
3960 true if bool VAR can and should be optimized that way. Assume it shouldn't
3961 in case it's a result of a comparison which can be directly vectorized into
3962 a vector comparison. Fills in STMTS with all stmts visited during the
3966 check_bool_pattern (tree var
, vec_info
*vinfo
, hash_set
<gimple
*> &stmts
)
3969 enum tree_code rhs_code
;
3971 stmt_vec_info def_stmt_info
= vect_get_internal_def (vinfo
, var
);
3975 gassign
*def_stmt
= dyn_cast
<gassign
*> (def_stmt_info
->stmt
);
3979 if (stmts
.contains (def_stmt
))
3982 rhs1
= gimple_assign_rhs1 (def_stmt
);
3983 rhs_code
= gimple_assign_rhs_code (def_stmt
);
3987 if (! check_bool_pattern (rhs1
, vinfo
, stmts
))
3992 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1
)))
3994 if (! check_bool_pattern (rhs1
, vinfo
, stmts
))
3999 if (! check_bool_pattern (rhs1
, vinfo
, stmts
))
4006 if (! check_bool_pattern (rhs1
, vinfo
, stmts
)
4007 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt
), vinfo
, stmts
))
4012 if (TREE_CODE_CLASS (rhs_code
) == tcc_comparison
)
4014 tree vecitype
, comp_vectype
;
4016 /* If the comparison can throw, then is_gimple_condexpr will be
4017 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
4018 if (stmt_could_throw_p (cfun
, def_stmt
))
4021 comp_vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (rhs1
));
4022 if (comp_vectype
== NULL_TREE
)
4025 tree mask_type
= get_mask_type_for_scalar_type (vinfo
,
4028 && expand_vec_cmp_expr_p (comp_vectype
, mask_type
, rhs_code
))
4031 if (TREE_CODE (TREE_TYPE (rhs1
)) != INTEGER_TYPE
)
4033 scalar_mode mode
= SCALAR_TYPE_MODE (TREE_TYPE (rhs1
));
4035 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode
), 1);
4036 vecitype
= get_vectype_for_scalar_type (vinfo
, itype
);
4037 if (vecitype
== NULL_TREE
)
4041 vecitype
= comp_vectype
;
4042 if (! expand_vec_cond_expr_p (vecitype
, comp_vectype
, rhs_code
))
4050 bool res
= stmts
.add (def_stmt
);
4051 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
4058 /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
4059 stmt (SSA_NAME_DEF_STMT of VAR) adding a cast to STMT_INFOs
4060 pattern sequence. */
4063 adjust_bool_pattern_cast (vec_info
*vinfo
,
4064 tree type
, tree var
, stmt_vec_info stmt_info
)
4066 gimple
*cast_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (type
, NULL
),
4068 append_pattern_def_seq (vinfo
, stmt_info
, cast_stmt
,
4069 get_vectype_for_scalar_type (vinfo
, type
));
4070 return gimple_assign_lhs (cast_stmt
);
4073 /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
4074 VAR is an SSA_NAME that should be transformed from bool to a wider integer
4075 type, OUT_TYPE is the desired final integer type of the whole pattern.
4076 STMT_INFO is the info of the pattern root and is where pattern stmts should
4077 be associated with. DEFS is a map of pattern defs. */
4080 adjust_bool_pattern (vec_info
*vinfo
, tree var
, tree out_type
,
4081 stmt_vec_info stmt_info
, hash_map
<tree
, tree
> &defs
)
4083 gimple
*stmt
= SSA_NAME_DEF_STMT (var
);
4084 enum tree_code rhs_code
, def_rhs_code
;
4085 tree itype
, cond_expr
, rhs1
, rhs2
, irhs1
, irhs2
;
4087 gimple
*pattern_stmt
, *def_stmt
;
4088 tree trueval
= NULL_TREE
;
4090 rhs1
= gimple_assign_rhs1 (stmt
);
4091 rhs2
= gimple_assign_rhs2 (stmt
);
4092 rhs_code
= gimple_assign_rhs_code (stmt
);
4093 loc
= gimple_location (stmt
);
4098 irhs1
= *defs
.get (rhs1
);
4099 itype
= TREE_TYPE (irhs1
);
4101 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
4106 irhs1
= *defs
.get (rhs1
);
4107 itype
= TREE_TYPE (irhs1
);
4109 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
4110 BIT_XOR_EXPR
, irhs1
, build_int_cst (itype
, 1));
4114 /* Try to optimize x = y & (a < b ? 1 : 0); into
4115 x = (a < b ? y : 0);
4121 S1 a_b = x1 CMP1 y1;
4122 S2 b_b = x2 CMP2 y2;
4124 S4 d_T = (TYPE) c_b;
4126 we would normally emit:
4128 S1' a_T = x1 CMP1 y1 ? 1 : 0;
4129 S2' b_T = x2 CMP2 y2 ? 1 : 0;
4130 S3' c_T = a_T & b_T;
4133 but we can save one stmt by using the
4134 result of one of the COND_EXPRs in the other COND_EXPR and leave
4135 BIT_AND_EXPR stmt out:
4137 S1' a_T = x1 CMP1 y1 ? 1 : 0;
4138 S3' c_T = x2 CMP2 y2 ? a_T : 0;
4141 At least when VEC_COND_EXPR is implemented using masks
4142 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
4143 computes the comparison masks and ands it, in one case with
4144 all ones vector, in the other case with a vector register.
4145 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
4146 often more expensive. */
4147 def_stmt
= SSA_NAME_DEF_STMT (rhs2
);
4148 def_rhs_code
= gimple_assign_rhs_code (def_stmt
);
4149 if (TREE_CODE_CLASS (def_rhs_code
) == tcc_comparison
)
4151 irhs1
= *defs
.get (rhs1
);
4152 tree def_rhs1
= gimple_assign_rhs1 (def_stmt
);
4153 if (TYPE_PRECISION (TREE_TYPE (irhs1
))
4154 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1
))))
4156 rhs_code
= def_rhs_code
;
4158 rhs2
= gimple_assign_rhs2 (def_stmt
);
4163 irhs2
= *defs
.get (rhs2
);
4166 def_stmt
= SSA_NAME_DEF_STMT (rhs1
);
4167 def_rhs_code
= gimple_assign_rhs_code (def_stmt
);
4168 if (TREE_CODE_CLASS (def_rhs_code
) == tcc_comparison
)
4170 irhs2
= *defs
.get (rhs2
);
4171 tree def_rhs1
= gimple_assign_rhs1 (def_stmt
);
4172 if (TYPE_PRECISION (TREE_TYPE (irhs2
))
4173 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1
))))
4175 rhs_code
= def_rhs_code
;
4177 rhs2
= gimple_assign_rhs2 (def_stmt
);
4182 irhs1
= *defs
.get (rhs1
);
4188 irhs1
= *defs
.get (rhs1
);
4189 irhs2
= *defs
.get (rhs2
);
4191 if (TYPE_PRECISION (TREE_TYPE (irhs1
))
4192 != TYPE_PRECISION (TREE_TYPE (irhs2
)))
4194 int prec1
= TYPE_PRECISION (TREE_TYPE (irhs1
));
4195 int prec2
= TYPE_PRECISION (TREE_TYPE (irhs2
));
4196 int out_prec
= TYPE_PRECISION (out_type
);
4197 if (absu_hwi (out_prec
- prec1
) < absu_hwi (out_prec
- prec2
))
4198 irhs2
= adjust_bool_pattern_cast (vinfo
, TREE_TYPE (irhs1
), irhs2
,
4200 else if (absu_hwi (out_prec
- prec1
) > absu_hwi (out_prec
- prec2
))
4201 irhs1
= adjust_bool_pattern_cast (vinfo
, TREE_TYPE (irhs2
), irhs1
,
4205 irhs1
= adjust_bool_pattern_cast (vinfo
,
4206 out_type
, irhs1
, stmt_info
);
4207 irhs2
= adjust_bool_pattern_cast (vinfo
,
4208 out_type
, irhs2
, stmt_info
);
4211 itype
= TREE_TYPE (irhs1
);
4213 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
4214 rhs_code
, irhs1
, irhs2
);
4219 gcc_assert (TREE_CODE_CLASS (rhs_code
) == tcc_comparison
);
4220 if (TREE_CODE (TREE_TYPE (rhs1
)) != INTEGER_TYPE
4221 || !TYPE_UNSIGNED (TREE_TYPE (rhs1
))
4222 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1
)),
4223 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1
)))))
4225 scalar_mode mode
= SCALAR_TYPE_MODE (TREE_TYPE (rhs1
));
4227 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode
), 1);
4230 itype
= TREE_TYPE (rhs1
);
4231 cond_expr
= build2_loc (loc
, rhs_code
, itype
, rhs1
, rhs2
);
4232 if (trueval
== NULL_TREE
)
4233 trueval
= build_int_cst (itype
, 1);
4235 gcc_checking_assert (useless_type_conversion_p (itype
,
4236 TREE_TYPE (trueval
)));
4238 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
4239 COND_EXPR
, cond_expr
, trueval
,
4240 build_int_cst (itype
, 0));
4244 gimple_set_location (pattern_stmt
, loc
);
4245 append_pattern_def_seq (vinfo
, stmt_info
, pattern_stmt
,
4246 get_vectype_for_scalar_type (vinfo
, itype
));
4247 defs
.put (var
, gimple_assign_lhs (pattern_stmt
));
4250 /* Comparison function to qsort a vector of gimple stmts after UID. */
4253 sort_after_uid (const void *p1
, const void *p2
)
4255 const gimple
*stmt1
= *(const gimple
* const *)p1
;
4256 const gimple
*stmt2
= *(const gimple
* const *)p2
;
4257 return gimple_uid (stmt1
) - gimple_uid (stmt2
);
4260 /* Create pattern stmts for all stmts participating in the bool pattern
4261 specified by BOOL_STMT_SET and its root STMT_INFO with the desired type
4262 OUT_TYPE. Return the def of the pattern root. */
4265 adjust_bool_stmts (vec_info
*vinfo
, hash_set
<gimple
*> &bool_stmt_set
,
4266 tree out_type
, stmt_vec_info stmt_info
)
4268 /* Gather original stmts in the bool pattern in their order of appearance
4270 auto_vec
<gimple
*> bool_stmts (bool_stmt_set
.elements ());
4271 for (hash_set
<gimple
*>::iterator i
= bool_stmt_set
.begin ();
4272 i
!= bool_stmt_set
.end (); ++i
)
4273 bool_stmts
.quick_push (*i
);
4274 bool_stmts
.qsort (sort_after_uid
);
4276 /* Now process them in that order, producing pattern stmts. */
4277 hash_map
<tree
, tree
> defs
;
4278 for (unsigned i
= 0; i
< bool_stmts
.length (); ++i
)
4279 adjust_bool_pattern (vinfo
, gimple_assign_lhs (bool_stmts
[i
]),
4280 out_type
, stmt_info
, defs
);
4282 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
4283 gimple
*pattern_stmt
4284 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
));
4285 return gimple_assign_lhs (pattern_stmt
);
4288 /* Return the proper type for converting bool VAR into
4289 an integer value or NULL_TREE if no such type exists.
4290 The type is chosen so that the converted value has the
4291 same number of elements as VAR's vector type. */
4294 integer_type_for_mask (tree var
, vec_info
*vinfo
)
4296 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var
)))
4299 stmt_vec_info def_stmt_info
= vect_get_internal_def (vinfo
, var
);
4300 if (!def_stmt_info
|| !vect_use_mask_type_p (def_stmt_info
))
4303 return build_nonstandard_integer_type (def_stmt_info
->mask_precision
, 1);
4306 /* Function vect_recog_bool_pattern
4308 Try to find pattern like following:
4310 bool a_b, b_b, c_b, d_b, e_b;
4313 S1 a_b = x1 CMP1 y1;
4314 S2 b_b = x2 CMP2 y2;
4316 S4 d_b = x3 CMP3 y3;
4318 S6 f_T = (TYPE) e_b;
4320 where type 'TYPE' is an integral type. Or a similar pattern
4323 S6 f_Y = e_b ? r_Y : s_Y;
4325 as results from if-conversion of a complex condition.
4329 * STMT_VINFO: The stmt at the end from which the pattern
4330 search begins, i.e. cast of a bool to
4335 * TYPE_OUT: The type of the output of this pattern.
4337 * Return value: A new stmt that will be used to replace the pattern.
4339 Assuming size of TYPE is the same as size of all comparisons
4340 (otherwise some casts would be added where needed), the above
4341 sequence we create related pattern stmts:
4342 S1' a_T = x1 CMP1 y1 ? 1 : 0;
4343 S3' c_T = x2 CMP2 y2 ? a_T : 0;
4344 S4' d_T = x3 CMP3 y3 ? 1 : 0;
4345 S5' e_T = c_T | d_T;
4348 Instead of the above S3' we could emit:
4349 S2' b_T = x2 CMP2 y2 ? 1 : 0;
4350 S3' c_T = a_T | b_T;
4351 but the above is more efficient. */
4354 vect_recog_bool_pattern (vec_info
*vinfo
,
4355 stmt_vec_info stmt_vinfo
, tree
*type_out
)
4357 gimple
*last_stmt
= stmt_vinfo
->stmt
;
4358 enum tree_code rhs_code
;
4359 tree var
, lhs
, rhs
, vectype
;
4360 gimple
*pattern_stmt
;
4362 if (!is_gimple_assign (last_stmt
))
4365 var
= gimple_assign_rhs1 (last_stmt
);
4366 lhs
= gimple_assign_lhs (last_stmt
);
4367 rhs_code
= gimple_assign_rhs_code (last_stmt
);
4369 if (rhs_code
== VIEW_CONVERT_EXPR
)
4370 var
= TREE_OPERAND (var
, 0);
4372 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var
)))
4375 hash_set
<gimple
*> bool_stmts
;
4377 if (CONVERT_EXPR_CODE_P (rhs_code
)
4378 || rhs_code
== VIEW_CONVERT_EXPR
)
4380 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
4381 || VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs
)))
4383 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (lhs
));
4385 if (check_bool_pattern (var
, vinfo
, bool_stmts
))
4387 rhs
= adjust_bool_stmts (vinfo
, bool_stmts
,
4388 TREE_TYPE (lhs
), stmt_vinfo
);
4389 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
4390 if (useless_type_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (rhs
)))
4391 pattern_stmt
= gimple_build_assign (lhs
, SSA_NAME
, rhs
);
4394 = gimple_build_assign (lhs
, NOP_EXPR
, rhs
);
4398 tree type
= integer_type_for_mask (var
, vinfo
);
4399 tree cst0
, cst1
, tmp
;
4404 /* We may directly use cond with narrowed type to avoid
4405 multiple cond exprs with following result packing and
4406 perform single cond with packed mask instead. In case
4407 of widening we better make cond first and then extract
4409 if (TYPE_MODE (type
) == TYPE_MODE (TREE_TYPE (lhs
)))
4410 type
= TREE_TYPE (lhs
);
4412 cst0
= build_int_cst (type
, 0);
4413 cst1
= build_int_cst (type
, 1);
4414 tmp
= vect_recog_temp_ssa_var (type
, NULL
);
4415 pattern_stmt
= gimple_build_assign (tmp
, COND_EXPR
, var
, cst1
, cst0
);
4417 if (!useless_type_conversion_p (type
, TREE_TYPE (lhs
)))
4419 tree new_vectype
= get_vectype_for_scalar_type (vinfo
, type
);
4420 append_pattern_def_seq (vinfo
, stmt_vinfo
,
4421 pattern_stmt
, new_vectype
);
4423 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
4424 pattern_stmt
= gimple_build_assign (lhs
, CONVERT_EXPR
, tmp
);
4428 *type_out
= vectype
;
4429 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt
);
4431 return pattern_stmt
;
4433 else if (rhs_code
== COND_EXPR
4434 && TREE_CODE (var
) == SSA_NAME
)
4436 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (lhs
));
4437 if (vectype
== NULL_TREE
)
4440 /* Build a scalar type for the boolean result that when
4441 vectorized matches the vector type of the result in
4442 size and number of elements. */
4444 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype
)),
4445 TYPE_VECTOR_SUBPARTS (vectype
));
4448 = build_nonstandard_integer_type (prec
,
4449 TYPE_UNSIGNED (TREE_TYPE (var
)));
4450 if (get_vectype_for_scalar_type (vinfo
, type
) == NULL_TREE
)
4453 if (!check_bool_pattern (var
, vinfo
, bool_stmts
))
4456 rhs
= adjust_bool_stmts (vinfo
, bool_stmts
, type
, stmt_vinfo
);
4458 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
4460 = gimple_build_assign (lhs
, COND_EXPR
,
4461 build2 (NE_EXPR
, boolean_type_node
,
4462 rhs
, build_int_cst (type
, 0)),
4463 gimple_assign_rhs2 (last_stmt
),
4464 gimple_assign_rhs3 (last_stmt
));
4465 *type_out
= vectype
;
4466 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt
);
4468 return pattern_stmt
;
4470 else if (rhs_code
== SSA_NAME
4471 && STMT_VINFO_DATA_REF (stmt_vinfo
))
4473 stmt_vec_info pattern_stmt_info
;
4474 tree nunits_vectype
;
4475 if (!vect_get_vector_types_for_stmt (vinfo
, stmt_vinfo
, &vectype
,
4477 || !VECTOR_MODE_P (TYPE_MODE (vectype
)))
4480 if (check_bool_pattern (var
, vinfo
, bool_stmts
))
4481 rhs
= adjust_bool_stmts (vinfo
, bool_stmts
,
4482 TREE_TYPE (vectype
), stmt_vinfo
);
4485 tree type
= integer_type_for_mask (var
, vinfo
);
4486 tree cst0
, cst1
, new_vectype
;
4491 if (TYPE_MODE (type
) == TYPE_MODE (TREE_TYPE (vectype
)))
4492 type
= TREE_TYPE (vectype
);
4494 cst0
= build_int_cst (type
, 0);
4495 cst1
= build_int_cst (type
, 1);
4496 new_vectype
= get_vectype_for_scalar_type (vinfo
, type
);
4498 rhs
= vect_recog_temp_ssa_var (type
, NULL
);
4499 pattern_stmt
= gimple_build_assign (rhs
, COND_EXPR
, var
, cst1
, cst0
);
4500 append_pattern_def_seq (vinfo
, stmt_vinfo
, pattern_stmt
, new_vectype
);
4503 lhs
= build1 (VIEW_CONVERT_EXPR
, TREE_TYPE (vectype
), lhs
);
4504 if (!useless_type_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (rhs
)))
4506 tree rhs2
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
4507 gimple
*cast_stmt
= gimple_build_assign (rhs2
, NOP_EXPR
, rhs
);
4508 append_pattern_def_seq (vinfo
, stmt_vinfo
, cast_stmt
);
4511 pattern_stmt
= gimple_build_assign (lhs
, SSA_NAME
, rhs
);
4512 pattern_stmt_info
= vinfo
->add_stmt (pattern_stmt
);
4513 vinfo
->move_dr (pattern_stmt_info
, stmt_vinfo
);
4514 *type_out
= vectype
;
4515 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt
);
4517 return pattern_stmt
;
4524 /* A helper for vect_recog_mask_conversion_pattern. Build
4525 conversion of MASK to a type suitable for masking VECTYPE.
4526 Built statement gets required vectype and is appended to
4527 a pattern sequence of STMT_VINFO.
4529 Return converted mask. */
4532 build_mask_conversion (vec_info
*vinfo
,
4533 tree mask
, tree vectype
, stmt_vec_info stmt_vinfo
)
4538 masktype
= truth_type_for (vectype
);
4539 tmp
= vect_recog_temp_ssa_var (TREE_TYPE (masktype
), NULL
);
4540 stmt
= gimple_build_assign (tmp
, CONVERT_EXPR
, mask
);
4541 append_pattern_def_seq (vinfo
, stmt_vinfo
,
4542 stmt
, masktype
, TREE_TYPE (vectype
));
4548 /* Function vect_recog_mask_conversion_pattern
4550 Try to find statements which require boolean type
4551 converison. Additional conversion statements are
4552 added to handle such cases. For example:
4562 S4 c_1 = m_3 ? c_2 : c_3;
4564 Will be transformed into:
4568 S3'' m_2' = (_Bool[bitsize=32])m_2
4569 S3' m_3' = m_1 & m_2';
4570 S4'' m_3'' = (_Bool[bitsize=8])m_3'
4571 S4' c_1' = m_3'' ? c_2 : c_3; */
4574 vect_recog_mask_conversion_pattern (vec_info
*vinfo
,
4575 stmt_vec_info stmt_vinfo
, tree
*type_out
)
4577 gimple
*last_stmt
= stmt_vinfo
->stmt
;
4578 enum tree_code rhs_code
;
4579 tree lhs
= NULL_TREE
, rhs1
, rhs2
, tmp
, rhs1_type
, rhs2_type
;
4580 tree vectype1
, vectype2
;
4581 stmt_vec_info pattern_stmt_info
;
4582 tree rhs1_op0
= NULL_TREE
, rhs1_op1
= NULL_TREE
;
4583 tree rhs1_op0_type
= NULL_TREE
, rhs1_op1_type
= NULL_TREE
;
4585 /* Check for MASK_LOAD ans MASK_STORE calls requiring mask conversion. */
4586 if (is_gimple_call (last_stmt
)
4587 && gimple_call_internal_p (last_stmt
))
4589 gcall
*pattern_stmt
;
4591 internal_fn ifn
= gimple_call_internal_fn (last_stmt
);
4592 int mask_argno
= internal_fn_mask_index (ifn
);
4596 bool store_p
= internal_store_fn_p (ifn
);
4599 int rhs_index
= internal_fn_stored_value_index (ifn
);
4600 tree rhs
= gimple_call_arg (last_stmt
, rhs_index
);
4601 vectype1
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (rhs
));
4605 lhs
= gimple_call_lhs (last_stmt
);
4606 vectype1
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (lhs
));
4609 tree mask_arg
= gimple_call_arg (last_stmt
, mask_argno
);
4610 tree mask_arg_type
= integer_type_for_mask (mask_arg
, vinfo
);
4613 vectype2
= get_mask_type_for_scalar_type (vinfo
, mask_arg_type
);
4615 if (!vectype1
|| !vectype2
4616 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1
),
4617 TYPE_VECTOR_SUBPARTS (vectype2
)))
4620 tmp
= build_mask_conversion (vinfo
, mask_arg
, vectype1
, stmt_vinfo
);
4622 auto_vec
<tree
, 8> args
;
4623 unsigned int nargs
= gimple_call_num_args (last_stmt
);
4624 args
.safe_grow (nargs
, true);
4625 for (unsigned int i
= 0; i
< nargs
; ++i
)
4626 args
[i
] = ((int) i
== mask_argno
4628 : gimple_call_arg (last_stmt
, i
));
4629 pattern_stmt
= gimple_build_call_internal_vec (ifn
, args
);
4633 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
4634 gimple_call_set_lhs (pattern_stmt
, lhs
);
4636 gimple_call_set_nothrow (pattern_stmt
, true);
4638 pattern_stmt_info
= vinfo
->add_stmt (pattern_stmt
);
4639 if (STMT_VINFO_DATA_REF (stmt_vinfo
))
4640 vinfo
->move_dr (pattern_stmt_info
, stmt_vinfo
);
4642 *type_out
= vectype1
;
4643 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt
);
4645 return pattern_stmt
;
4648 if (!is_gimple_assign (last_stmt
))
4651 gimple
*pattern_stmt
;
4652 lhs
= gimple_assign_lhs (last_stmt
);
4653 rhs1
= gimple_assign_rhs1 (last_stmt
);
4654 rhs_code
= gimple_assign_rhs_code (last_stmt
);
4656 /* Check for cond expression requiring mask conversion. */
4657 if (rhs_code
== COND_EXPR
)
4659 vectype1
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (lhs
));
4661 if (TREE_CODE (rhs1
) == SSA_NAME
)
4663 rhs1_type
= integer_type_for_mask (rhs1
, vinfo
);
4667 else if (COMPARISON_CLASS_P (rhs1
))
4669 /* Check whether we're comparing scalar booleans and (if so)
4670 whether a better mask type exists than the mask associated
4671 with boolean-sized elements. This avoids unnecessary packs
4672 and unpacks if the booleans are set from comparisons of
4673 wider types. E.g. in:
4675 int x1, x2, x3, x4, y1, y1;
4677 bool b1 = (x1 == x2);
4678 bool b2 = (x3 == x4);
4679 ... = b1 == b2 ? y1 : y2;
4681 it is better for b1 and b2 to use the mask type associated
4682 with int elements rather bool (byte) elements. */
4683 rhs1_op0
= TREE_OPERAND (rhs1
, 0);
4684 rhs1_op1
= TREE_OPERAND (rhs1
, 1);
4685 if (!rhs1_op0
|| !rhs1_op1
)
4687 rhs1_op0_type
= integer_type_for_mask (rhs1_op0
, vinfo
);
4688 rhs1_op1_type
= integer_type_for_mask (rhs1_op1
, vinfo
);
4691 rhs1_type
= TREE_TYPE (rhs1_op0
);
4692 else if (!rhs1_op1_type
)
4693 rhs1_type
= TREE_TYPE (rhs1_op1
);
4694 else if (TYPE_PRECISION (rhs1_op0_type
)
4695 != TYPE_PRECISION (rhs1_op1_type
))
4697 int tmp0
= (int) TYPE_PRECISION (rhs1_op0_type
)
4698 - (int) TYPE_PRECISION (TREE_TYPE (lhs
));
4699 int tmp1
= (int) TYPE_PRECISION (rhs1_op1_type
)
4700 - (int) TYPE_PRECISION (TREE_TYPE (lhs
));
4701 if ((tmp0
> 0 && tmp1
> 0) || (tmp0
< 0 && tmp1
< 0))
4703 if (abs (tmp0
) > abs (tmp1
))
4704 rhs1_type
= rhs1_op1_type
;
4706 rhs1_type
= rhs1_op0_type
;
4709 rhs1_type
= build_nonstandard_integer_type
4710 (TYPE_PRECISION (TREE_TYPE (lhs
)), 1);
4713 rhs1_type
= rhs1_op0_type
;
4718 vectype2
= get_mask_type_for_scalar_type (vinfo
, rhs1_type
);
4720 if (!vectype1
|| !vectype2
)
4723 /* Continue if a conversion is needed. Also continue if we have
4724 a comparison whose vector type would normally be different from
4725 VECTYPE2 when considered in isolation. In that case we'll
4726 replace the comparison with an SSA name (so that we can record
4727 its vector type) and behave as though the comparison was an SSA
4728 name from the outset. */
4729 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1
),
4730 TYPE_VECTOR_SUBPARTS (vectype2
))
4735 /* If rhs1 is invariant and we can promote it leave the COND_EXPR
4736 in place, we can handle it in vectorizable_condition. This avoids
4737 unnecessary promotion stmts and increased vectorization factor. */
4738 if (COMPARISON_CLASS_P (rhs1
)
4739 && INTEGRAL_TYPE_P (rhs1_type
)
4740 && known_le (TYPE_VECTOR_SUBPARTS (vectype1
),
4741 TYPE_VECTOR_SUBPARTS (vectype2
)))
4743 enum vect_def_type dt
;
4744 if (vect_is_simple_use (TREE_OPERAND (rhs1
, 0), vinfo
, &dt
)
4745 && dt
== vect_external_def
4746 && vect_is_simple_use (TREE_OPERAND (rhs1
, 1), vinfo
, &dt
)
4747 && (dt
== vect_external_def
4748 || dt
== vect_constant_def
))
4750 tree wide_scalar_type
= build_nonstandard_integer_type
4751 (vector_element_bits (vectype1
), TYPE_UNSIGNED (rhs1_type
));
4752 tree vectype3
= get_vectype_for_scalar_type (vinfo
,
4754 if (expand_vec_cond_expr_p (vectype1
, vectype3
, TREE_CODE (rhs1
)))
4759 /* If rhs1 is a comparison we need to move it into a
4760 separate statement. */
4761 if (TREE_CODE (rhs1
) != SSA_NAME
)
4763 tmp
= vect_recog_temp_ssa_var (TREE_TYPE (rhs1
), NULL
);
4765 && TYPE_PRECISION (rhs1_op0_type
) != TYPE_PRECISION (rhs1_type
))
4766 rhs1_op0
= build_mask_conversion (vinfo
, rhs1_op0
,
4767 vectype2
, stmt_vinfo
);
4769 && TYPE_PRECISION (rhs1_op1_type
) != TYPE_PRECISION (rhs1_type
))
4770 rhs1_op1
= build_mask_conversion (vinfo
, rhs1_op1
,
4771 vectype2
, stmt_vinfo
);
4772 pattern_stmt
= gimple_build_assign (tmp
, TREE_CODE (rhs1
),
4773 rhs1_op0
, rhs1_op1
);
4775 append_pattern_def_seq (vinfo
, stmt_vinfo
, pattern_stmt
, vectype2
,
4779 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
4780 TYPE_VECTOR_SUBPARTS (vectype2
)))
4781 tmp
= build_mask_conversion (vinfo
, rhs1
, vectype1
, stmt_vinfo
);
4785 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
4786 pattern_stmt
= gimple_build_assign (lhs
, COND_EXPR
, tmp
,
4787 gimple_assign_rhs2 (last_stmt
),
4788 gimple_assign_rhs3 (last_stmt
));
4790 *type_out
= vectype1
;
4791 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt
);
4793 return pattern_stmt
;
4796 /* Now check for binary boolean operations requiring conversion for
4798 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs
)))
4801 if (rhs_code
!= BIT_IOR_EXPR
4802 && rhs_code
!= BIT_XOR_EXPR
4803 && rhs_code
!= BIT_AND_EXPR
4804 && TREE_CODE_CLASS (rhs_code
) != tcc_comparison
)
4807 rhs2
= gimple_assign_rhs2 (last_stmt
);
4809 rhs1_type
= integer_type_for_mask (rhs1
, vinfo
);
4810 rhs2_type
= integer_type_for_mask (rhs2
, vinfo
);
4812 if (!rhs1_type
|| !rhs2_type
4813 || TYPE_PRECISION (rhs1_type
) == TYPE_PRECISION (rhs2_type
))
4816 if (TYPE_PRECISION (rhs1_type
) < TYPE_PRECISION (rhs2_type
))
4818 vectype1
= get_mask_type_for_scalar_type (vinfo
, rhs1_type
);
4821 rhs2
= build_mask_conversion (vinfo
, rhs2
, vectype1
, stmt_vinfo
);
4825 vectype1
= get_mask_type_for_scalar_type (vinfo
, rhs2_type
);
4828 rhs1
= build_mask_conversion (vinfo
, rhs1
, vectype1
, stmt_vinfo
);
4831 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
4832 pattern_stmt
= gimple_build_assign (lhs
, rhs_code
, rhs1
, rhs2
);
4834 *type_out
= vectype1
;
4835 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt
);
4837 return pattern_stmt
;
4840 /* STMT_INFO is a load or store. If the load or store is conditional, return
4841 the boolean condition under which it occurs, otherwise return null. */
4844 vect_get_load_store_mask (stmt_vec_info stmt_info
)
4846 if (gassign
*def_assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
4848 gcc_assert (gimple_assign_single_p (def_assign
));
4852 if (gcall
*def_call
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
4854 internal_fn ifn
= gimple_call_internal_fn (def_call
);
4855 int mask_index
= internal_fn_mask_index (ifn
);
4856 return gimple_call_arg (def_call
, mask_index
);
4862 /* Return MASK if MASK is suitable for masking an operation on vectors
4863 of type VECTYPE, otherwise convert it into such a form and return
4864 the result. Associate any conversion statements with STMT_INFO's
4868 vect_convert_mask_for_vectype (tree mask
, tree vectype
,
4869 stmt_vec_info stmt_info
, vec_info
*vinfo
)
4871 tree mask_type
= integer_type_for_mask (mask
, vinfo
);
4874 tree mask_vectype
= get_mask_type_for_scalar_type (vinfo
, mask_type
);
4876 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype
),
4877 TYPE_VECTOR_SUBPARTS (mask_vectype
)))
4878 mask
= build_mask_conversion (vinfo
, mask
, vectype
, stmt_info
);
4883 /* Return the equivalent of:
4885 fold_convert (TYPE, VALUE)
4887 with the expectation that the operation will be vectorized.
4888 If new statements are needed, add them as pattern statements
4892 vect_add_conversion_to_pattern (vec_info
*vinfo
,
4893 tree type
, tree value
, stmt_vec_info stmt_info
)
4895 if (useless_type_conversion_p (type
, TREE_TYPE (value
)))
4898 tree new_value
= vect_recog_temp_ssa_var (type
, NULL
);
4899 gassign
*conversion
= gimple_build_assign (new_value
, CONVERT_EXPR
, value
);
4900 append_pattern_def_seq (vinfo
, stmt_info
, conversion
,
4901 get_vectype_for_scalar_type (vinfo
, type
));
4905 /* Try to convert STMT_INFO into a call to a gather load or scatter store
4906 internal function. Return the final statement on success and set
4907 *TYPE_OUT to the vector type being loaded or stored.
4909 This function only handles gathers and scatters that were recognized
4910 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
4913 vect_recog_gather_scatter_pattern (vec_info
*vinfo
,
4914 stmt_vec_info stmt_info
, tree
*type_out
)
4916 /* Currently we only support this for loop vectorization. */
4917 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
4921 /* Make sure that we're looking at a gather load or scatter store. */
4922 data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
4923 if (!dr
|| !STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
4926 /* Get the boolean that controls whether the load or store happens.
4927 This is null if the operation is unconditional. */
4928 tree mask
= vect_get_load_store_mask (stmt_info
);
4930 /* Make sure that the target supports an appropriate internal
4931 function for the gather/scatter operation. */
4932 gather_scatter_info gs_info
;
4933 if (!vect_check_gather_scatter (stmt_info
, loop_vinfo
, &gs_info
)
4934 || gs_info
.ifn
== IFN_LAST
)
4937 /* Convert the mask to the right form. */
4938 tree gs_vectype
= get_vectype_for_scalar_type (loop_vinfo
,
4939 gs_info
.element_type
);
4941 mask
= vect_convert_mask_for_vectype (mask
, gs_vectype
, stmt_info
,
4943 else if (gs_info
.ifn
== IFN_MASK_SCATTER_STORE
4944 || gs_info
.ifn
== IFN_MASK_GATHER_LOAD
)
4945 mask
= build_int_cst (TREE_TYPE (truth_type_for (gs_vectype
)), -1);
4947 /* Get the invariant base and non-invariant offset, converting the
4948 latter to the same width as the vector elements. */
4949 tree base
= gs_info
.base
;
4950 tree offset_type
= TREE_TYPE (gs_info
.offset_vectype
);
4951 tree offset
= vect_add_conversion_to_pattern (vinfo
, offset_type
,
4952 gs_info
.offset
, stmt_info
);
4954 /* Build the new pattern statement. */
4955 tree scale
= size_int (gs_info
.scale
);
4956 gcall
*pattern_stmt
;
4957 if (DR_IS_READ (dr
))
4959 tree zero
= build_zero_cst (gs_info
.element_type
);
4961 pattern_stmt
= gimple_build_call_internal (gs_info
.ifn
, 5, base
,
4962 offset
, scale
, zero
, mask
);
4964 pattern_stmt
= gimple_build_call_internal (gs_info
.ifn
, 4, base
,
4965 offset
, scale
, zero
);
4966 tree load_lhs
= vect_recog_temp_ssa_var (gs_info
.element_type
, NULL
);
4967 gimple_call_set_lhs (pattern_stmt
, load_lhs
);
4971 tree rhs
= vect_get_store_rhs (stmt_info
);
4973 pattern_stmt
= gimple_build_call_internal (gs_info
.ifn
, 5,
4974 base
, offset
, scale
, rhs
,
4977 pattern_stmt
= gimple_build_call_internal (gs_info
.ifn
, 4,
4978 base
, offset
, scale
, rhs
);
4980 gimple_call_set_nothrow (pattern_stmt
, true);
4982 /* Copy across relevant vectorization info and associate DR with the
4983 new pattern statement instead of the original statement. */
4984 stmt_vec_info pattern_stmt_info
= loop_vinfo
->add_stmt (pattern_stmt
);
4985 loop_vinfo
->move_dr (pattern_stmt_info
, stmt_info
);
4987 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4988 *type_out
= vectype
;
4989 vect_pattern_detected ("gather/scatter pattern", stmt_info
->stmt
);
4991 return pattern_stmt
;
4994 /* Return true if TYPE is a non-boolean integer type. These are the types
4995 that we want to consider for narrowing. */
4998 vect_narrowable_type_p (tree type
)
5000 return INTEGRAL_TYPE_P (type
) && !VECT_SCALAR_BOOLEAN_TYPE_P (type
);
5003 /* Return true if the operation given by CODE can be truncated to N bits
5004 when only N bits of the output are needed. This is only true if bit N+1
5005 of the inputs has no effect on the low N bits of the result. */
5008 vect_truncatable_operation_p (tree_code code
)
5026 /* Record that STMT_INFO could be changed from operating on TYPE to
5027 operating on a type with the precision and sign given by PRECISION
5028 and SIGN respectively. PRECISION is an arbitrary bit precision;
5029 it might not be a whole number of bytes. */
5032 vect_set_operation_type (stmt_vec_info stmt_info
, tree type
,
5033 unsigned int precision
, signop sign
)
5035 /* Round the precision up to a whole number of bytes. */
5036 precision
= vect_element_precision (precision
);
5037 if (precision
< TYPE_PRECISION (type
)
5038 && (!stmt_info
->operation_precision
5039 || stmt_info
->operation_precision
> precision
))
5041 stmt_info
->operation_precision
= precision
;
5042 stmt_info
->operation_sign
= sign
;
5046 /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
5047 non-boolean inputs, all of which have type TYPE. MIN_INPUT_PRECISION
5048 is an arbitrary bit precision; it might not be a whole number of bytes. */
5051 vect_set_min_input_precision (stmt_vec_info stmt_info
, tree type
,
5052 unsigned int min_input_precision
)
5054 /* This operation in isolation only requires the inputs to have
5055 MIN_INPUT_PRECISION of precision, However, that doesn't mean
5056 that MIN_INPUT_PRECISION is a natural precision for the chain
5057 as a whole. E.g. consider something like:
5059 unsigned short *x, *y;
5060 *y = ((*x & 0xf0) >> 4) | (*y << 4);
5062 The right shift can be done on unsigned chars, and only requires the
5063 result of "*x & 0xf0" to be done on unsigned chars. But taking that
5064 approach would mean turning a natural chain of single-vector unsigned
5065 short operations into one that truncates "*x" and then extends
5066 "(*x & 0xf0) >> 4", with two vectors for each unsigned short
5067 operation and one vector for each unsigned char operation.
5068 This would be a significant pessimization.
5070 Instead only propagate the maximum of this precision and the precision
5071 required by the users of the result. This means that we don't pessimize
5072 the case above but continue to optimize things like:
5076 *y = ((*x & 0xf0) >> 4) | (*y << 4);
5078 Here we would truncate two vectors of *x to a single vector of
5079 unsigned chars and use single-vector unsigned char operations for
5080 everything else, rather than doing two unsigned short copies of
5081 "(*x & 0xf0) >> 4" and then truncating the result. */
5082 min_input_precision
= MAX (min_input_precision
,
5083 stmt_info
->min_output_precision
);
5085 if (min_input_precision
< TYPE_PRECISION (type
)
5086 && (!stmt_info
->min_input_precision
5087 || stmt_info
->min_input_precision
> min_input_precision
))
5088 stmt_info
->min_input_precision
= min_input_precision
;
5091 /* Subroutine of vect_determine_min_output_precision. Return true if
5092 we can calculate a reduced number of output bits for STMT_INFO,
5093 whose result is LHS. */
5096 vect_determine_min_output_precision_1 (vec_info
*vinfo
,
5097 stmt_vec_info stmt_info
, tree lhs
)
5099 /* Take the maximum precision required by users of the result. */
5100 unsigned int precision
= 0;
5101 imm_use_iterator iter
;
5103 FOR_EACH_IMM_USE_FAST (use
, iter
, lhs
)
5105 gimple
*use_stmt
= USE_STMT (use
);
5106 if (is_gimple_debug (use_stmt
))
5108 stmt_vec_info use_stmt_info
= vinfo
->lookup_stmt (use_stmt
);
5109 if (!use_stmt_info
|| !use_stmt_info
->min_input_precision
)
5111 /* The input precision recorded for COND_EXPRs applies only to the
5112 "then" and "else" values. */
5113 gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5115 && gimple_assign_rhs_code (assign
) == COND_EXPR
5116 && use
->use
!= gimple_assign_rhs2_ptr (assign
)
5117 && use
->use
!= gimple_assign_rhs3_ptr (assign
))
5119 precision
= MAX (precision
, use_stmt_info
->min_input_precision
);
5122 if (dump_enabled_p ())
5123 dump_printf_loc (MSG_NOTE
, vect_location
,
5124 "only the low %d bits of %T are significant\n",
5126 stmt_info
->min_output_precision
= precision
;
5130 /* Calculate min_output_precision for STMT_INFO. */
5133 vect_determine_min_output_precision (vec_info
*vinfo
, stmt_vec_info stmt_info
)
5135 /* We're only interested in statements with a narrowable result. */
5136 tree lhs
= gimple_get_lhs (stmt_info
->stmt
);
5138 || TREE_CODE (lhs
) != SSA_NAME
5139 || !vect_narrowable_type_p (TREE_TYPE (lhs
)))
5142 if (!vect_determine_min_output_precision_1 (vinfo
, stmt_info
, lhs
))
5143 stmt_info
->min_output_precision
= TYPE_PRECISION (TREE_TYPE (lhs
));
5146 /* Use range information to decide whether STMT (described by STMT_INFO)
5147 could be done in a narrower type. This is effectively a forward
5148 propagation, since it uses context-independent information that applies
5149 to all users of an SSA name. */
5152 vect_determine_precisions_from_range (stmt_vec_info stmt_info
, gassign
*stmt
)
5154 tree lhs
= gimple_assign_lhs (stmt
);
5155 if (!lhs
|| TREE_CODE (lhs
) != SSA_NAME
)
5158 tree type
= TREE_TYPE (lhs
);
5159 if (!vect_narrowable_type_p (type
))
5162 /* First see whether we have any useful range information for the result. */
5163 unsigned int precision
= TYPE_PRECISION (type
);
5164 signop sign
= TYPE_SIGN (type
);
5165 wide_int min_value
, max_value
;
5166 if (!vect_get_range_info (lhs
, &min_value
, &max_value
))
5169 tree_code code
= gimple_assign_rhs_code (stmt
);
5170 unsigned int nops
= gimple_num_ops (stmt
);
5172 if (!vect_truncatable_operation_p (code
))
5173 /* Check that all relevant input operands are compatible, and update
5174 [MIN_VALUE, MAX_VALUE] to include their ranges. */
5175 for (unsigned int i
= 1; i
< nops
; ++i
)
5177 tree op
= gimple_op (stmt
, i
);
5178 if (TREE_CODE (op
) == INTEGER_CST
)
5180 /* Don't require the integer to have RHS_TYPE (which it might
5181 not for things like shift amounts, etc.), but do require it
5183 if (!int_fits_type_p (op
, type
))
5186 min_value
= wi::min (min_value
, wi::to_wide (op
, precision
), sign
);
5187 max_value
= wi::max (max_value
, wi::to_wide (op
, precision
), sign
);
5189 else if (TREE_CODE (op
) == SSA_NAME
)
5191 /* Ignore codes that don't take uniform arguments. */
5192 if (!types_compatible_p (TREE_TYPE (op
), type
))
5195 wide_int op_min_value
, op_max_value
;
5196 if (!vect_get_range_info (op
, &op_min_value
, &op_max_value
))
5199 min_value
= wi::min (min_value
, op_min_value
, sign
);
5200 max_value
= wi::max (max_value
, op_max_value
, sign
);
5206 /* Try to switch signed types for unsigned types if we can.
5207 This is better for two reasons. First, unsigned ops tend
5208 to be cheaper than signed ops. Second, it means that we can
5212 int res = (int) c & 0xff00; // range [0x0000, 0xff00]
5217 unsigned short res_1 = (unsigned short) c & 0xff00;
5218 int res = (int) res_1;
5220 where the intermediate result res_1 has unsigned rather than
5222 if (sign
== SIGNED
&& !wi::neg_p (min_value
))
5225 /* See what precision is required for MIN_VALUE and MAX_VALUE. */
5226 unsigned int precision1
= wi::min_precision (min_value
, sign
);
5227 unsigned int precision2
= wi::min_precision (max_value
, sign
);
5228 unsigned int value_precision
= MAX (precision1
, precision2
);
5229 if (value_precision
>= precision
)
5232 if (dump_enabled_p ())
5233 dump_printf_loc (MSG_NOTE
, vect_location
, "can narrow to %s:%d"
5234 " without loss of precision: %G",
5235 sign
== SIGNED
? "signed" : "unsigned",
5236 value_precision
, stmt
);
5238 vect_set_operation_type (stmt_info
, type
, value_precision
, sign
);
5239 vect_set_min_input_precision (stmt_info
, type
, value_precision
);
5242 /* Use information about the users of STMT's result to decide whether
5243 STMT (described by STMT_INFO) could be done in a narrower type.
5244 This is effectively a backward propagation. */
5247 vect_determine_precisions_from_users (stmt_vec_info stmt_info
, gassign
*stmt
)
5249 tree_code code
= gimple_assign_rhs_code (stmt
);
5250 unsigned int opno
= (code
== COND_EXPR
? 2 : 1);
5251 tree type
= TREE_TYPE (gimple_op (stmt
, opno
));
5252 if (!vect_narrowable_type_p (type
))
5255 unsigned int precision
= TYPE_PRECISION (type
);
5256 unsigned int operation_precision
, min_input_precision
;
5260 /* Only the bits that contribute to the output matter. Don't change
5261 the precision of the operation itself. */
5262 operation_precision
= precision
;
5263 min_input_precision
= stmt_info
->min_output_precision
;
5269 tree shift
= gimple_assign_rhs2 (stmt
);
5270 if (TREE_CODE (shift
) != INTEGER_CST
5271 || !wi::ltu_p (wi::to_widest (shift
), precision
))
5273 unsigned int const_shift
= TREE_INT_CST_LOW (shift
);
5274 if (code
== LSHIFT_EXPR
)
5276 /* Avoid creating an undefined shift.
5278 ??? We could instead use min_output_precision as-is and
5279 optimize out-of-range shifts to zero. However, only
5280 degenerate testcases shift away all their useful input data,
5281 and it isn't natural to drop input operations in the middle
5282 of vectorization. This sort of thing should really be
5283 handled before vectorization. */
5284 operation_precision
= MAX (stmt_info
->min_output_precision
,
5286 /* We need CONST_SHIFT fewer bits of the input. */
5287 min_input_precision
= (MAX (operation_precision
, const_shift
)
5292 /* We need CONST_SHIFT extra bits to do the operation. */
5293 operation_precision
= (stmt_info
->min_output_precision
5295 min_input_precision
= operation_precision
;
5301 if (vect_truncatable_operation_p (code
))
5303 /* Input bit N has no effect on output bits N-1 and lower. */
5304 operation_precision
= stmt_info
->min_output_precision
;
5305 min_input_precision
= operation_precision
;
5311 if (operation_precision
< precision
)
5313 if (dump_enabled_p ())
5314 dump_printf_loc (MSG_NOTE
, vect_location
, "can narrow to %s:%d"
5315 " without affecting users: %G",
5316 TYPE_UNSIGNED (type
) ? "unsigned" : "signed",
5317 operation_precision
, stmt
);
5318 vect_set_operation_type (stmt_info
, type
, operation_precision
,
5321 vect_set_min_input_precision (stmt_info
, type
, min_input_precision
);
5324 /* Return true if the statement described by STMT_INFO sets a boolean
5325 SSA_NAME and if we know how to vectorize this kind of statement using
5326 vector mask types. */
5329 possible_vector_mask_operation_p (stmt_vec_info stmt_info
)
5331 tree lhs
= gimple_get_lhs (stmt_info
->stmt
);
5333 || TREE_CODE (lhs
) != SSA_NAME
5334 || !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs
)))
5337 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
5339 tree_code rhs_code
= gimple_assign_rhs_code (assign
);
5351 return TREE_CODE_CLASS (rhs_code
) == tcc_comparison
;
5354 else if (is_a
<gphi
*> (stmt_info
->stmt
))
5359 /* If STMT_INFO sets a boolean SSA_NAME, see whether we should use
5360 a vector mask type instead of a normal vector type. Record the
5361 result in STMT_INFO->mask_precision. */
5364 vect_determine_mask_precision (vec_info
*vinfo
, stmt_vec_info stmt_info
)
5366 if (!possible_vector_mask_operation_p (stmt_info
))
5369 /* If at least one boolean input uses a vector mask type,
5370 pick the mask type with the narrowest elements.
5372 ??? This is the traditional behavior. It should always produce
5373 the smallest number of operations, but isn't necessarily the
5374 optimal choice. For example, if we have:
5380 - the user of a wants it to have a mask type for 16-bit elements (M16)
5382 - c uses a mask type for 8-bit elements (M8)
5384 then picking M8 gives:
5386 - 1 M16->M8 pack for b
5388 - 2 M8->M16 unpacks for the user of a
5390 whereas picking M16 would have given:
5392 - 2 M8->M16 unpacks for c
5395 The number of operations are equal, but M16 would have given
5396 a shorter dependency chain and allowed more ILP. */
5397 unsigned int precision
= ~0U;
5398 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
5400 unsigned int nops
= gimple_num_ops (assign
);
5401 for (unsigned int i
= 1; i
< nops
; ++i
)
5403 tree rhs
= gimple_op (assign
, i
);
5404 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs
)))
5407 stmt_vec_info def_stmt_info
= vinfo
->lookup_def (rhs
);
5409 /* Don't let external or constant operands influence the choice.
5410 We can convert them to whichever vector type we pick. */
5413 if (def_stmt_info
->mask_precision
)
5415 if (precision
> def_stmt_info
->mask_precision
)
5416 precision
= def_stmt_info
->mask_precision
;
5420 /* If the statement compares two values that shouldn't use vector masks,
5421 try comparing the values as normal scalars instead. */
5422 tree_code rhs_code
= gimple_assign_rhs_code (assign
);
5423 if (precision
== ~0U
5424 && TREE_CODE_CLASS (rhs_code
) == tcc_comparison
)
5426 tree rhs1_type
= TREE_TYPE (gimple_assign_rhs1 (assign
));
5428 tree vectype
, mask_type
;
5429 if (is_a
<scalar_mode
> (TYPE_MODE (rhs1_type
), &mode
)
5430 && (vectype
= get_vectype_for_scalar_type (vinfo
, rhs1_type
))
5431 && (mask_type
= get_mask_type_for_scalar_type (vinfo
, rhs1_type
))
5432 && expand_vec_cmp_expr_p (vectype
, mask_type
, rhs_code
))
5433 precision
= GET_MODE_BITSIZE (mode
);
5438 gphi
*phi
= as_a
<gphi
*> (stmt_info
->stmt
);
5439 for (unsigned i
= 0; i
< gimple_phi_num_args (phi
); ++i
)
5441 tree rhs
= gimple_phi_arg_def (phi
, i
);
5443 stmt_vec_info def_stmt_info
= vinfo
->lookup_def (rhs
);
5445 /* Don't let external or constant operands influence the choice.
5446 We can convert them to whichever vector type we pick. */
5449 if (def_stmt_info
->mask_precision
)
5451 if (precision
> def_stmt_info
->mask_precision
)
5452 precision
= def_stmt_info
->mask_precision
;
5457 if (dump_enabled_p ())
5459 if (precision
== ~0U)
5460 dump_printf_loc (MSG_NOTE
, vect_location
,
5461 "using normal nonmask vectors for %G",
5464 dump_printf_loc (MSG_NOTE
, vect_location
,
5465 "using boolean precision %d for %G",
5466 precision
, stmt_info
->stmt
);
5469 stmt_info
->mask_precision
= precision
;
5472 /* Handle vect_determine_precisions for STMT_INFO, given that we
5473 have already done so for the users of its result. */
5476 vect_determine_stmt_precisions (vec_info
*vinfo
, stmt_vec_info stmt_info
)
5478 vect_determine_min_output_precision (vinfo
, stmt_info
);
5479 if (gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
5481 vect_determine_precisions_from_range (stmt_info
, stmt
);
5482 vect_determine_precisions_from_users (stmt_info
, stmt
);
5486 /* Walk backwards through the vectorizable region to determine the
5487 values of these fields:
5489 - min_output_precision
5490 - min_input_precision
5491 - operation_precision
5492 - operation_sign. */
5495 vect_determine_precisions (vec_info
*vinfo
)
5497 DUMP_VECT_SCOPE ("vect_determine_precisions");
5499 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
5501 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
5502 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
5503 unsigned int nbbs
= loop
->num_nodes
;
5505 for (unsigned int i
= 0; i
< nbbs
; i
++)
5507 basic_block bb
= bbs
[i
];
5508 for (auto gsi
= gsi_start_phis (bb
);
5509 !gsi_end_p (gsi
); gsi_next (&gsi
))
5511 stmt_vec_info stmt_info
= vinfo
->lookup_stmt (gsi
.phi ());
5513 vect_determine_mask_precision (vinfo
, stmt_info
);
5515 for (auto si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
5516 if (!is_gimple_debug (gsi_stmt (si
)))
5517 vect_determine_mask_precision
5518 (vinfo
, vinfo
->lookup_stmt (gsi_stmt (si
)));
5520 for (unsigned int i
= 0; i
< nbbs
; i
++)
5522 basic_block bb
= bbs
[nbbs
- i
- 1];
5523 for (gimple_stmt_iterator si
= gsi_last_bb (bb
);
5524 !gsi_end_p (si
); gsi_prev (&si
))
5525 if (!is_gimple_debug (gsi_stmt (si
)))
5526 vect_determine_stmt_precisions
5527 (vinfo
, vinfo
->lookup_stmt (gsi_stmt (si
)));
5528 for (auto gsi
= gsi_start_phis (bb
);
5529 !gsi_end_p (gsi
); gsi_next (&gsi
))
5531 stmt_vec_info stmt_info
= vinfo
->lookup_stmt (gsi
.phi ());
5533 vect_determine_stmt_precisions (vinfo
, stmt_info
);
5539 bb_vec_info bb_vinfo
= as_a
<bb_vec_info
> (vinfo
);
5540 for (unsigned i
= 0; i
< bb_vinfo
->bbs
.length (); ++i
)
5542 basic_block bb
= bb_vinfo
->bbs
[i
];
5543 for (auto gsi
= gsi_start_phis (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
5545 stmt_vec_info stmt_info
= vinfo
->lookup_stmt (gsi
.phi ());
5546 if (stmt_info
&& STMT_VINFO_VECTORIZABLE (stmt_info
))
5547 vect_determine_mask_precision (vinfo
, stmt_info
);
5549 for (auto gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
5551 stmt_vec_info stmt_info
= vinfo
->lookup_stmt (gsi_stmt (gsi
));
5552 if (stmt_info
&& STMT_VINFO_VECTORIZABLE (stmt_info
))
5553 vect_determine_mask_precision (vinfo
, stmt_info
);
5556 for (int i
= bb_vinfo
->bbs
.length () - 1; i
!= -1; --i
)
5558 for (gimple_stmt_iterator gsi
= gsi_last_bb (bb_vinfo
->bbs
[i
]);
5559 !gsi_end_p (gsi
); gsi_prev (&gsi
))
5561 stmt_vec_info stmt_info
= vinfo
->lookup_stmt (gsi_stmt (gsi
));
5562 if (stmt_info
&& STMT_VINFO_VECTORIZABLE (stmt_info
))
5563 vect_determine_stmt_precisions (vinfo
, stmt_info
);
5565 for (auto gsi
= gsi_start_phis (bb_vinfo
->bbs
[i
]);
5566 !gsi_end_p (gsi
); gsi_next (&gsi
))
5568 stmt_vec_info stmt_info
= vinfo
->lookup_stmt (gsi
.phi ());
5569 if (stmt_info
&& STMT_VINFO_VECTORIZABLE (stmt_info
))
5570 vect_determine_stmt_precisions (vinfo
, stmt_info
);
5576 typedef gimple
*(*vect_recog_func_ptr
) (vec_info
*, stmt_vec_info
, tree
*);
5578 struct vect_recog_func
5580 vect_recog_func_ptr fn
;
5584 /* Note that ordering matters - the first pattern matching on a stmt is
5585 taken which means usually the more complex one needs to preceed the
5586 less comples onex (widen_sum only after dot_prod or sad for example). */
5587 static vect_recog_func vect_vect_recog_func_ptrs
[] = {
5588 { vect_recog_over_widening_pattern
, "over_widening" },
5589 /* Must come after over_widening, which narrows the shift as much as
5590 possible beforehand. */
5591 { vect_recog_average_pattern
, "average" },
5592 { vect_recog_cond_expr_convert_pattern
, "cond_expr_convert" },
5593 { vect_recog_mulhs_pattern
, "mult_high" },
5594 { vect_recog_cast_forwprop_pattern
, "cast_forwprop" },
5595 { vect_recog_widen_mult_pattern
, "widen_mult" },
5596 { vect_recog_dot_prod_pattern
, "dot_prod" },
5597 { vect_recog_sad_pattern
, "sad" },
5598 { vect_recog_widen_sum_pattern
, "widen_sum" },
5599 { vect_recog_pow_pattern
, "pow" },
5600 { vect_recog_popcount_pattern
, "popcount" },
5601 { vect_recog_widen_shift_pattern
, "widen_shift" },
5602 { vect_recog_rotate_pattern
, "rotate" },
5603 { vect_recog_vector_vector_shift_pattern
, "vector_vector_shift" },
5604 { vect_recog_divmod_pattern
, "divmod" },
5605 { vect_recog_mult_pattern
, "mult" },
5606 { vect_recog_mixed_size_cond_pattern
, "mixed_size_cond" },
5607 { vect_recog_bool_pattern
, "bool" },
5608 /* This must come before mask conversion, and includes the parts
5609 of mask conversion that are needed for gather and scatter
5610 internal functions. */
5611 { vect_recog_gather_scatter_pattern
, "gather_scatter" },
5612 { vect_recog_mask_conversion_pattern
, "mask_conversion" },
5613 { vect_recog_widen_plus_pattern
, "widen_plus" },
5614 { vect_recog_widen_minus_pattern
, "widen_minus" },
5617 const unsigned int NUM_PATTERNS
= ARRAY_SIZE (vect_vect_recog_func_ptrs
);
5619 /* Mark statements that are involved in a pattern. */
5622 vect_mark_pattern_stmts (vec_info
*vinfo
,
5623 stmt_vec_info orig_stmt_info
, gimple
*pattern_stmt
,
5624 tree pattern_vectype
)
5626 stmt_vec_info orig_stmt_info_saved
= orig_stmt_info
;
5627 gimple
*def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info
);
5629 gimple
*orig_pattern_stmt
= NULL
;
5630 if (is_pattern_stmt_p (orig_stmt_info
))
5632 /* We're replacing a statement in an existing pattern definition
5634 orig_pattern_stmt
= orig_stmt_info
->stmt
;
5635 if (dump_enabled_p ())
5636 dump_printf_loc (MSG_NOTE
, vect_location
,
5637 "replacing earlier pattern %G", orig_pattern_stmt
);
5639 /* To keep the book-keeping simple, just swap the lhs of the
5640 old and new statements, so that the old one has a valid but
5642 tree old_lhs
= gimple_get_lhs (orig_pattern_stmt
);
5643 gimple_set_lhs (orig_pattern_stmt
, gimple_get_lhs (pattern_stmt
));
5644 gimple_set_lhs (pattern_stmt
, old_lhs
);
5646 if (dump_enabled_p ())
5647 dump_printf_loc (MSG_NOTE
, vect_location
, "with %G", pattern_stmt
);
5649 /* Switch to the statement that ORIG replaces. */
5650 orig_stmt_info
= STMT_VINFO_RELATED_STMT (orig_stmt_info
);
5652 /* We shouldn't be replacing the main pattern statement. */
5653 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info
)->stmt
5654 != orig_pattern_stmt
);
5658 for (gimple_stmt_iterator si
= gsi_start (def_seq
);
5659 !gsi_end_p (si
); gsi_next (&si
))
5661 if (dump_enabled_p ())
5662 dump_printf_loc (MSG_NOTE
, vect_location
,
5663 "extra pattern stmt: %G", gsi_stmt (si
));
5664 stmt_vec_info pattern_stmt_info
5665 = vect_init_pattern_stmt (vinfo
, gsi_stmt (si
),
5666 orig_stmt_info
, pattern_vectype
);
5667 /* Stmts in the def sequence are not vectorizable cycle or
5668 induction defs, instead they should all be vect_internal_def
5669 feeding the main pattern stmt which retains this def type. */
5670 STMT_VINFO_DEF_TYPE (pattern_stmt_info
) = vect_internal_def
;
5673 if (orig_pattern_stmt
)
5675 vect_init_pattern_stmt (vinfo
, pattern_stmt
,
5676 orig_stmt_info
, pattern_vectype
);
5678 /* Insert all the new pattern statements before the original one. */
5679 gimple_seq
*orig_def_seq
= &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info
);
5680 gimple_stmt_iterator gsi
= gsi_for_stmt (orig_pattern_stmt
,
5682 gsi_insert_seq_before_without_update (&gsi
, def_seq
, GSI_SAME_STMT
);
5683 gsi_insert_before_without_update (&gsi
, pattern_stmt
, GSI_SAME_STMT
);
5685 /* Remove the pattern statement that this new pattern replaces. */
5686 gsi_remove (&gsi
, false);
5689 vect_set_pattern_stmt (vinfo
,
5690 pattern_stmt
, orig_stmt_info
, pattern_vectype
);
5692 /* Transfer reduction path info to the pattern. */
5693 if (STMT_VINFO_REDUC_IDX (orig_stmt_info_saved
) != -1)
5696 if (!gimple_extract_op (orig_stmt_info_saved
->stmt
, &op
))
5698 tree lookfor
= op
.ops
[STMT_VINFO_REDUC_IDX (orig_stmt_info
)];
5699 /* Search the pattern def sequence and the main pattern stmt. Note
5700 we may have inserted all into a containing pattern def sequence
5701 so the following is a bit awkward. */
5702 gimple_stmt_iterator si
;
5706 si
= gsi_start (def_seq
);
5718 if (gimple_extract_op (s
, &op
))
5719 for (unsigned i
= 0; i
< op
.num_ops
; ++i
)
5720 if (op
.ops
[i
] == lookfor
)
5722 STMT_VINFO_REDUC_IDX (vinfo
->lookup_stmt (s
)) = i
;
5723 lookfor
= gimple_get_lhs (s
);
5727 if (s
== pattern_stmt
)
5729 if (!found
&& dump_enabled_p ())
5730 dump_printf_loc (MSG_NOTE
, vect_location
,
5731 "failed to update reduction index.\n");
5739 if (s
== pattern_stmt
)
5740 /* Found the end inside a bigger pattern def seq. */
5749 /* Function vect_pattern_recog_1
5752 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
5753 computation pattern.
5754 STMT_INFO: A stmt from which the pattern search should start.
5756 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
5757 a sequence of statements that has the same functionality and can be
5758 used to replace STMT_INFO. It returns the last statement in the sequence
5759 and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
5760 PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
5761 statement, having first checked that the target supports the new operation
5764 This function also does some bookkeeping, as explained in the documentation
5765 for vect_recog_pattern. */
5768 vect_pattern_recog_1 (vec_info
*vinfo
,
5769 vect_recog_func
*recog_func
, stmt_vec_info stmt_info
)
5771 gimple
*pattern_stmt
;
5772 loop_vec_info loop_vinfo
;
5773 tree pattern_vectype
;
5775 /* If this statement has already been replaced with pattern statements,
5776 leave the original statement alone, since the first match wins.
5777 Instead try to match against the definition statements that feed
5778 the main pattern statement. */
5779 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
5781 gimple_stmt_iterator gsi
;
5782 for (gsi
= gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
));
5783 !gsi_end_p (gsi
); gsi_next (&gsi
))
5784 vect_pattern_recog_1 (vinfo
, recog_func
,
5785 vinfo
->lookup_stmt (gsi_stmt (gsi
)));
5789 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
));
5790 pattern_stmt
= recog_func
->fn (vinfo
, stmt_info
, &pattern_vectype
);
5793 /* Clear any half-formed pattern definition sequence. */
5794 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
) = NULL
;
5798 loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
5800 /* Found a vectorizable pattern. */
5801 if (dump_enabled_p ())
5802 dump_printf_loc (MSG_NOTE
, vect_location
,
5803 "%s pattern recognized: %G",
5804 recog_func
->name
, pattern_stmt
);
5806 /* Mark the stmts that are involved in the pattern. */
5807 vect_mark_pattern_stmts (vinfo
, stmt_info
, pattern_stmt
, pattern_vectype
);
5809 /* Patterns cannot be vectorized using SLP, because they change the order of
5814 stmt_vec_info
*elem_ptr
;
5815 VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo
), ix
, ix2
,
5816 elem_ptr
, *elem_ptr
== stmt_info
);
5821 /* Function vect_pattern_recog
5824 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
5827 Output - for each computation idiom that is detected we create a new stmt
5828 that provides the same functionality and that can be vectorized. We
5829 also record some information in the struct_stmt_info of the relevant
5830 stmts, as explained below:
5832 At the entry to this function we have the following stmts, with the
5833 following initial value in the STMT_VINFO fields:
5835 stmt in_pattern_p related_stmt vec_stmt
5836 S1: a_i = .... - - -
5837 S2: a_2 = ..use(a_i).. - - -
5838 S3: a_1 = ..use(a_2).. - - -
5839 S4: a_0 = ..use(a_1).. - - -
5840 S5: ... = ..use(a_0).. - - -
5842 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
5843 represented by a single stmt. We then:
5844 - create a new stmt S6 equivalent to the pattern (the stmt is not
5845 inserted into the code)
5846 - fill in the STMT_VINFO fields as follows:
5848 in_pattern_p related_stmt vec_stmt
5849 S1: a_i = .... - - -
5850 S2: a_2 = ..use(a_i).. - - -
5851 S3: a_1 = ..use(a_2).. - - -
5852 S4: a_0 = ..use(a_1).. true S6 -
5853 '---> S6: a_new = .... - S4 -
5854 S5: ... = ..use(a_0).. - - -
5856 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
5857 to each other through the RELATED_STMT field).
5859 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
5860 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
5861 remain irrelevant unless used by stmts other than S4.
5863 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
5864 (because they are marked as irrelevant). It will vectorize S6, and record
5865 a pointer to the new vector stmt VS6 from S6 (as usual).
5866 S4 will be skipped, and S5 will be vectorized as usual:
5868 in_pattern_p related_stmt vec_stmt
5869 S1: a_i = .... - - -
5870 S2: a_2 = ..use(a_i).. - - -
5871 S3: a_1 = ..use(a_2).. - - -
5872 > VS6: va_new = .... - - -
5873 S4: a_0 = ..use(a_1).. true S6 VS6
5874 '---> S6: a_new = .... - S4 VS6
5875 > VS5: ... = ..vuse(va_new).. - - -
5876 S5: ... = ..use(a_0).. - - -
5878 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
5879 elsewhere), and we'll end up with:
5882 VS5: ... = ..vuse(va_new)..
5884 In case of more than one pattern statements, e.g., widen-mult with
5888 S2 a_T = (TYPE) a_t;
5889 '--> S3: a_it = (interm_type) a_t;
5890 S4 prod_T = a_T * CONST;
5891 '--> S5: prod_T' = a_it w* CONST;
5893 there may be other users of a_T outside the pattern. In that case S2 will
5894 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
5895 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
5896 be recorded in S3. */
5899 vect_pattern_recog (vec_info
*vinfo
)
5904 gimple_stmt_iterator si
;
5907 vect_determine_precisions (vinfo
);
5909 DUMP_VECT_SCOPE ("vect_pattern_recog");
5911 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
5913 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
5914 bbs
= LOOP_VINFO_BBS (loop_vinfo
);
5915 nbbs
= loop
->num_nodes
;
5917 /* Scan through the loop stmts, applying the pattern recognition
5918 functions starting at each stmt visited: */
5919 for (i
= 0; i
< nbbs
; i
++)
5921 basic_block bb
= bbs
[i
];
5922 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
5924 if (is_gimple_debug (gsi_stmt (si
)))
5926 stmt_vec_info stmt_info
= vinfo
->lookup_stmt (gsi_stmt (si
));
5927 /* Scan over all generic vect_recog_xxx_pattern functions. */
5928 for (j
= 0; j
< NUM_PATTERNS
; j
++)
5929 vect_pattern_recog_1 (vinfo
, &vect_vect_recog_func_ptrs
[j
],
5936 bb_vec_info bb_vinfo
= as_a
<bb_vec_info
> (vinfo
);
5937 for (unsigned i
= 0; i
< bb_vinfo
->bbs
.length (); ++i
)
5938 for (gimple_stmt_iterator gsi
= gsi_start_bb (bb_vinfo
->bbs
[i
]);
5939 !gsi_end_p (gsi
); gsi_next (&gsi
))
5941 stmt_vec_info stmt_info
= bb_vinfo
->lookup_stmt (gsi_stmt (gsi
));
5942 if (!stmt_info
|| !STMT_VINFO_VECTORIZABLE (stmt_info
))
5945 /* Scan over all generic vect_recog_xxx_pattern functions. */
5946 for (j
= 0; j
< NUM_PATTERNS
; j
++)
5947 vect_pattern_recog_1 (vinfo
,
5948 &vect_vect_recog_func_ptrs
[j
], stmt_info
);
5952 /* After this no more add_stmt calls are allowed. */
5953 vinfo
->stmt_vec_info_ro
= true;