1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2018 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
30 #include "optabs-tree.h"
31 #include "insn-config.h"
32 #include "recog.h" /* FIXME: for insn_data */
33 #include "fold-const.h"
34 #include "stor-layout.h"
37 #include "gimple-iterator.h"
39 #include "tree-vectorizer.h"
42 #include "internal-fn.h"
43 #include "case-cfn-macros.h"
44 #include "fold-const-call.h"
47 #include "omp-simd-clone.h"
50 /* Return true if we have a useful VR_RANGE range for VAR, storing it
51 in *MIN_VALUE and *MAX_VALUE if so. Note the range in the dump files. */
54 vect_get_range_info (tree var
, wide_int
*min_value
, wide_int
*max_value
)
56 value_range_kind vr_type
= get_range_info (var
, min_value
, max_value
);
57 wide_int nonzero
= get_nonzero_bits (var
);
58 signop sgn
= TYPE_SIGN (TREE_TYPE (var
));
59 if (intersect_range_with_nonzero_bits (vr_type
, min_value
, max_value
,
60 nonzero
, sgn
) == VR_RANGE
)
62 if (dump_enabled_p ())
64 dump_generic_expr_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, var
);
65 dump_printf (MSG_NOTE
, " has range [");
66 dump_hex (MSG_NOTE
, *min_value
);
67 dump_printf (MSG_NOTE
, ", ");
68 dump_hex (MSG_NOTE
, *max_value
);
69 dump_printf (MSG_NOTE
, "]\n");
75 if (dump_enabled_p ())
77 dump_generic_expr_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, var
);
78 dump_printf (MSG_NOTE
, " has no range info\n");
84 /* Report that we've found an instance of pattern PATTERN in
88 vect_pattern_detected (const char *name
, gimple
*stmt
)
90 if (dump_enabled_p ())
91 dump_printf_loc (MSG_NOTE
, vect_location
, "%s: detected: %G", name
, stmt
);
94 /* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
95 return the pattern statement's stmt_vec_info. Set its vector type to
96 VECTYPE if it doesn't have one already. */
99 vect_init_pattern_stmt (gimple
*pattern_stmt
, stmt_vec_info orig_stmt_info
,
102 vec_info
*vinfo
= orig_stmt_info
->vinfo
;
103 stmt_vec_info pattern_stmt_info
= vinfo
->lookup_stmt (pattern_stmt
);
104 if (pattern_stmt_info
== NULL
)
105 pattern_stmt_info
= orig_stmt_info
->vinfo
->add_stmt (pattern_stmt
);
106 gimple_set_bb (pattern_stmt
, gimple_bb (orig_stmt_info
->stmt
));
108 pattern_stmt_info
->pattern_stmt_p
= true;
109 STMT_VINFO_RELATED_STMT (pattern_stmt_info
) = orig_stmt_info
;
110 STMT_VINFO_DEF_TYPE (pattern_stmt_info
)
111 = STMT_VINFO_DEF_TYPE (orig_stmt_info
);
112 if (!STMT_VINFO_VECTYPE (pattern_stmt_info
))
113 STMT_VINFO_VECTYPE (pattern_stmt_info
) = vectype
;
114 return pattern_stmt_info
;
117 /* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
118 Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
122 vect_set_pattern_stmt (gimple
*pattern_stmt
, stmt_vec_info orig_stmt_info
,
125 STMT_VINFO_IN_PATTERN_P (orig_stmt_info
) = true;
126 STMT_VINFO_RELATED_STMT (orig_stmt_info
)
127 = vect_init_pattern_stmt (pattern_stmt
, orig_stmt_info
, vectype
);
130 /* Add NEW_STMT to STMT_INFO's pattern definition statements. If VECTYPE
131 is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
132 be different from the vector type of the final pattern statement. */
135 append_pattern_def_seq (stmt_vec_info stmt_info
, gimple
*new_stmt
,
136 tree vectype
= NULL_TREE
)
138 vec_info
*vinfo
= stmt_info
->vinfo
;
141 stmt_vec_info new_stmt_info
= vinfo
->add_stmt (new_stmt
);
142 STMT_VINFO_VECTYPE (new_stmt_info
) = vectype
;
144 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
),
148 /* The caller wants to perform new operations on vect_external variable
149 VAR, so that the result of the operations would also be vect_external.
150 Return the edge on which the operations can be performed, if one exists.
151 Return null if the operations should instead be treated as part of
152 the pattern that needs them. */
155 vect_get_external_def_edge (vec_info
*vinfo
, tree var
)
158 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
160 e
= loop_preheader_edge (loop_vinfo
->loop
);
161 if (!SSA_NAME_IS_DEFAULT_DEF (var
))
163 basic_block bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
165 || !dominated_by_p (CDI_DOMINATORS
, e
->dest
, bb
))
172 /* Return true if the target supports a vector version of CODE,
173 where CODE is known to map to a direct optab. ITYPE specifies
174 the type of (some of) the scalar inputs and OTYPE specifies the
175 type of the scalar result.
177 If CODE allows the inputs and outputs to have different type
178 (such as for WIDEN_SUM_EXPR), it is the input mode rather
179 than the output mode that determines the appropriate target pattern.
180 Operand 0 of the target pattern then specifies the mode that the output
183 When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
184 Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
188 vect_supportable_direct_optab_p (tree otype
, tree_code code
,
189 tree itype
, tree
*vecotype_out
,
190 tree
*vecitype_out
= NULL
)
192 tree vecitype
= get_vectype_for_scalar_type (itype
);
196 tree vecotype
= get_vectype_for_scalar_type (otype
);
200 optab optab
= optab_for_tree_code (code
, vecitype
, optab_default
);
204 insn_code icode
= optab_handler (optab
, TYPE_MODE (vecitype
));
205 if (icode
== CODE_FOR_nothing
206 || insn_data
[icode
].operand
[0].mode
!= TYPE_MODE (vecotype
))
209 *vecotype_out
= vecotype
;
211 *vecitype_out
= vecitype
;
215 /* Round bit precision PRECISION up to a full element. */
218 vect_element_precision (unsigned int precision
)
220 precision
= 1 << ceil_log2 (precision
);
221 return MAX (precision
, BITS_PER_UNIT
);
224 /* If OP is defined by a statement that's being considered for vectorization,
225 return information about that statement, otherwise return NULL. */
228 vect_get_internal_def (vec_info
*vinfo
, tree op
)
230 stmt_vec_info def_stmt_info
= vinfo
->lookup_def (op
);
232 && STMT_VINFO_DEF_TYPE (def_stmt_info
) == vect_internal_def
)
233 return def_stmt_info
;
237 /* Check whether NAME, an ssa-name used in STMT_VINFO,
238 is a result of a type promotion, such that:
239 DEF_STMT: NAME = NOP (name0)
240 If CHECK_SIGN is TRUE, check that either both types are signed or both are
244 type_conversion_p (tree name
, stmt_vec_info stmt_vinfo
, bool check_sign
,
245 tree
*orig_type
, gimple
**def_stmt
, bool *promotion
)
247 tree type
= TREE_TYPE (name
);
249 enum vect_def_type dt
;
251 stmt_vec_info def_stmt_info
;
252 if (!vect_is_simple_use (name
, stmt_vinfo
->vinfo
, &dt
, &def_stmt_info
,
256 if (dt
!= vect_internal_def
257 && dt
!= vect_external_def
&& dt
!= vect_constant_def
)
263 if (!is_gimple_assign (*def_stmt
))
266 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt
)))
269 oprnd0
= gimple_assign_rhs1 (*def_stmt
);
271 *orig_type
= TREE_TYPE (oprnd0
);
272 if (!INTEGRAL_TYPE_P (type
) || !INTEGRAL_TYPE_P (*orig_type
)
273 || ((TYPE_UNSIGNED (type
) != TYPE_UNSIGNED (*orig_type
)) && check_sign
))
276 if (TYPE_PRECISION (type
) >= (TYPE_PRECISION (*orig_type
) * 2))
281 if (!vect_is_simple_use (oprnd0
, stmt_vinfo
->vinfo
, &dt
))
287 /* Holds information about an input operand after some sign changes
288 and type promotions have been peeled away. */
289 struct vect_unpromoted_value
{
290 vect_unpromoted_value ();
292 void set_op (tree
, vect_def_type
, stmt_vec_info
= NULL
);
294 /* The value obtained after peeling away zero or more casts. */
297 /* The type of OP. */
300 /* The definition type of OP. */
303 /* If OP is the result of peeling at least one cast, and if the cast
304 of OP itself is a vectorizable statement, CASTER identifies that
305 statement, otherwise it is null. */
306 stmt_vec_info caster
;
309 inline vect_unpromoted_value::vect_unpromoted_value ()
312 dt (vect_uninitialized_def
),
317 /* Set the operand to OP_IN, its definition type to DT_IN, and the
318 statement that casts it to CASTER_IN. */
321 vect_unpromoted_value::set_op (tree op_in
, vect_def_type dt_in
,
322 stmt_vec_info caster_in
)
325 type
= TREE_TYPE (op
);
330 /* If OP is a vectorizable SSA name, strip a sequence of integer conversions
331 to reach some vectorizable inner operand OP', continuing as long as it
332 is possible to convert OP' back to OP using a possible sign change
333 followed by a possible promotion P. Return this OP', or null if OP is
334 not a vectorizable SSA name. If there is a promotion P, describe its
335 input in UNPROM, otherwise describe OP' in UNPROM. If SINGLE_USE_P
336 is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
337 have more than one user.
339 A successful return means that it is possible to go from OP' to OP
340 via UNPROM. The cast from OP' to UNPROM is at most a sign change,
341 whereas the cast from UNPROM to OP might be a promotion, a sign
346 signed short *ptr = ...;
347 signed short C = *ptr;
348 unsigned short B = (unsigned short) C; // sign change
349 signed int A = (signed int) B; // unsigned promotion
350 ...possible other uses of A...
351 unsigned int OP = (unsigned int) A; // sign change
353 In this case it's possible to go directly from C to OP using:
355 OP = (unsigned int) (unsigned short) C;
356 +------------+ +--------------+
357 promotion sign change
359 so OP' would be C. The input to the promotion is B, so UNPROM
363 vect_look_through_possible_promotion (vec_info
*vinfo
, tree op
,
364 vect_unpromoted_value
*unprom
,
365 bool *single_use_p
= NULL
)
367 tree res
= NULL_TREE
;
368 tree op_type
= TREE_TYPE (op
);
369 unsigned int orig_precision
= TYPE_PRECISION (op_type
);
370 unsigned int min_precision
= orig_precision
;
371 stmt_vec_info caster
= NULL
;
372 while (TREE_CODE (op
) == SSA_NAME
&& INTEGRAL_TYPE_P (op_type
))
374 /* See whether OP is simple enough to vectorize. */
375 stmt_vec_info def_stmt_info
;
378 if (!vect_is_simple_use (op
, vinfo
, &dt
, &def_stmt_info
, &def_stmt
))
381 /* If OP is the input of a demotion, skip over it to see whether
382 OP is itself the result of a promotion. If so, the combined
383 effect of the promotion and the demotion might fit the required
384 pattern, otherwise neither operation fits.
386 This copes with cases such as the result of an arithmetic
387 operation being truncated before being stored, and where that
388 arithmetic operation has been recognized as an over-widened one. */
389 if (TYPE_PRECISION (op_type
) <= min_precision
)
391 /* Use OP as the UNPROM described above if we haven't yet
392 found a promotion, or if using the new input preserves the
393 sign of the previous promotion. */
395 || TYPE_PRECISION (unprom
->type
) == orig_precision
396 || TYPE_SIGN (unprom
->type
) == TYPE_SIGN (op_type
))
398 unprom
->set_op (op
, dt
, caster
);
399 min_precision
= TYPE_PRECISION (op_type
);
401 /* Stop if we've already seen a promotion and if this
402 conversion does more than change the sign. */
403 else if (TYPE_PRECISION (op_type
)
404 != TYPE_PRECISION (unprom
->type
))
407 /* The sequence now extends to OP. */
411 /* See whether OP is defined by a cast. Record it as CASTER if
412 the cast is potentially vectorizable. */
415 caster
= def_stmt_info
;
417 /* Ignore pattern statements, since we don't link uses for them. */
420 && !STMT_VINFO_RELATED_STMT (caster
)
421 && !has_single_use (res
))
422 *single_use_p
= false;
424 gassign
*assign
= dyn_cast
<gassign
*> (def_stmt
);
425 if (!assign
|| !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt
)))
428 /* Continue with the input to the cast. */
429 op
= gimple_assign_rhs1 (def_stmt
);
430 op_type
= TREE_TYPE (op
);
435 /* OP is an integer operand to an operation that returns TYPE, and we
436 want to treat the operation as a widening one. So far we can treat
437 it as widening from *COMMON_TYPE.
439 Return true if OP is suitable for such a widening operation,
440 either widening from *COMMON_TYPE or from some supertype of it.
441 Update *COMMON_TYPE to the supertype in the latter case.
443 SHIFT_P is true if OP is a shift amount. */
446 vect_joust_widened_integer (tree type
, bool shift_p
, tree op
,
449 /* Calculate the minimum precision required by OP, without changing
450 the sign of either operand. */
451 unsigned int precision
;
454 if (!wi::leu_p (wi::to_widest (op
), TYPE_PRECISION (type
) / 2))
456 precision
= TREE_INT_CST_LOW (op
);
460 precision
= wi::min_precision (wi::to_widest (op
),
461 TYPE_SIGN (*common_type
));
462 if (precision
* 2 > TYPE_PRECISION (type
))
466 /* If OP requires a wider type, switch to that type. The checks
467 above ensure that this is still narrower than the result. */
468 precision
= vect_element_precision (precision
);
469 if (TYPE_PRECISION (*common_type
) < precision
)
470 *common_type
= build_nonstandard_integer_type
471 (precision
, TYPE_UNSIGNED (*common_type
));
475 /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
476 is narrower than type, storing the supertype in *COMMON_TYPE if so. */
479 vect_joust_widened_type (tree type
, tree new_type
, tree
*common_type
)
481 if (types_compatible_p (*common_type
, new_type
))
484 /* See if *COMMON_TYPE can hold all values of NEW_TYPE. */
485 if ((TYPE_PRECISION (new_type
) < TYPE_PRECISION (*common_type
))
486 && (TYPE_UNSIGNED (new_type
) || !TYPE_UNSIGNED (*common_type
)))
489 /* See if NEW_TYPE can hold all values of *COMMON_TYPE. */
490 if (TYPE_PRECISION (*common_type
) < TYPE_PRECISION (new_type
)
491 && (TYPE_UNSIGNED (*common_type
) || !TYPE_UNSIGNED (new_type
)))
493 *common_type
= new_type
;
497 /* We have mismatched signs, with the signed type being
498 no wider than the unsigned type. In this case we need
499 a wider signed type. */
500 unsigned int precision
= MAX (TYPE_PRECISION (*common_type
),
501 TYPE_PRECISION (new_type
));
503 if (precision
* 2 > TYPE_PRECISION (type
))
506 *common_type
= build_nonstandard_integer_type (precision
, false);
510 /* Check whether STMT_INFO can be viewed as a tree of integer operations
511 in which each node either performs CODE or WIDENED_CODE, and where
512 each leaf operand is narrower than the result of STMT_INFO. MAX_NOPS
513 specifies the maximum number of leaf operands. SHIFT_P says whether
514 CODE and WIDENED_CODE are some sort of shift.
516 If STMT_INFO is such a tree, return the number of leaf operands
517 and describe them in UNPROM[0] onwards. Also set *COMMON_TYPE
518 to a type that (a) is narrower than the result of STMT_INFO and
519 (b) can hold all leaf operand values.
521 Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
525 vect_widened_op_tree (stmt_vec_info stmt_info
, tree_code code
,
526 tree_code widened_code
, bool shift_p
,
527 unsigned int max_nops
,
528 vect_unpromoted_value
*unprom
, tree
*common_type
)
530 /* Check for an integer operation with the right code. */
531 vec_info
*vinfo
= stmt_info
->vinfo
;
532 gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
536 tree_code rhs_code
= gimple_assign_rhs_code (assign
);
537 if (rhs_code
!= code
&& rhs_code
!= widened_code
)
540 tree type
= gimple_expr_type (assign
);
541 if (!INTEGRAL_TYPE_P (type
))
544 /* Assume that both operands will be leaf operands. */
547 /* Check the operands. */
548 unsigned int next_op
= 0;
549 for (unsigned int i
= 0; i
< 2; ++i
)
551 vect_unpromoted_value
*this_unprom
= &unprom
[next_op
];
552 unsigned int nops
= 1;
553 tree op
= gimple_op (assign
, i
+ 1);
554 if (i
== 1 && TREE_CODE (op
) == INTEGER_CST
)
556 /* We already have a common type from earlier operands.
557 Update it to account for OP. */
558 this_unprom
->set_op (op
, vect_constant_def
);
559 if (!vect_joust_widened_integer (type
, shift_p
, op
, common_type
))
564 /* Only allow shifts by constants. */
565 if (shift_p
&& i
== 1)
568 if (!vect_look_through_possible_promotion (stmt_info
->vinfo
, op
,
572 if (TYPE_PRECISION (this_unprom
->type
) == TYPE_PRECISION (type
))
574 /* The operand isn't widened. If STMT_INFO has the code
575 for an unwidened operation, recursively check whether
576 this operand is a node of the tree. */
579 || this_unprom
->dt
!= vect_internal_def
)
582 /* Give back the leaf slot allocated above now that we're
583 not treating this as a leaf operand. */
586 /* Recursively process the definition of the operand. */
587 stmt_vec_info def_stmt_info
588 = vinfo
->lookup_def (this_unprom
->op
);
589 nops
= vect_widened_op_tree (def_stmt_info
, code
, widened_code
,
590 shift_p
, max_nops
, this_unprom
,
599 /* Make sure that the operand is narrower than the result. */
600 if (TYPE_PRECISION (this_unprom
->type
) * 2
601 > TYPE_PRECISION (type
))
604 /* Update COMMON_TYPE for the new operand. */
606 *common_type
= this_unprom
->type
;
607 else if (!vect_joust_widened_type (type
, this_unprom
->type
,
617 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
618 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
621 vect_recog_temp_ssa_var (tree type
, gimple
*stmt
)
623 return make_temp_ssa_name (type
, stmt
, "patt");
626 /* STMT2_INFO describes a type conversion that could be split into STMT1
627 followed by a version of STMT2_INFO that takes NEW_RHS as its first
628 input. Try to do this using pattern statements, returning true on
632 vect_split_statement (stmt_vec_info stmt2_info
, tree new_rhs
,
633 gimple
*stmt1
, tree vectype
)
635 if (is_pattern_stmt_p (stmt2_info
))
637 /* STMT2_INFO is part of a pattern. Get the statement to which
638 the pattern is attached. */
639 stmt_vec_info orig_stmt2_info
= STMT_VINFO_RELATED_STMT (stmt2_info
);
640 vect_init_pattern_stmt (stmt1
, orig_stmt2_info
, vectype
);
642 if (dump_enabled_p ())
643 dump_printf_loc (MSG_NOTE
, vect_location
,
644 "Splitting pattern statement: %G", stmt2_info
->stmt
);
646 /* Since STMT2_INFO is a pattern statement, we can change it
647 in-situ without worrying about changing the code for the
649 gimple_assign_set_rhs1 (stmt2_info
->stmt
, new_rhs
);
651 if (dump_enabled_p ())
653 dump_printf_loc (MSG_NOTE
, vect_location
, "into: %G", stmt1
);
654 dump_printf_loc (MSG_NOTE
, vect_location
, "and: %G",
658 gimple_seq
*def_seq
= &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info
);
659 if (STMT_VINFO_RELATED_STMT (orig_stmt2_info
) == stmt2_info
)
660 /* STMT2_INFO is the actual pattern statement. Add STMT1
661 to the end of the definition sequence. */
662 gimple_seq_add_stmt_without_update (def_seq
, stmt1
);
665 /* STMT2_INFO belongs to the definition sequence. Insert STMT1
667 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt2_info
->stmt
, def_seq
);
668 gsi_insert_before_without_update (&gsi
, stmt1
, GSI_SAME_STMT
);
674 /* STMT2_INFO doesn't yet have a pattern. Try to create a
675 two-statement pattern now. */
676 gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info
));
677 tree lhs_type
= TREE_TYPE (gimple_get_lhs (stmt2_info
->stmt
));
678 tree lhs_vectype
= get_vectype_for_scalar_type (lhs_type
);
682 if (dump_enabled_p ())
683 dump_printf_loc (MSG_NOTE
, vect_location
,
684 "Splitting statement: %G", stmt2_info
->stmt
);
686 /* Add STMT1 as a singleton pattern definition sequence. */
687 gimple_seq
*def_seq
= &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info
);
688 vect_init_pattern_stmt (stmt1
, stmt2_info
, vectype
);
689 gimple_seq_add_stmt_without_update (def_seq
, stmt1
);
691 /* Build the second of the two pattern statements. */
692 tree new_lhs
= vect_recog_temp_ssa_var (lhs_type
, NULL
);
693 gassign
*new_stmt2
= gimple_build_assign (new_lhs
, NOP_EXPR
, new_rhs
);
694 vect_set_pattern_stmt (new_stmt2
, stmt2_info
, lhs_vectype
);
696 if (dump_enabled_p ())
698 dump_printf_loc (MSG_NOTE
, vect_location
,
699 "into pattern statements: %G", stmt1
);
700 dump_printf_loc (MSG_NOTE
, vect_location
, "and: %G", new_stmt2
);
707 /* Convert UNPROM to TYPE and return the result, adding new statements
708 to STMT_INFO's pattern definition statements if no better way is
709 available. VECTYPE is the vector form of TYPE. */
712 vect_convert_input (stmt_vec_info stmt_info
, tree type
,
713 vect_unpromoted_value
*unprom
, tree vectype
)
715 /* Check for a no-op conversion. */
716 if (types_compatible_p (type
, TREE_TYPE (unprom
->op
)))
719 /* Allow the caller to create constant vect_unpromoted_values. */
720 if (TREE_CODE (unprom
->op
) == INTEGER_CST
)
721 return wide_int_to_tree (type
, wi::to_widest (unprom
->op
));
723 /* See if we can reuse an existing result. */
726 tree lhs
= gimple_get_lhs (unprom
->caster
->stmt
);
727 if (types_compatible_p (TREE_TYPE (lhs
), type
))
731 /* We need a new conversion statement. */
732 tree new_op
= vect_recog_temp_ssa_var (type
, NULL
);
733 gassign
*new_stmt
= gimple_build_assign (new_op
, NOP_EXPR
, unprom
->op
);
735 /* If the operation is the input to a vectorizable cast, try splitting
736 that cast into two, taking the required result as a mid-way point. */
739 tree lhs
= gimple_get_lhs (unprom
->caster
->stmt
);
740 if (TYPE_PRECISION (TREE_TYPE (lhs
)) > TYPE_PRECISION (type
)
741 && TYPE_PRECISION (type
) > TYPE_PRECISION (unprom
->type
)
742 && (TYPE_UNSIGNED (unprom
->type
) || !TYPE_UNSIGNED (type
))
743 && vect_split_statement (unprom
->caster
, new_op
, new_stmt
, vectype
))
747 /* If OP is an external value, see if we can insert the new statement
748 on an incoming edge. */
749 if (unprom
->dt
== vect_external_def
)
750 if (edge e
= vect_get_external_def_edge (stmt_info
->vinfo
, unprom
->op
))
752 basic_block new_bb
= gsi_insert_on_edge_immediate (e
, new_stmt
);
753 gcc_assert (!new_bb
);
757 /* As a (common) last resort, add the statement to the pattern itself. */
758 append_pattern_def_seq (stmt_info
, new_stmt
, vectype
);
762 /* Invoke vect_convert_input for N elements of UNPROM and store the
763 result in the corresponding elements of RESULT. */
766 vect_convert_inputs (stmt_vec_info stmt_info
, unsigned int n
,
767 tree
*result
, tree type
, vect_unpromoted_value
*unprom
,
770 for (unsigned int i
= 0; i
< n
; ++i
)
773 for (j
= 0; j
< i
; ++j
)
774 if (unprom
[j
].op
== unprom
[i
].op
)
777 result
[i
] = result
[j
];
779 result
[i
] = vect_convert_input (stmt_info
, type
, &unprom
[i
], vectype
);
783 /* The caller has created a (possibly empty) sequence of pattern definition
784 statements followed by a single statement PATTERN_STMT. Cast the result
785 of this final statement to TYPE. If a new statement is needed, add
786 PATTERN_STMT to the end of STMT_INFO's pattern definition statements
787 and return the new statement, otherwise return PATTERN_STMT as-is.
788 VECITYPE is the vector form of PATTERN_STMT's result type. */
791 vect_convert_output (stmt_vec_info stmt_info
, tree type
, gimple
*pattern_stmt
,
794 tree lhs
= gimple_get_lhs (pattern_stmt
);
795 if (!types_compatible_p (type
, TREE_TYPE (lhs
)))
797 append_pattern_def_seq (stmt_info
, pattern_stmt
, vecitype
);
798 tree cast_var
= vect_recog_temp_ssa_var (type
, NULL
);
799 pattern_stmt
= gimple_build_assign (cast_var
, NOP_EXPR
, lhs
);
804 /* Return true if STMT_VINFO describes a reduction for which reassociation
805 is allowed. If STMT_INFO is part of a group, assume that it's part of
806 a reduction chain and optimistically assume that all statements
807 except the last allow reassociation. */
810 vect_reassociating_reduction_p (stmt_vec_info stmt_vinfo
)
812 return (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
813 ? STMT_VINFO_REDUC_TYPE (stmt_vinfo
) != FOLD_LEFT_REDUCTION
814 : REDUC_GROUP_FIRST_ELEMENT (stmt_vinfo
) != NULL
);
817 /* As above, but also require it to have code CODE and to be a reduction
818 in the outermost loop. When returning true, store the operands in
819 *OP0_OUT and *OP1_OUT. */
822 vect_reassociating_reduction_p (stmt_vec_info stmt_info
, tree_code code
,
823 tree
*op0_out
, tree
*op1_out
)
825 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_info
);
829 gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
830 if (!assign
|| gimple_assign_rhs_code (assign
) != code
)
833 /* We don't allow changing the order of the computation in the inner-loop
834 when doing outer-loop vectorization. */
835 struct loop
*loop
= LOOP_VINFO_LOOP (loop_info
);
836 if (loop
&& nested_in_vect_loop_p (loop
, stmt_info
))
839 if (!vect_reassociating_reduction_p (stmt_info
))
842 *op0_out
= gimple_assign_rhs1 (assign
);
843 *op1_out
= gimple_assign_rhs2 (assign
);
847 /* Function vect_recog_dot_prod_pattern
849 Try to find the following pattern:
855 sum_0 = phi <init, sum_1>
858 S3 x_T = (TYPE1) x_t;
859 S4 y_T = (TYPE1) y_t;
861 [S6 prod = (TYPE2) prod; #optional]
862 S7 sum_1 = prod + sum_0;
864 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
865 same size of 'TYPE1' or bigger. This is a special case of a reduction
870 * STMT_VINFO: The stmt from which the pattern search begins. In the
871 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
876 * TYPE_OUT: The type of the output of this pattern.
878 * Return value: A new stmt that will be used to replace the sequence of
879 stmts that constitute the pattern. In this case it will be:
880 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
882 Note: The dot-prod idiom is a widening reduction pattern that is
883 vectorized without preserving all the intermediate results. It
884 produces only N/2 (widened) results (by summing up pairs of
885 intermediate results) rather than all N results. Therefore, we
886 cannot allow this pattern when we want to get all the results and in
887 the correct order (as is the case when this computation is in an
888 inner-loop nested in an outer-loop that us being vectorized). */
891 vect_recog_dot_prod_pattern (stmt_vec_info stmt_vinfo
, tree
*type_out
)
894 gimple
*last_stmt
= stmt_vinfo
->stmt
;
895 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
896 tree type
, half_type
;
897 gimple
*pattern_stmt
;
900 /* Look for the following pattern
904 DDPROD = (TYPE2) DPROD;
905 sum_1 = DDPROD + sum_0;
907 - DX is double the size of X
908 - DY is double the size of Y
909 - DX, DY, DPROD all have the same type
910 - sum is the same size of DPROD or bigger
911 - sum has been recognized as a reduction variable.
913 This is equivalent to:
914 DPROD = X w* Y; #widen mult
915 sum_1 = DPROD w+ sum_0; #widen summation
917 DPROD = X w* Y; #widen mult
918 sum_1 = DPROD + sum_0; #summation
921 /* Starting from LAST_STMT, follow the defs of its uses in search
922 of the above pattern. */
924 if (!vect_reassociating_reduction_p (stmt_vinfo
, PLUS_EXPR
,
928 type
= gimple_expr_type (last_stmt
);
930 vect_unpromoted_value unprom_mult
;
931 oprnd0
= vect_look_through_possible_promotion (vinfo
, oprnd0
, &unprom_mult
);
933 /* So far so good. Since last_stmt was detected as a (summation) reduction,
934 we know that oprnd1 is the reduction variable (defined by a loop-header
935 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
936 Left to check that oprnd0 is defined by a (widen_)mult_expr */
940 stmt_vec_info mult_vinfo
= vect_get_internal_def (vinfo
, oprnd0
);
944 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
945 inside the loop (in case we are analyzing an outer-loop). */
946 vect_unpromoted_value unprom0
[2];
947 if (!vect_widened_op_tree (mult_vinfo
, MULT_EXPR
, WIDEN_MULT_EXPR
,
948 false, 2, unprom0
, &half_type
))
951 /* If there are two widening operations, make sure they agree on
952 the sign of the extension. */
953 if (TYPE_PRECISION (unprom_mult
.type
) != TYPE_PRECISION (type
)
954 && TYPE_SIGN (unprom_mult
.type
) != TYPE_SIGN (half_type
))
957 vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt
);
960 if (!vect_supportable_direct_optab_p (type
, DOT_PROD_EXPR
, half_type
,
961 type_out
, &half_vectype
))
964 /* Get the inputs in the appropriate types. */
966 vect_convert_inputs (stmt_vinfo
, 2, mult_oprnd
, half_type
,
967 unprom0
, half_vectype
);
969 var
= vect_recog_temp_ssa_var (type
, NULL
);
970 pattern_stmt
= gimple_build_assign (var
, DOT_PROD_EXPR
,
971 mult_oprnd
[0], mult_oprnd
[1], oprnd1
);
977 /* Function vect_recog_sad_pattern
979 Try to find the following Sum of Absolute Difference (SAD) pattern:
982 signed TYPE1 diff, abs_diff;
985 sum_0 = phi <init, sum_1>
988 S3 x_T = (TYPE1) x_t;
989 S4 y_T = (TYPE1) y_t;
991 S6 abs_diff = ABS_EXPR <diff>;
992 [S7 abs_diff = (TYPE2) abs_diff; #optional]
993 S8 sum_1 = abs_diff + sum_0;
995 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
996 same size of 'TYPE1' or bigger. This is a special case of a reduction
1001 * STMT_VINFO: The stmt from which the pattern search begins. In the
1002 example, when this function is called with S8, the pattern
1003 {S3,S4,S5,S6,S7,S8} will be detected.
1007 * TYPE_OUT: The type of the output of this pattern.
1009 * Return value: A new stmt that will be used to replace the sequence of
1010 stmts that constitute the pattern. In this case it will be:
1011 SAD_EXPR <x_t, y_t, sum_0>
1015 vect_recog_sad_pattern (stmt_vec_info stmt_vinfo
, tree
*type_out
)
1017 gimple
*last_stmt
= stmt_vinfo
->stmt
;
1018 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
1021 /* Look for the following pattern
1025 DAD = ABS_EXPR <DDIFF>;
1026 DDPROD = (TYPE2) DPROD;
1027 sum_1 = DAD + sum_0;
1029 - DX is at least double the size of X
1030 - DY is at least double the size of Y
1031 - DX, DY, DDIFF, DAD all have the same type
1032 - sum is the same size of DAD or bigger
1033 - sum has been recognized as a reduction variable.
1035 This is equivalent to:
1036 DDIFF = X w- Y; #widen sub
1037 DAD = ABS_EXPR <DDIFF>;
1038 sum_1 = DAD w+ sum_0; #widen summation
1040 DDIFF = X w- Y; #widen sub
1041 DAD = ABS_EXPR <DDIFF>;
1042 sum_1 = DAD + sum_0; #summation
1045 /* Starting from LAST_STMT, follow the defs of its uses in search
1046 of the above pattern. */
1048 tree plus_oprnd0
, plus_oprnd1
;
1049 if (!vect_reassociating_reduction_p (stmt_vinfo
, PLUS_EXPR
,
1050 &plus_oprnd0
, &plus_oprnd1
))
1053 tree sum_type
= gimple_expr_type (last_stmt
);
1055 /* Any non-truncating sequence of conversions is OK here, since
1056 with a successful match, the result of the ABS(U) is known to fit
1057 within the nonnegative range of the result type. (It cannot be the
1058 negative of the minimum signed value due to the range of the widening
1060 vect_unpromoted_value unprom_abs
;
1061 plus_oprnd0
= vect_look_through_possible_promotion (vinfo
, plus_oprnd0
,
1064 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1065 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
1066 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
1067 Then check that plus_oprnd0 is defined by an abs_expr. */
1072 stmt_vec_info abs_stmt_vinfo
= vect_get_internal_def (vinfo
, plus_oprnd0
);
1073 if (!abs_stmt_vinfo
)
1076 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
1077 inside the loop (in case we are analyzing an outer-loop). */
1078 gassign
*abs_stmt
= dyn_cast
<gassign
*> (abs_stmt_vinfo
->stmt
);
1080 || (gimple_assign_rhs_code (abs_stmt
) != ABS_EXPR
1081 && gimple_assign_rhs_code (abs_stmt
) != ABSU_EXPR
))
1084 tree abs_oprnd
= gimple_assign_rhs1 (abs_stmt
);
1085 tree abs_type
= TREE_TYPE (abs_oprnd
);
1086 if (TYPE_UNSIGNED (abs_type
))
1089 /* Peel off conversions from the ABS input. This can involve sign
1090 changes (e.g. from an unsigned subtraction to a signed ABS input)
1091 or signed promotion, but it can't include unsigned promotion.
1092 (Note that ABS of an unsigned promotion should have been folded
1093 away before now anyway.) */
1094 vect_unpromoted_value unprom_diff
;
1095 abs_oprnd
= vect_look_through_possible_promotion (vinfo
, abs_oprnd
,
1099 if (TYPE_PRECISION (unprom_diff
.type
) != TYPE_PRECISION (abs_type
)
1100 && TYPE_UNSIGNED (unprom_diff
.type
))
1103 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
1104 stmt_vec_info diff_stmt_vinfo
= vect_get_internal_def (vinfo
, abs_oprnd
);
1105 if (!diff_stmt_vinfo
)
1108 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
1109 inside the loop (in case we are analyzing an outer-loop). */
1110 vect_unpromoted_value unprom
[2];
1111 if (!vect_widened_op_tree (diff_stmt_vinfo
, MINUS_EXPR
, MINUS_EXPR
,
1112 false, 2, unprom
, &half_type
))
1115 vect_pattern_detected ("vect_recog_sad_pattern", last_stmt
);
1118 if (!vect_supportable_direct_optab_p (sum_type
, SAD_EXPR
, half_type
,
1119 type_out
, &half_vectype
))
1122 /* Get the inputs to the SAD_EXPR in the appropriate types. */
1124 vect_convert_inputs (stmt_vinfo
, 2, sad_oprnd
, half_type
,
1125 unprom
, half_vectype
);
1127 tree var
= vect_recog_temp_ssa_var (sum_type
, NULL
);
1128 gimple
*pattern_stmt
= gimple_build_assign (var
, SAD_EXPR
, sad_oprnd
[0],
1129 sad_oprnd
[1], plus_oprnd1
);
1131 return pattern_stmt
;
1134 /* Recognize an operation that performs ORIG_CODE on widened inputs,
1135 so that it can be treated as though it had the form:
1139 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1140 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1141 | RES_TYPE a_extend = (RES_TYPE) a_cast; // promotion from HALF_TYPE
1142 | RES_TYPE b_extend = (RES_TYPE) b_cast; // promotion from HALF_TYPE
1143 | RES_TYPE res = a_extend ORIG_CODE b_extend;
1145 Try to replace the pattern with:
1149 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1150 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1151 | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
1152 | RES_TYPE res = (EXT_TYPE) ext; // possible no-op
1154 where EXT_TYPE is wider than HALF_TYPE but has the same signedness.
1156 SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts. NAME is the
1157 name of the pattern being matched, for dump purposes. */
1160 vect_recog_widen_op_pattern (stmt_vec_info last_stmt_info
, tree
*type_out
,
1161 tree_code orig_code
, tree_code wide_code
,
1162 bool shift_p
, const char *name
)
1164 gimple
*last_stmt
= last_stmt_info
->stmt
;
1166 vect_unpromoted_value unprom
[2];
1168 if (!vect_widened_op_tree (last_stmt_info
, orig_code
, orig_code
,
1169 shift_p
, 2, unprom
, &half_type
))
1172 /* Pattern detected. */
1173 vect_pattern_detected (name
, last_stmt
);
1175 tree type
= gimple_expr_type (last_stmt
);
1177 if (TYPE_PRECISION (type
) != TYPE_PRECISION (half_type
) * 2
1178 || TYPE_UNSIGNED (type
) != TYPE_UNSIGNED (half_type
))
1179 itype
= build_nonstandard_integer_type (TYPE_PRECISION (half_type
) * 2,
1180 TYPE_UNSIGNED (half_type
));
1182 /* Check target support */
1183 tree vectype
= get_vectype_for_scalar_type (half_type
);
1184 tree vecitype
= get_vectype_for_scalar_type (itype
);
1185 enum tree_code dummy_code
;
1187 auto_vec
<tree
> dummy_vec
;
1190 || !supportable_widening_operation (wide_code
, last_stmt_info
,
1192 &dummy_code
, &dummy_code
,
1193 &dummy_int
, &dummy_vec
))
1196 *type_out
= get_vectype_for_scalar_type (type
);
1201 vect_convert_inputs (last_stmt_info
, 2, oprnd
, half_type
, unprom
, vectype
);
1203 tree var
= vect_recog_temp_ssa_var (itype
, NULL
);
1204 gimple
*pattern_stmt
= gimple_build_assign (var
, wide_code
,
1205 oprnd
[0], oprnd
[1]);
1207 return vect_convert_output (last_stmt_info
, type
, pattern_stmt
, vecitype
);
1210 /* Try to detect multiplication on widened inputs, converting MULT_EXPR
1211 to WIDEN_MULT_EXPR. See vect_recog_widen_op_pattern for details. */
1214 vect_recog_widen_mult_pattern (stmt_vec_info last_stmt_info
, tree
*type_out
)
1216 return vect_recog_widen_op_pattern (last_stmt_info
, type_out
, MULT_EXPR
,
1217 WIDEN_MULT_EXPR
, false,
1218 "vect_recog_widen_mult_pattern");
1221 /* Function vect_recog_pow_pattern
1223 Try to find the following pattern:
1227 with POW being one of pow, powf, powi, powif and N being
1232 * STMT_VINFO: The stmt from which the pattern search begins.
1236 * TYPE_OUT: The type of the output of this pattern.
1238 * Return value: A new stmt that will be used to replace the sequence of
1239 stmts that constitute the pattern. In this case it will be:
1246 vect_recog_pow_pattern (stmt_vec_info stmt_vinfo
, tree
*type_out
)
1248 gimple
*last_stmt
= stmt_vinfo
->stmt
;
1253 if (!is_gimple_call (last_stmt
) || gimple_call_lhs (last_stmt
) == NULL
)
1256 switch (gimple_call_combined_fn (last_stmt
))
1266 base
= gimple_call_arg (last_stmt
, 0);
1267 exp
= gimple_call_arg (last_stmt
, 1);
1268 if (TREE_CODE (exp
) != REAL_CST
1269 && TREE_CODE (exp
) != INTEGER_CST
)
1271 if (flag_unsafe_math_optimizations
1272 && TREE_CODE (base
) == REAL_CST
1273 && !gimple_call_internal_p (last_stmt
))
1275 combined_fn log_cfn
;
1276 built_in_function exp_bfn
;
1277 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt
)))
1280 log_cfn
= CFN_BUILT_IN_LOG
;
1281 exp_bfn
= BUILT_IN_EXP
;
1284 log_cfn
= CFN_BUILT_IN_LOGF
;
1285 exp_bfn
= BUILT_IN_EXPF
;
1288 log_cfn
= CFN_BUILT_IN_LOGL
;
1289 exp_bfn
= BUILT_IN_EXPL
;
1294 tree logc
= fold_const_call (log_cfn
, TREE_TYPE (base
), base
);
1295 tree exp_decl
= builtin_decl_implicit (exp_bfn
);
1296 /* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd
1297 does that, but if C is a power of 2, we want to use
1298 exp2 (log2 (C) * x) in the non-vectorized version, but for
1299 vectorization we don't have vectorized exp2. */
1301 && TREE_CODE (logc
) == REAL_CST
1303 && lookup_attribute ("omp declare simd",
1304 DECL_ATTRIBUTES (exp_decl
)))
1306 cgraph_node
*node
= cgraph_node::get_create (exp_decl
);
1307 if (node
->simd_clones
== NULL
)
1309 if (targetm
.simd_clone
.compute_vecsize_and_simdlen
== NULL
1310 || node
->definition
)
1312 expand_simd_clones (node
);
1313 if (node
->simd_clones
== NULL
)
1316 *type_out
= get_vectype_for_scalar_type (TREE_TYPE (base
));
1319 tree def
= vect_recog_temp_ssa_var (TREE_TYPE (base
), NULL
);
1320 gimple
*g
= gimple_build_assign (def
, MULT_EXPR
, exp
, logc
);
1321 append_pattern_def_seq (stmt_vinfo
, g
);
1322 tree res
= vect_recog_temp_ssa_var (TREE_TYPE (base
), NULL
);
1323 g
= gimple_build_call (exp_decl
, 1, def
);
1324 gimple_call_set_lhs (g
, res
);
1332 /* We now have a pow or powi builtin function call with a constant
1335 /* Catch squaring. */
1336 if ((tree_fits_shwi_p (exp
)
1337 && tree_to_shwi (exp
) == 2)
1338 || (TREE_CODE (exp
) == REAL_CST
1339 && real_equal (&TREE_REAL_CST (exp
), &dconst2
)))
1341 if (!vect_supportable_direct_optab_p (TREE_TYPE (base
), MULT_EXPR
,
1342 TREE_TYPE (base
), type_out
))
1345 var
= vect_recog_temp_ssa_var (TREE_TYPE (base
), NULL
);
1346 stmt
= gimple_build_assign (var
, MULT_EXPR
, base
, base
);
1350 /* Catch square root. */
1351 if (TREE_CODE (exp
) == REAL_CST
1352 && real_equal (&TREE_REAL_CST (exp
), &dconsthalf
))
1354 *type_out
= get_vectype_for_scalar_type (TREE_TYPE (base
));
1356 && direct_internal_fn_supported_p (IFN_SQRT
, *type_out
,
1357 OPTIMIZE_FOR_SPEED
))
1359 gcall
*stmt
= gimple_build_call_internal (IFN_SQRT
, 1, base
);
1360 var
= vect_recog_temp_ssa_var (TREE_TYPE (base
), stmt
);
1361 gimple_call_set_lhs (stmt
, var
);
1362 gimple_call_set_nothrow (stmt
, true);
1371 /* Function vect_recog_widen_sum_pattern
1373 Try to find the following pattern:
1376 TYPE x_T, sum = init;
1378 sum_0 = phi <init, sum_1>
1380 S2 x_T = (TYPE) x_t;
1381 S3 sum_1 = x_T + sum_0;
1383 where type 'TYPE' is at least double the size of type 'type', i.e - we're
1384 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1385 a special case of a reduction computation.
1389 * STMT_VINFO: The stmt from which the pattern search begins. In the example,
1390 when this function is called with S3, the pattern {S2,S3} will be detected.
1394 * TYPE_OUT: The type of the output of this pattern.
1396 * Return value: A new stmt that will be used to replace the sequence of
1397 stmts that constitute the pattern. In this case it will be:
1398 WIDEN_SUM <x_t, sum_0>
1400 Note: The widening-sum idiom is a widening reduction pattern that is
1401 vectorized without preserving all the intermediate results. It
1402 produces only N/2 (widened) results (by summing up pairs of
1403 intermediate results) rather than all N results. Therefore, we
1404 cannot allow this pattern when we want to get all the results and in
1405 the correct order (as is the case when this computation is in an
1406 inner-loop nested in an outer-loop that us being vectorized). */
1409 vect_recog_widen_sum_pattern (stmt_vec_info stmt_vinfo
, tree
*type_out
)
1411 gimple
*last_stmt
= stmt_vinfo
->stmt
;
1412 tree oprnd0
, oprnd1
;
1413 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
1415 gimple
*pattern_stmt
;
1418 /* Look for the following pattern
1421 In which DX is at least double the size of X, and sum_1 has been
1422 recognized as a reduction variable.
1425 /* Starting from LAST_STMT, follow the defs of its uses in search
1426 of the above pattern. */
1428 if (!vect_reassociating_reduction_p (stmt_vinfo
, PLUS_EXPR
,
1432 type
= gimple_expr_type (last_stmt
);
1434 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1435 we know that oprnd1 is the reduction variable (defined by a loop-header
1436 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1437 Left to check that oprnd0 is defined by a cast from type 'type' to type
1440 vect_unpromoted_value unprom0
;
1441 if (!vect_look_through_possible_promotion (vinfo
, oprnd0
, &unprom0
)
1442 || TYPE_PRECISION (unprom0
.type
) * 2 > TYPE_PRECISION (type
))
1445 vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt
);
1447 if (!vect_supportable_direct_optab_p (type
, WIDEN_SUM_EXPR
, unprom0
.type
,
1451 var
= vect_recog_temp_ssa_var (type
, NULL
);
1452 pattern_stmt
= gimple_build_assign (var
, WIDEN_SUM_EXPR
, unprom0
.op
, oprnd1
);
1454 return pattern_stmt
;
1457 /* Recognize cases in which an operation is performed in one type WTYPE
1458 but could be done more efficiently in a narrower type NTYPE. For example,
1461 ATYPE a; // narrower than NTYPE
1462 BTYPE b; // narrower than NTYPE
1463 WTYPE aw = (WTYPE) a;
1464 WTYPE bw = (WTYPE) b;
1465 WTYPE res = aw + bw; // only uses of aw and bw
1467 then it would be more efficient to do:
1469 NTYPE an = (NTYPE) a;
1470 NTYPE bn = (NTYPE) b;
1471 NTYPE resn = an + bn;
1472 WTYPE res = (WTYPE) resn;
1474 Other situations include things like:
1476 ATYPE a; // NTYPE or narrower
1477 WTYPE aw = (WTYPE) a;
1480 when only "(NTYPE) res" is significant. In that case it's more efficient
1481 to truncate "b" and do the operation on NTYPE instead:
1483 NTYPE an = (NTYPE) a;
1484 NTYPE bn = (NTYPE) b; // truncation
1485 NTYPE resn = an + bn;
1486 WTYPE res = (WTYPE) resn;
1488 All users of "res" should then use "resn" instead, making the final
1489 statement dead (not marked as relevant). The final statement is still
1490 needed to maintain the type correctness of the IR.
1492 vect_determine_precisions has already determined the minimum
1493 precison of the operation and the minimum precision required
1494 by users of the result. */
1497 vect_recog_over_widening_pattern (stmt_vec_info last_stmt_info
, tree
*type_out
)
1499 gassign
*last_stmt
= dyn_cast
<gassign
*> (last_stmt_info
->stmt
);
1503 /* See whether we have found that this operation can be done on a
1504 narrower type without changing its semantics. */
1505 unsigned int new_precision
= last_stmt_info
->operation_precision
;
1509 vec_info
*vinfo
= last_stmt_info
->vinfo
;
1510 tree lhs
= gimple_assign_lhs (last_stmt
);
1511 tree type
= TREE_TYPE (lhs
);
1512 tree_code code
= gimple_assign_rhs_code (last_stmt
);
1514 /* Keep the first operand of a COND_EXPR as-is: only the other two
1515 operands are interesting. */
1516 unsigned int first_op
= (code
== COND_EXPR
? 2 : 1);
1518 /* Check the operands. */
1519 unsigned int nops
= gimple_num_ops (last_stmt
) - first_op
;
1520 auto_vec
<vect_unpromoted_value
, 3> unprom (nops
);
1521 unprom
.quick_grow (nops
);
1522 unsigned int min_precision
= 0;
1523 bool single_use_p
= false;
1524 for (unsigned int i
= 0; i
< nops
; ++i
)
1526 tree op
= gimple_op (last_stmt
, first_op
+ i
);
1527 if (TREE_CODE (op
) == INTEGER_CST
)
1528 unprom
[i
].set_op (op
, vect_constant_def
);
1529 else if (TREE_CODE (op
) == SSA_NAME
)
1531 bool op_single_use_p
= true;
1532 if (!vect_look_through_possible_promotion (vinfo
, op
, &unprom
[i
],
1537 (1) N bits of the result are needed;
1538 (2) all inputs are widened from M<N bits; and
1539 (3) one operand OP is a single-use SSA name
1541 we can shift the M->N widening from OP to the output
1542 without changing the number or type of extensions involved.
1543 This then reduces the number of copies of STMT_INFO.
1545 If instead of (3) more than one operand is a single-use SSA name,
1546 shifting the extension to the output is even more of a win.
1550 (1) N bits of the result are needed;
1551 (2) one operand OP2 is widened from M2<N bits;
1552 (3) another operand OP1 is widened from M1<M2 bits; and
1553 (4) both OP1 and OP2 are single-use
1555 the choice is between:
1557 (a) truncating OP2 to M1, doing the operation on M1,
1558 and then widening the result to N
1560 (b) widening OP1 to M2, doing the operation on M2, and then
1561 widening the result to N
1563 Both shift the M2->N widening of the inputs to the output.
1564 (a) additionally shifts the M1->M2 widening to the output;
1565 it requires fewer copies of STMT_INFO but requires an extra
1568 Which is better will depend on the complexity and cost of
1569 STMT_INFO, which is hard to predict at this stage. However,
1570 a clear tie-breaker in favor of (b) is the fact that the
1571 truncation in (a) increases the length of the operation chain.
1573 If instead of (4) only one of OP1 or OP2 is single-use,
1574 (b) is still a win over doing the operation in N bits:
1575 it still shifts the M2->N widening on the single-use operand
1576 to the output and reduces the number of STMT_INFO copies.
1578 If neither operand is single-use then operating on fewer than
1579 N bits might lead to more extensions overall. Whether it does
1580 or not depends on global information about the vectorization
1581 region, and whether that's a good trade-off would again
1582 depend on the complexity and cost of the statements involved,
1583 as well as things like register pressure that are not normally
1584 modelled at this stage. We therefore ignore these cases
1585 and just optimize the clear single-use wins above.
1587 Thus we take the maximum precision of the unpromoted operands
1588 and record whether any operand is single-use. */
1589 if (unprom
[i
].dt
== vect_internal_def
)
1591 min_precision
= MAX (min_precision
,
1592 TYPE_PRECISION (unprom
[i
].type
));
1593 single_use_p
|= op_single_use_p
;
1598 /* Although the operation could be done in operation_precision, we have
1599 to balance that against introducing extra truncations or extensions.
1600 Calculate the minimum precision that can be handled efficiently.
1602 The loop above determined that the operation could be handled
1603 efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
1604 extension from the inputs to the output without introducing more
1605 instructions, and would reduce the number of instructions required
1606 for STMT_INFO itself.
1608 vect_determine_precisions has also determined that the result only
1609 needs min_output_precision bits. Truncating by a factor of N times
1610 requires a tree of N - 1 instructions, so if TYPE is N times wider
1611 than min_output_precision, doing the operation in TYPE and truncating
1612 the result requires N + (N - 1) = 2N - 1 instructions per output vector.
1615 - truncating the input to a unary operation and doing the operation
1616 in the new type requires at most N - 1 + 1 = N instructions per
1619 - doing the same for a binary operation requires at most
1620 (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
1622 Both unary and binary operations require fewer instructions than
1623 this if the operands were extended from a suitable truncated form.
1624 Thus there is usually nothing to lose by doing operations in
1625 min_output_precision bits, but there can be something to gain. */
1627 min_precision
= last_stmt_info
->min_output_precision
;
1629 min_precision
= MIN (min_precision
, last_stmt_info
->min_output_precision
);
1631 /* Apply the minimum efficient precision we just calculated. */
1632 if (new_precision
< min_precision
)
1633 new_precision
= min_precision
;
1634 if (new_precision
>= TYPE_PRECISION (type
))
1637 vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt
);
1639 *type_out
= get_vectype_for_scalar_type (type
);
1643 /* We've found a viable pattern. Get the new type of the operation. */
1644 bool unsigned_p
= (last_stmt_info
->operation_sign
== UNSIGNED
);
1645 tree new_type
= build_nonstandard_integer_type (new_precision
, unsigned_p
);
1647 /* We specifically don't check here whether the target supports the
1648 new operation, since it might be something that a later pattern
1649 wants to rewrite anyway. If targets have a minimum element size
1650 for some optabs, we should pattern-match smaller ops to larger ops
1651 where beneficial. */
1652 tree new_vectype
= get_vectype_for_scalar_type (new_type
);
1656 if (dump_enabled_p ())
1657 dump_printf_loc (MSG_NOTE
, vect_location
, "demoting %T to %T\n",
1660 /* Calculate the rhs operands for an operation on NEW_TYPE. */
1662 for (unsigned int i
= 1; i
< first_op
; ++i
)
1663 ops
[i
- 1] = gimple_op (last_stmt
, i
);
1664 vect_convert_inputs (last_stmt_info
, nops
, &ops
[first_op
- 1],
1665 new_type
, &unprom
[0], new_vectype
);
1667 /* Use the operation to produce a result of type NEW_TYPE. */
1668 tree new_var
= vect_recog_temp_ssa_var (new_type
, NULL
);
1669 gimple
*pattern_stmt
= gimple_build_assign (new_var
, code
,
1670 ops
[0], ops
[1], ops
[2]);
1671 gimple_set_location (pattern_stmt
, gimple_location (last_stmt
));
1673 if (dump_enabled_p ())
1674 dump_printf_loc (MSG_NOTE
, vect_location
,
1675 "created pattern stmt: %G", pattern_stmt
);
1677 pattern_stmt
= vect_convert_output (last_stmt_info
, type
,
1678 pattern_stmt
, new_vectype
);
1680 return pattern_stmt
;
1683 /* Recognize the patterns:
1685 ATYPE a; // narrower than TYPE
1686 BTYPE b; // narrower than TYPE
1687 (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
1688 or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
1690 where only the bottom half of avg is used. Try to transform them into:
1692 (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
1693 or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
1697 TYPE avg = (TYPE) avg';
1699 where NTYPE is no wider than half of TYPE. Since only the bottom half
1700 of avg is used, all or part of the cast of avg' should become redundant. */
1703 vect_recog_average_pattern (stmt_vec_info last_stmt_info
, tree
*type_out
)
1705 /* Check for a shift right by one bit. */
1706 gassign
*last_stmt
= dyn_cast
<gassign
*> (last_stmt_info
->stmt
);
1707 vec_info
*vinfo
= last_stmt_info
->vinfo
;
1709 || gimple_assign_rhs_code (last_stmt
) != RSHIFT_EXPR
1710 || !integer_onep (gimple_assign_rhs2 (last_stmt
)))
1713 /* Check that the shift result is wider than the users of the
1714 result need (i.e. that narrowing would be a natural choice). */
1715 tree lhs
= gimple_assign_lhs (last_stmt
);
1716 tree type
= TREE_TYPE (lhs
);
1717 unsigned int target_precision
1718 = vect_element_precision (last_stmt_info
->min_output_precision
);
1719 if (!INTEGRAL_TYPE_P (type
) || target_precision
>= TYPE_PRECISION (type
))
1722 /* Get the definition of the shift input. */
1723 tree rshift_rhs
= gimple_assign_rhs1 (last_stmt
);
1724 stmt_vec_info plus_stmt_info
= vect_get_internal_def (vinfo
, rshift_rhs
);
1725 if (!plus_stmt_info
)
1728 /* Check whether the shift input can be seen as a tree of additions on
1729 2 or 3 widened inputs.
1731 Note that the pattern should be a win even if the result of one or
1732 more additions is reused elsewhere: if the pattern matches, we'd be
1733 replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s. */
1734 internal_fn ifn
= IFN_AVG_FLOOR
;
1735 vect_unpromoted_value unprom
[3];
1737 unsigned int nops
= vect_widened_op_tree (plus_stmt_info
, PLUS_EXPR
,
1738 PLUS_EXPR
, false, 3,
1744 /* Check that one operand is 1. */
1746 for (i
= 0; i
< 3; ++i
)
1747 if (integer_onep (unprom
[i
].op
))
1751 /* Throw away the 1 operand and keep the other two. */
1753 unprom
[i
] = unprom
[2];
1757 vect_pattern_detected ("vect_recog_average_pattern", last_stmt
);
1761 (a) the operation can be viewed as:
1763 TYPE widened0 = (TYPE) UNPROM[0];
1764 TYPE widened1 = (TYPE) UNPROM[1];
1765 TYPE tmp1 = widened0 + widened1 {+ 1};
1766 TYPE tmp2 = tmp1 >> 1; // LAST_STMT_INFO
1768 (b) the first two statements are equivalent to:
1770 TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
1771 TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
1773 (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
1776 (d) all the operations can be performed correctly at twice the width of
1777 NEW_TYPE, due to the nature of the average operation; and
1779 (e) users of the result of the right shift need only TARGET_PRECISION
1780 bits, where TARGET_PRECISION is no more than half of TYPE's
1783 Under these circumstances, the only situation in which NEW_TYPE
1784 could be narrower than TARGET_PRECISION is if widened0, widened1
1785 and an addition result are all used more than once. Thus we can
1786 treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
1787 as "free", whereas widening the result of the average instruction
1788 from NEW_TYPE to TARGET_PRECISION would be a new operation. It's
1789 therefore better not to go narrower than TARGET_PRECISION. */
1790 if (TYPE_PRECISION (new_type
) < target_precision
)
1791 new_type
= build_nonstandard_integer_type (target_precision
,
1792 TYPE_UNSIGNED (new_type
));
1794 /* Check for target support. */
1795 tree new_vectype
= get_vectype_for_scalar_type (new_type
);
1797 || !direct_internal_fn_supported_p (ifn
, new_vectype
,
1798 OPTIMIZE_FOR_SPEED
))
1801 /* The IR requires a valid vector type for the cast result, even though
1802 it's likely to be discarded. */
1803 *type_out
= get_vectype_for_scalar_type (type
);
1807 /* Generate the IFN_AVG* call. */
1808 tree new_var
= vect_recog_temp_ssa_var (new_type
, NULL
);
1810 vect_convert_inputs (last_stmt_info
, 2, new_ops
, new_type
,
1811 unprom
, new_vectype
);
1812 gcall
*average_stmt
= gimple_build_call_internal (ifn
, 2, new_ops
[0],
1814 gimple_call_set_lhs (average_stmt
, new_var
);
1815 gimple_set_location (average_stmt
, gimple_location (last_stmt
));
1817 if (dump_enabled_p ())
1818 dump_printf_loc (MSG_NOTE
, vect_location
,
1819 "created pattern stmt: %G", average_stmt
);
1821 return vect_convert_output (last_stmt_info
, type
, average_stmt
, new_vectype
);
1824 /* Recognize cases in which the input to a cast is wider than its
1825 output, and the input is fed by a widening operation. Fold this
1826 by removing the unnecessary intermediate widening. E.g.:
1829 unsigned int b = (unsigned int) a;
1830 unsigned short c = (unsigned short) b;
1834 unsigned short c = (unsigned short) a;
1836 Although this is rare in input IR, it is an expected side-effect
1837 of the over-widening pattern above.
1839 This is beneficial also for integer-to-float conversions, if the
1840 widened integer has more bits than the float, and if the unwidened
1844 vect_recog_cast_forwprop_pattern (stmt_vec_info last_stmt_info
, tree
*type_out
)
1846 /* Check for a cast, including an integer-to-float conversion. */
1847 gassign
*last_stmt
= dyn_cast
<gassign
*> (last_stmt_info
->stmt
);
1850 tree_code code
= gimple_assign_rhs_code (last_stmt
);
1851 if (!CONVERT_EXPR_CODE_P (code
) && code
!= FLOAT_EXPR
)
1854 /* Make sure that the rhs is a scalar with a natural bitsize. */
1855 tree lhs
= gimple_assign_lhs (last_stmt
);
1858 tree lhs_type
= TREE_TYPE (lhs
);
1859 scalar_mode lhs_mode
;
1860 if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type
)
1861 || !is_a
<scalar_mode
> (TYPE_MODE (lhs_type
), &lhs_mode
))
1864 /* Check for a narrowing operation (from a vector point of view). */
1865 tree rhs
= gimple_assign_rhs1 (last_stmt
);
1866 tree rhs_type
= TREE_TYPE (rhs
);
1867 if (!INTEGRAL_TYPE_P (rhs_type
)
1868 || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type
)
1869 || TYPE_PRECISION (rhs_type
) <= GET_MODE_BITSIZE (lhs_mode
))
1872 /* Try to find an unpromoted input. */
1873 vec_info
*vinfo
= last_stmt_info
->vinfo
;
1874 vect_unpromoted_value unprom
;
1875 if (!vect_look_through_possible_promotion (vinfo
, rhs
, &unprom
)
1876 || TYPE_PRECISION (unprom
.type
) >= TYPE_PRECISION (rhs_type
))
1879 /* If the bits above RHS_TYPE matter, make sure that they're the
1880 same when extending from UNPROM as they are when extending from RHS. */
1881 if (!INTEGRAL_TYPE_P (lhs_type
)
1882 && TYPE_SIGN (rhs_type
) != TYPE_SIGN (unprom
.type
))
1885 /* We can get the same result by casting UNPROM directly, to avoid
1886 the unnecessary widening and narrowing. */
1887 vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt
);
1889 *type_out
= get_vectype_for_scalar_type (lhs_type
);
1893 tree new_var
= vect_recog_temp_ssa_var (lhs_type
, NULL
);
1894 gimple
*pattern_stmt
= gimple_build_assign (new_var
, code
, unprom
.op
);
1895 gimple_set_location (pattern_stmt
, gimple_location (last_stmt
));
1897 return pattern_stmt
;
1900 /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
1901 to WIDEN_LSHIFT_EXPR. See vect_recog_widen_op_pattern for details. */
1904 vect_recog_widen_shift_pattern (stmt_vec_info last_stmt_info
, tree
*type_out
)
1906 return vect_recog_widen_op_pattern (last_stmt_info
, type_out
, LSHIFT_EXPR
,
1907 WIDEN_LSHIFT_EXPR
, true,
1908 "vect_recog_widen_shift_pattern");
1911 /* Detect a rotate pattern wouldn't be otherwise vectorized:
1915 S0 a_t = b_t r<< c_t;
1919 * STMT_VINFO: The stmt from which the pattern search begins,
1920 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1924 S2 e_t = d_t & (B - 1);
1925 S3 f_t = b_t << c_t;
1926 S4 g_t = b_t >> e_t;
1929 where B is element bitsize of type.
1933 * TYPE_OUT: The type of the output of this pattern.
1935 * Return value: A new stmt that will be used to replace the rotate
1939 vect_recog_rotate_pattern (stmt_vec_info stmt_vinfo
, tree
*type_out
)
1941 gimple
*last_stmt
= stmt_vinfo
->stmt
;
1942 tree oprnd0
, oprnd1
, lhs
, var
, var1
, var2
, vectype
, type
, stype
, def
, def2
;
1943 gimple
*pattern_stmt
, *def_stmt
;
1944 enum tree_code rhs_code
;
1945 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
1946 enum vect_def_type dt
;
1947 optab optab1
, optab2
;
1948 edge ext_def
= NULL
;
1950 if (!is_gimple_assign (last_stmt
))
1953 rhs_code
= gimple_assign_rhs_code (last_stmt
);
1963 lhs
= gimple_assign_lhs (last_stmt
);
1964 oprnd0
= gimple_assign_rhs1 (last_stmt
);
1965 type
= TREE_TYPE (oprnd0
);
1966 oprnd1
= gimple_assign_rhs2 (last_stmt
);
1967 if (TREE_CODE (oprnd0
) != SSA_NAME
1968 || TYPE_PRECISION (TREE_TYPE (lhs
)) != TYPE_PRECISION (type
)
1969 || !INTEGRAL_TYPE_P (type
)
1970 || !TYPE_UNSIGNED (type
))
1973 stmt_vec_info def_stmt_info
;
1974 if (!vect_is_simple_use (oprnd1
, vinfo
, &dt
, &def_stmt_info
, &def_stmt
))
1977 if (dt
!= vect_internal_def
1978 && dt
!= vect_constant_def
1979 && dt
!= vect_external_def
)
1982 vectype
= get_vectype_for_scalar_type (type
);
1983 if (vectype
== NULL_TREE
)
1986 /* If vector/vector or vector/scalar rotate is supported by the target,
1987 don't do anything here. */
1988 optab1
= optab_for_tree_code (rhs_code
, vectype
, optab_vector
);
1990 && optab_handler (optab1
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
1993 if (is_a
<bb_vec_info
> (vinfo
) || dt
!= vect_internal_def
)
1995 optab2
= optab_for_tree_code (rhs_code
, vectype
, optab_scalar
);
1997 && optab_handler (optab2
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
2001 /* If vector/vector or vector/scalar shifts aren't supported by the target,
2002 don't do anything here either. */
2003 optab1
= optab_for_tree_code (LSHIFT_EXPR
, vectype
, optab_vector
);
2004 optab2
= optab_for_tree_code (RSHIFT_EXPR
, vectype
, optab_vector
);
2006 || optab_handler (optab1
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
2008 || optab_handler (optab2
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
2010 if (! is_a
<bb_vec_info
> (vinfo
) && dt
== vect_internal_def
)
2012 optab1
= optab_for_tree_code (LSHIFT_EXPR
, vectype
, optab_scalar
);
2013 optab2
= optab_for_tree_code (RSHIFT_EXPR
, vectype
, optab_scalar
);
2015 || optab_handler (optab1
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
2017 || optab_handler (optab2
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
2021 *type_out
= vectype
;
2023 if (dt
== vect_external_def
2024 && TREE_CODE (oprnd1
) == SSA_NAME
)
2025 ext_def
= vect_get_external_def_edge (vinfo
, oprnd1
);
2028 scalar_int_mode mode
= SCALAR_INT_TYPE_MODE (type
);
2029 if (TREE_CODE (oprnd1
) == INTEGER_CST
2030 || TYPE_MODE (TREE_TYPE (oprnd1
)) == mode
)
2032 else if (def_stmt
&& gimple_assign_cast_p (def_stmt
))
2034 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
2035 if (TYPE_MODE (TREE_TYPE (rhs1
)) == mode
2036 && TYPE_PRECISION (TREE_TYPE (rhs1
))
2037 == TYPE_PRECISION (type
))
2041 if (def
== NULL_TREE
)
2043 def
= vect_recog_temp_ssa_var (type
, NULL
);
2044 def_stmt
= gimple_build_assign (def
, NOP_EXPR
, oprnd1
);
2048 = gsi_insert_on_edge_immediate (ext_def
, def_stmt
);
2049 gcc_assert (!new_bb
);
2052 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2054 stype
= TREE_TYPE (def
);
2055 scalar_int_mode smode
= SCALAR_INT_TYPE_MODE (stype
);
2057 if (TREE_CODE (def
) == INTEGER_CST
)
2059 if (!tree_fits_uhwi_p (def
)
2060 || tree_to_uhwi (def
) >= GET_MODE_PRECISION (mode
)
2061 || integer_zerop (def
))
2063 def2
= build_int_cst (stype
,
2064 GET_MODE_PRECISION (mode
) - tree_to_uhwi (def
));
2068 tree vecstype
= get_vectype_for_scalar_type (stype
);
2070 if (vecstype
== NULL_TREE
)
2072 def2
= vect_recog_temp_ssa_var (stype
, NULL
);
2073 def_stmt
= gimple_build_assign (def2
, NEGATE_EXPR
, def
);
2077 = gsi_insert_on_edge_immediate (ext_def
, def_stmt
);
2078 gcc_assert (!new_bb
);
2081 append_pattern_def_seq (stmt_vinfo
, def_stmt
, vecstype
);
2083 def2
= vect_recog_temp_ssa_var (stype
, NULL
);
2084 tree mask
= build_int_cst (stype
, GET_MODE_PRECISION (smode
) - 1);
2085 def_stmt
= gimple_build_assign (def2
, BIT_AND_EXPR
,
2086 gimple_assign_lhs (def_stmt
), mask
);
2090 = gsi_insert_on_edge_immediate (ext_def
, def_stmt
);
2091 gcc_assert (!new_bb
);
2094 append_pattern_def_seq (stmt_vinfo
, def_stmt
, vecstype
);
2097 var1
= vect_recog_temp_ssa_var (type
, NULL
);
2098 def_stmt
= gimple_build_assign (var1
, rhs_code
== LROTATE_EXPR
2099 ? LSHIFT_EXPR
: RSHIFT_EXPR
,
2101 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2103 var2
= vect_recog_temp_ssa_var (type
, NULL
);
2104 def_stmt
= gimple_build_assign (var2
, rhs_code
== LROTATE_EXPR
2105 ? RSHIFT_EXPR
: LSHIFT_EXPR
,
2107 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2109 /* Pattern detected. */
2110 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt
);
2112 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2113 var
= vect_recog_temp_ssa_var (type
, NULL
);
2114 pattern_stmt
= gimple_build_assign (var
, BIT_IOR_EXPR
, var1
, var2
);
2116 return pattern_stmt
;
2119 /* Detect a vector by vector shift pattern that wouldn't be otherwise
2127 S3 res_T = b_T op a_t;
2129 where type 'TYPE' is a type with different size than 'type',
2130 and op is <<, >> or rotate.
2135 TYPE b_T, c_T, res_T;
2138 S1 a_t = (type) c_T;
2140 S3 res_T = b_T op a_t;
2144 * STMT_VINFO: The stmt from which the pattern search begins,
2145 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2146 with a shift/rotate which has same type on both operands, in the
2147 second case just b_T op c_T, in the first case with added cast
2148 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2152 * TYPE_OUT: The type of the output of this pattern.
2154 * Return value: A new stmt that will be used to replace the shift/rotate
2158 vect_recog_vector_vector_shift_pattern (stmt_vec_info stmt_vinfo
,
2161 gimple
*last_stmt
= stmt_vinfo
->stmt
;
2162 tree oprnd0
, oprnd1
, lhs
, var
;
2163 gimple
*pattern_stmt
;
2164 enum tree_code rhs_code
;
2165 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
2167 if (!is_gimple_assign (last_stmt
))
2170 rhs_code
= gimple_assign_rhs_code (last_stmt
);
2182 lhs
= gimple_assign_lhs (last_stmt
);
2183 oprnd0
= gimple_assign_rhs1 (last_stmt
);
2184 oprnd1
= gimple_assign_rhs2 (last_stmt
);
2185 if (TREE_CODE (oprnd0
) != SSA_NAME
2186 || TREE_CODE (oprnd1
) != SSA_NAME
2187 || TYPE_MODE (TREE_TYPE (oprnd0
)) == TYPE_MODE (TREE_TYPE (oprnd1
))
2188 || !type_has_mode_precision_p (TREE_TYPE (oprnd1
))
2189 || TYPE_PRECISION (TREE_TYPE (lhs
))
2190 != TYPE_PRECISION (TREE_TYPE (oprnd0
)))
2193 stmt_vec_info def_vinfo
= vect_get_internal_def (vinfo
, oprnd1
);
2197 *type_out
= get_vectype_for_scalar_type (TREE_TYPE (oprnd0
));
2198 if (*type_out
== NULL_TREE
)
2201 tree def
= NULL_TREE
;
2202 gassign
*def_stmt
= dyn_cast
<gassign
*> (def_vinfo
->stmt
);
2203 if (def_stmt
&& gimple_assign_cast_p (def_stmt
))
2205 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
2206 if (TYPE_MODE (TREE_TYPE (rhs1
)) == TYPE_MODE (TREE_TYPE (oprnd0
))
2207 && TYPE_PRECISION (TREE_TYPE (rhs1
))
2208 == TYPE_PRECISION (TREE_TYPE (oprnd0
)))
2210 if (TYPE_PRECISION (TREE_TYPE (oprnd1
))
2211 >= TYPE_PRECISION (TREE_TYPE (rhs1
)))
2216 = build_low_bits_mask (TREE_TYPE (rhs1
),
2217 TYPE_PRECISION (TREE_TYPE (oprnd1
)));
2218 def
= vect_recog_temp_ssa_var (TREE_TYPE (rhs1
), NULL
);
2219 def_stmt
= gimple_build_assign (def
, BIT_AND_EXPR
, rhs1
, mask
);
2220 tree vecstype
= get_vectype_for_scalar_type (TREE_TYPE (rhs1
));
2221 append_pattern_def_seq (stmt_vinfo
, def_stmt
, vecstype
);
2226 if (def
== NULL_TREE
)
2228 def
= vect_recog_temp_ssa_var (TREE_TYPE (oprnd0
), NULL
);
2229 def_stmt
= gimple_build_assign (def
, NOP_EXPR
, oprnd1
);
2230 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2233 /* Pattern detected. */
2234 vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt
);
2236 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2237 var
= vect_recog_temp_ssa_var (TREE_TYPE (oprnd0
), NULL
);
2238 pattern_stmt
= gimple_build_assign (var
, rhs_code
, oprnd0
, def
);
2240 return pattern_stmt
;
2243 /* Return true iff the target has a vector optab implementing the operation
2244 CODE on type VECTYPE. */
2247 target_has_vecop_for_code (tree_code code
, tree vectype
)
2249 optab voptab
= optab_for_tree_code (code
, vectype
, optab_vector
);
2251 && optab_handler (voptab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
;
2254 /* Verify that the target has optabs of VECTYPE to perform all the steps
2255 needed by the multiplication-by-immediate synthesis algorithm described by
2256 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
2257 present. Return true iff the target supports all the steps. */
2260 target_supports_mult_synth_alg (struct algorithm
*alg
, mult_variant var
,
2261 tree vectype
, bool synth_shift_p
)
2263 if (alg
->op
[0] != alg_zero
&& alg
->op
[0] != alg_m
)
2266 bool supports_vminus
= target_has_vecop_for_code (MINUS_EXPR
, vectype
);
2267 bool supports_vplus
= target_has_vecop_for_code (PLUS_EXPR
, vectype
);
2269 if (var
== negate_variant
2270 && !target_has_vecop_for_code (NEGATE_EXPR
, vectype
))
2273 /* If we must synthesize shifts with additions make sure that vector
2274 addition is available. */
2275 if ((var
== add_variant
|| synth_shift_p
) && !supports_vplus
)
2278 for (int i
= 1; i
< alg
->ops
; i
++)
2286 case alg_add_factor
:
2287 if (!supports_vplus
)
2292 case alg_sub_factor
:
2293 if (!supports_vminus
)
2299 case alg_impossible
:
2309 /* Synthesize a left shift of OP by AMNT bits using a series of additions and
2310 putting the final result in DEST. Append all statements but the last into
2311 VINFO. Return the last statement. */
2314 synth_lshift_by_additions (tree dest
, tree op
, HOST_WIDE_INT amnt
,
2315 stmt_vec_info vinfo
)
2318 tree itype
= TREE_TYPE (op
);
2320 gcc_assert (amnt
>= 0);
2321 for (i
= 0; i
< amnt
; i
++)
2323 tree tmp_var
= (i
< amnt
- 1) ? vect_recog_temp_ssa_var (itype
, NULL
)
2326 = gimple_build_assign (tmp_var
, PLUS_EXPR
, prev_res
, prev_res
);
2329 append_pattern_def_seq (vinfo
, stmt
);
2337 /* Helper for vect_synth_mult_by_constant. Apply a binary operation
2338 CODE to operands OP1 and OP2, creating a new temporary SSA var in
2339 the process if necessary. Append the resulting assignment statements
2340 to the sequence in STMT_VINFO. Return the SSA variable that holds the
2341 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
2342 left shifts using additions. */
2345 apply_binop_and_append_stmt (tree_code code
, tree op1
, tree op2
,
2346 stmt_vec_info stmt_vinfo
, bool synth_shift_p
)
2348 if (integer_zerop (op2
)
2349 && (code
== LSHIFT_EXPR
2350 || code
== PLUS_EXPR
))
2352 gcc_assert (TREE_CODE (op1
) == SSA_NAME
);
2357 tree itype
= TREE_TYPE (op1
);
2358 tree tmp_var
= vect_recog_temp_ssa_var (itype
, NULL
);
2360 if (code
== LSHIFT_EXPR
2363 stmt
= synth_lshift_by_additions (tmp_var
, op1
, TREE_INT_CST_LOW (op2
),
2365 append_pattern_def_seq (stmt_vinfo
, stmt
);
2369 stmt
= gimple_build_assign (tmp_var
, code
, op1
, op2
);
2370 append_pattern_def_seq (stmt_vinfo
, stmt
);
2374 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
2375 and simple arithmetic operations to be vectorized. Record the statements
2376 produced in STMT_VINFO and return the last statement in the sequence or
2377 NULL if it's not possible to synthesize such a multiplication.
2378 This function mirrors the behavior of expand_mult_const in expmed.c but
2379 works on tree-ssa form. */
2382 vect_synth_mult_by_constant (tree op
, tree val
,
2383 stmt_vec_info stmt_vinfo
)
2385 tree itype
= TREE_TYPE (op
);
2386 machine_mode mode
= TYPE_MODE (itype
);
2387 struct algorithm alg
;
2388 mult_variant variant
;
2389 if (!tree_fits_shwi_p (val
))
2392 /* Multiplication synthesis by shifts, adds and subs can introduce
2393 signed overflow where the original operation didn't. Perform the
2394 operations on an unsigned type and cast back to avoid this.
2395 In the future we may want to relax this for synthesis algorithms
2396 that we can prove do not cause unexpected overflow. */
2397 bool cast_to_unsigned_p
= !TYPE_OVERFLOW_WRAPS (itype
);
2399 tree multtype
= cast_to_unsigned_p
? unsigned_type_for (itype
) : itype
;
2401 /* Targets that don't support vector shifts but support vector additions
2402 can synthesize shifts that way. */
2403 bool synth_shift_p
= !vect_supportable_shift (LSHIFT_EXPR
, multtype
);
2405 HOST_WIDE_INT hwval
= tree_to_shwi (val
);
2406 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
2407 The vectorizer's benefit analysis will decide whether it's beneficial
2409 bool possible
= choose_mult_variant (mode
, hwval
, &alg
,
2410 &variant
, MAX_COST
);
2414 tree vectype
= get_vectype_for_scalar_type (multtype
);
2417 || !target_supports_mult_synth_alg (&alg
, variant
,
2418 vectype
, synth_shift_p
))
2423 /* Clear out the sequence of statements so we can populate it below. */
2424 gimple
*stmt
= NULL
;
2426 if (cast_to_unsigned_p
)
2428 tree tmp_op
= vect_recog_temp_ssa_var (multtype
, NULL
);
2429 stmt
= gimple_build_assign (tmp_op
, CONVERT_EXPR
, op
);
2430 append_pattern_def_seq (stmt_vinfo
, stmt
);
2434 if (alg
.op
[0] == alg_zero
)
2435 accumulator
= build_int_cst (multtype
, 0);
2439 bool needs_fixup
= (variant
== negate_variant
)
2440 || (variant
== add_variant
);
2442 for (int i
= 1; i
< alg
.ops
; i
++)
2444 tree shft_log
= build_int_cst (multtype
, alg
.log
[i
]);
2445 tree accum_tmp
= vect_recog_temp_ssa_var (multtype
, NULL
);
2446 tree tmp_var
= NULL_TREE
;
2453 = synth_lshift_by_additions (accum_tmp
, accumulator
, alg
.log
[i
],
2456 stmt
= gimple_build_assign (accum_tmp
, LSHIFT_EXPR
, accumulator
,
2461 = apply_binop_and_append_stmt (LSHIFT_EXPR
, op
, shft_log
,
2462 stmt_vinfo
, synth_shift_p
);
2463 stmt
= gimple_build_assign (accum_tmp
, PLUS_EXPR
, accumulator
,
2467 tmp_var
= apply_binop_and_append_stmt (LSHIFT_EXPR
, op
,
2468 shft_log
, stmt_vinfo
,
2470 /* In some algorithms the first step involves zeroing the
2471 accumulator. If subtracting from such an accumulator
2472 just emit the negation directly. */
2473 if (integer_zerop (accumulator
))
2474 stmt
= gimple_build_assign (accum_tmp
, NEGATE_EXPR
, tmp_var
);
2476 stmt
= gimple_build_assign (accum_tmp
, MINUS_EXPR
, accumulator
,
2481 = apply_binop_and_append_stmt (LSHIFT_EXPR
, accumulator
, shft_log
,
2482 stmt_vinfo
, synth_shift_p
);
2483 stmt
= gimple_build_assign (accum_tmp
, PLUS_EXPR
, tmp_var
, op
);
2487 = apply_binop_and_append_stmt (LSHIFT_EXPR
, accumulator
, shft_log
,
2488 stmt_vinfo
, synth_shift_p
);
2489 stmt
= gimple_build_assign (accum_tmp
, MINUS_EXPR
, tmp_var
, op
);
2491 case alg_add_factor
:
2493 = apply_binop_and_append_stmt (LSHIFT_EXPR
, accumulator
, shft_log
,
2494 stmt_vinfo
, synth_shift_p
);
2495 stmt
= gimple_build_assign (accum_tmp
, PLUS_EXPR
, accumulator
,
2498 case alg_sub_factor
:
2500 = apply_binop_and_append_stmt (LSHIFT_EXPR
, accumulator
, shft_log
,
2501 stmt_vinfo
, synth_shift_p
);
2502 stmt
= gimple_build_assign (accum_tmp
, MINUS_EXPR
, tmp_var
,
2508 /* We don't want to append the last stmt in the sequence to stmt_vinfo
2509 but rather return it directly. */
2511 if ((i
< alg
.ops
- 1) || needs_fixup
|| cast_to_unsigned_p
)
2512 append_pattern_def_seq (stmt_vinfo
, stmt
);
2513 accumulator
= accum_tmp
;
2515 if (variant
== negate_variant
)
2517 tree accum_tmp
= vect_recog_temp_ssa_var (multtype
, NULL
);
2518 stmt
= gimple_build_assign (accum_tmp
, NEGATE_EXPR
, accumulator
);
2519 accumulator
= accum_tmp
;
2520 if (cast_to_unsigned_p
)
2521 append_pattern_def_seq (stmt_vinfo
, stmt
);
2523 else if (variant
== add_variant
)
2525 tree accum_tmp
= vect_recog_temp_ssa_var (multtype
, NULL
);
2526 stmt
= gimple_build_assign (accum_tmp
, PLUS_EXPR
, accumulator
, op
);
2527 accumulator
= accum_tmp
;
2528 if (cast_to_unsigned_p
)
2529 append_pattern_def_seq (stmt_vinfo
, stmt
);
2531 /* Move back to a signed if needed. */
2532 if (cast_to_unsigned_p
)
2534 tree accum_tmp
= vect_recog_temp_ssa_var (itype
, NULL
);
2535 stmt
= gimple_build_assign (accum_tmp
, CONVERT_EXPR
, accumulator
);
2541 /* Detect multiplication by constant and convert it into a sequence of
2542 shifts and additions, subtractions, negations. We reuse the
2543 choose_mult_variant algorithms from expmed.c
2547 STMT_VINFO: The stmt from which the pattern search begins,
2552 * TYPE_OUT: The type of the output of this pattern.
2554 * Return value: A new stmt that will be used to replace
2555 the multiplication. */
2558 vect_recog_mult_pattern (stmt_vec_info stmt_vinfo
, tree
*type_out
)
2560 gimple
*last_stmt
= stmt_vinfo
->stmt
;
2561 tree oprnd0
, oprnd1
, vectype
, itype
;
2562 gimple
*pattern_stmt
;
2564 if (!is_gimple_assign (last_stmt
))
2567 if (gimple_assign_rhs_code (last_stmt
) != MULT_EXPR
)
2570 oprnd0
= gimple_assign_rhs1 (last_stmt
);
2571 oprnd1
= gimple_assign_rhs2 (last_stmt
);
2572 itype
= TREE_TYPE (oprnd0
);
2574 if (TREE_CODE (oprnd0
) != SSA_NAME
2575 || TREE_CODE (oprnd1
) != INTEGER_CST
2576 || !INTEGRAL_TYPE_P (itype
)
2577 || !type_has_mode_precision_p (itype
))
2580 vectype
= get_vectype_for_scalar_type (itype
);
2581 if (vectype
== NULL_TREE
)
2584 /* If the target can handle vectorized multiplication natively,
2585 don't attempt to optimize this. */
2586 optab mul_optab
= optab_for_tree_code (MULT_EXPR
, vectype
, optab_default
);
2587 if (mul_optab
!= unknown_optab
)
2589 machine_mode vec_mode
= TYPE_MODE (vectype
);
2590 int icode
= (int) optab_handler (mul_optab
, vec_mode
);
2591 if (icode
!= CODE_FOR_nothing
)
2595 pattern_stmt
= vect_synth_mult_by_constant (oprnd0
, oprnd1
, stmt_vinfo
);
2599 /* Pattern detected. */
2600 vect_pattern_detected ("vect_recog_mult_pattern", last_stmt
);
2602 *type_out
= vectype
;
2604 return pattern_stmt
;
2607 /* Detect a signed division by a constant that wouldn't be
2608 otherwise vectorized:
2614 where type 'type' is an integral type and N is a constant.
2616 Similarly handle modulo by a constant:
2622 * STMT_VINFO: The stmt from which the pattern search begins,
2623 i.e. the division stmt. S1 is replaced by if N is a power
2624 of two constant and type is signed:
2625 S3 y_t = b_t < 0 ? N - 1 : 0;
2627 S1' a_t = x_t >> log2 (N);
2629 S4 is replaced if N is a power of two constant and
2630 type is signed by (where *_T temporaries have unsigned type):
2631 S9 y_T = b_t < 0 ? -1U : 0U;
2632 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2633 S7 z_t = (type) z_T;
2635 S5 x_t = w_t & (N - 1);
2636 S4' a_t = x_t - z_t;
2640 * TYPE_OUT: The type of the output of this pattern.
2642 * Return value: A new stmt that will be used to replace the division
2643 S1 or modulo S4 stmt. */
2646 vect_recog_divmod_pattern (stmt_vec_info stmt_vinfo
, tree
*type_out
)
2648 gimple
*last_stmt
= stmt_vinfo
->stmt
;
2649 tree oprnd0
, oprnd1
, vectype
, itype
, cond
;
2650 gimple
*pattern_stmt
, *def_stmt
;
2651 enum tree_code rhs_code
;
2654 int dummy_int
, prec
;
2656 if (!is_gimple_assign (last_stmt
))
2659 rhs_code
= gimple_assign_rhs_code (last_stmt
);
2662 case TRUNC_DIV_EXPR
:
2663 case EXACT_DIV_EXPR
:
2664 case TRUNC_MOD_EXPR
:
2670 oprnd0
= gimple_assign_rhs1 (last_stmt
);
2671 oprnd1
= gimple_assign_rhs2 (last_stmt
);
2672 itype
= TREE_TYPE (oprnd0
);
2673 if (TREE_CODE (oprnd0
) != SSA_NAME
2674 || TREE_CODE (oprnd1
) != INTEGER_CST
2675 || TREE_CODE (itype
) != INTEGER_TYPE
2676 || !type_has_mode_precision_p (itype
))
2679 scalar_int_mode itype_mode
= SCALAR_INT_TYPE_MODE (itype
);
2680 vectype
= get_vectype_for_scalar_type (itype
);
2681 if (vectype
== NULL_TREE
)
2684 if (optimize_bb_for_size_p (gimple_bb (last_stmt
)))
2686 /* If the target can handle vectorized division or modulo natively,
2687 don't attempt to optimize this, since native division is likely
2688 to give smaller code. */
2689 optab
= optab_for_tree_code (rhs_code
, vectype
, optab_default
);
2690 if (optab
!= unknown_optab
)
2692 machine_mode vec_mode
= TYPE_MODE (vectype
);
2693 int icode
= (int) optab_handler (optab
, vec_mode
);
2694 if (icode
!= CODE_FOR_nothing
)
2699 prec
= TYPE_PRECISION (itype
);
2700 if (integer_pow2p (oprnd1
))
2702 if (TYPE_UNSIGNED (itype
) || tree_int_cst_sgn (oprnd1
) != 1)
2705 /* Pattern detected. */
2706 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt
);
2708 cond
= build2 (LT_EXPR
, boolean_type_node
, oprnd0
,
2709 build_int_cst (itype
, 0));
2710 if (rhs_code
== TRUNC_DIV_EXPR
2711 || rhs_code
== EXACT_DIV_EXPR
)
2713 tree var
= vect_recog_temp_ssa_var (itype
, NULL
);
2716 = gimple_build_assign (var
, COND_EXPR
, cond
,
2717 fold_build2 (MINUS_EXPR
, itype
, oprnd1
,
2718 build_int_cst (itype
, 1)),
2719 build_int_cst (itype
, 0));
2720 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2721 var
= vect_recog_temp_ssa_var (itype
, NULL
);
2723 = gimple_build_assign (var
, PLUS_EXPR
, oprnd0
,
2724 gimple_assign_lhs (def_stmt
));
2725 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2727 shift
= build_int_cst (itype
, tree_log2 (oprnd1
));
2729 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2730 RSHIFT_EXPR
, var
, shift
);
2735 if (compare_tree_int (oprnd1
, 2) == 0)
2737 signmask
= vect_recog_temp_ssa_var (itype
, NULL
);
2738 def_stmt
= gimple_build_assign (signmask
, COND_EXPR
, cond
,
2739 build_int_cst (itype
, 1),
2740 build_int_cst (itype
, 0));
2741 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2746 = build_nonstandard_integer_type (prec
, 1);
2747 tree vecutype
= get_vectype_for_scalar_type (utype
);
2749 = build_int_cst (utype
, GET_MODE_BITSIZE (itype_mode
)
2750 - tree_log2 (oprnd1
));
2751 tree var
= vect_recog_temp_ssa_var (utype
, NULL
);
2753 def_stmt
= gimple_build_assign (var
, COND_EXPR
, cond
,
2754 build_int_cst (utype
, -1),
2755 build_int_cst (utype
, 0));
2756 append_pattern_def_seq (stmt_vinfo
, def_stmt
, vecutype
);
2757 var
= vect_recog_temp_ssa_var (utype
, NULL
);
2758 def_stmt
= gimple_build_assign (var
, RSHIFT_EXPR
,
2759 gimple_assign_lhs (def_stmt
),
2761 append_pattern_def_seq (stmt_vinfo
, def_stmt
, vecutype
);
2762 signmask
= vect_recog_temp_ssa_var (itype
, NULL
);
2764 = gimple_build_assign (signmask
, NOP_EXPR
, var
);
2765 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2768 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2769 PLUS_EXPR
, oprnd0
, signmask
);
2770 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2772 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2773 BIT_AND_EXPR
, gimple_assign_lhs (def_stmt
),
2774 fold_build2 (MINUS_EXPR
, itype
, oprnd1
,
2775 build_int_cst (itype
, 1)));
2776 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2779 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2780 MINUS_EXPR
, gimple_assign_lhs (def_stmt
),
2784 *type_out
= vectype
;
2785 return pattern_stmt
;
2788 if (prec
> HOST_BITS_PER_WIDE_INT
2789 || integer_zerop (oprnd1
))
2792 if (!can_mult_highpart_p (TYPE_MODE (vectype
), TYPE_UNSIGNED (itype
)))
2795 if (TYPE_UNSIGNED (itype
))
2797 unsigned HOST_WIDE_INT mh
, ml
;
2798 int pre_shift
, post_shift
;
2799 unsigned HOST_WIDE_INT d
= (TREE_INT_CST_LOW (oprnd1
)
2800 & GET_MODE_MASK (itype_mode
));
2801 tree t1
, t2
, t3
, t4
;
2803 if (d
>= (HOST_WIDE_INT_1U
<< (prec
- 1)))
2804 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2807 /* Find a suitable multiplier and right shift count
2808 instead of multiplying with D. */
2809 mh
= choose_multiplier (d
, prec
, prec
, &ml
, &post_shift
, &dummy_int
);
2811 /* If the suggested multiplier is more than SIZE bits, we can do better
2812 for even divisors, using an initial right shift. */
2813 if (mh
!= 0 && (d
& 1) == 0)
2815 pre_shift
= ctz_or_zero (d
);
2816 mh
= choose_multiplier (d
>> pre_shift
, prec
, prec
- pre_shift
,
2817 &ml
, &post_shift
, &dummy_int
);
2825 if (post_shift
- 1 >= prec
)
2828 /* t1 = oprnd0 h* ml;
2832 q = t4 >> (post_shift - 1); */
2833 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
2834 def_stmt
= gimple_build_assign (t1
, MULT_HIGHPART_EXPR
, oprnd0
,
2835 build_int_cst (itype
, ml
));
2836 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2838 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
2840 = gimple_build_assign (t2
, MINUS_EXPR
, oprnd0
, t1
);
2841 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2843 t3
= vect_recog_temp_ssa_var (itype
, NULL
);
2845 = gimple_build_assign (t3
, RSHIFT_EXPR
, t2
, integer_one_node
);
2846 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2848 t4
= vect_recog_temp_ssa_var (itype
, NULL
);
2850 = gimple_build_assign (t4
, PLUS_EXPR
, t1
, t3
);
2852 if (post_shift
!= 1)
2854 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2856 q
= vect_recog_temp_ssa_var (itype
, NULL
);
2858 = gimple_build_assign (q
, RSHIFT_EXPR
, t4
,
2859 build_int_cst (itype
, post_shift
- 1));
2864 pattern_stmt
= def_stmt
;
2869 if (pre_shift
>= prec
|| post_shift
>= prec
)
2872 /* t1 = oprnd0 >> pre_shift;
2874 q = t2 >> post_shift; */
2877 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
2879 = gimple_build_assign (t1
, RSHIFT_EXPR
, oprnd0
,
2880 build_int_cst (NULL
, pre_shift
));
2881 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2886 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
2887 def_stmt
= gimple_build_assign (t2
, MULT_HIGHPART_EXPR
, t1
,
2888 build_int_cst (itype
, ml
));
2892 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2894 q
= vect_recog_temp_ssa_var (itype
, NULL
);
2896 = gimple_build_assign (q
, RSHIFT_EXPR
, t2
,
2897 build_int_cst (itype
, post_shift
));
2902 pattern_stmt
= def_stmt
;
2907 unsigned HOST_WIDE_INT ml
;
2909 HOST_WIDE_INT d
= TREE_INT_CST_LOW (oprnd1
);
2910 unsigned HOST_WIDE_INT abs_d
;
2912 tree t1
, t2
, t3
, t4
;
2914 /* Give up for -1. */
2918 /* Since d might be INT_MIN, we have to cast to
2919 unsigned HOST_WIDE_INT before negating to avoid
2920 undefined signed overflow. */
2922 ? (unsigned HOST_WIDE_INT
) d
2923 : - (unsigned HOST_WIDE_INT
) d
);
2925 /* n rem d = n rem -d */
2926 if (rhs_code
== TRUNC_MOD_EXPR
&& d
< 0)
2929 oprnd1
= build_int_cst (itype
, abs_d
);
2931 else if (HOST_BITS_PER_WIDE_INT
>= prec
2932 && abs_d
== HOST_WIDE_INT_1U
<< (prec
- 1))
2933 /* This case is not handled correctly below. */
2936 choose_multiplier (abs_d
, prec
, prec
- 1, &ml
, &post_shift
, &dummy_int
);
2937 if (ml
>= HOST_WIDE_INT_1U
<< (prec
- 1))
2940 ml
|= HOST_WIDE_INT_M1U
<< (prec
- 1);
2942 if (post_shift
>= prec
)
2945 /* t1 = oprnd0 h* ml; */
2946 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
2947 def_stmt
= gimple_build_assign (t1
, MULT_HIGHPART_EXPR
, oprnd0
,
2948 build_int_cst (itype
, ml
));
2952 /* t2 = t1 + oprnd0; */
2953 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2954 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
2955 def_stmt
= gimple_build_assign (t2
, PLUS_EXPR
, t1
, oprnd0
);
2962 /* t3 = t2 >> post_shift; */
2963 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2964 t3
= vect_recog_temp_ssa_var (itype
, NULL
);
2965 def_stmt
= gimple_build_assign (t3
, RSHIFT_EXPR
, t2
,
2966 build_int_cst (itype
, post_shift
));
2971 wide_int oprnd0_min
, oprnd0_max
;
2973 if (get_range_info (oprnd0
, &oprnd0_min
, &oprnd0_max
) == VR_RANGE
)
2975 if (!wi::neg_p (oprnd0_min
, TYPE_SIGN (itype
)))
2977 else if (wi::neg_p (oprnd0_max
, TYPE_SIGN (itype
)))
2981 if (msb
== 0 && d
>= 0)
2985 pattern_stmt
= def_stmt
;
2989 /* t4 = oprnd0 >> (prec - 1);
2990 or if we know from VRP that oprnd0 >= 0
2992 or if we know from VRP that oprnd0 < 0
2994 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2995 t4
= vect_recog_temp_ssa_var (itype
, NULL
);
2997 def_stmt
= gimple_build_assign (t4
, INTEGER_CST
,
2998 build_int_cst (itype
, msb
));
3000 def_stmt
= gimple_build_assign (t4
, RSHIFT_EXPR
, oprnd0
,
3001 build_int_cst (itype
, prec
- 1));
3002 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
3004 /* q = t3 - t4; or q = t4 - t3; */
3005 q
= vect_recog_temp_ssa_var (itype
, NULL
);
3006 pattern_stmt
= gimple_build_assign (q
, MINUS_EXPR
, d
< 0 ? t4
: t3
,
3011 if (rhs_code
== TRUNC_MOD_EXPR
)
3015 /* We divided. Now finish by:
3018 append_pattern_def_seq (stmt_vinfo
, pattern_stmt
);
3020 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
3021 def_stmt
= gimple_build_assign (t1
, MULT_EXPR
, q
, oprnd1
);
3022 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
3024 r
= vect_recog_temp_ssa_var (itype
, NULL
);
3025 pattern_stmt
= gimple_build_assign (r
, MINUS_EXPR
, oprnd0
, t1
);
3028 /* Pattern detected. */
3029 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt
);
3031 *type_out
= vectype
;
3032 return pattern_stmt
;
3035 /* Function vect_recog_mixed_size_cond_pattern
3037 Try to find the following pattern:
3042 S1 a_T = x_t CMP y_t ? b_T : c_T;
3044 where type 'TYPE' is an integral type which has different size
3045 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
3046 than 'type', the constants need to fit into an integer type
3047 with the same width as 'type') or results of conversion from 'type'.
3051 * STMT_VINFO: The stmt from which the pattern search begins.
3055 * TYPE_OUT: The type of the output of this pattern.
3057 * Return value: A new stmt that will be used to replace the pattern.
3058 Additionally a def_stmt is added.
3060 a_it = x_t CMP y_t ? b_it : c_it;
3061 a_T = (TYPE) a_it; */
3064 vect_recog_mixed_size_cond_pattern (stmt_vec_info stmt_vinfo
, tree
*type_out
)
3066 gimple
*last_stmt
= stmt_vinfo
->stmt
;
3067 tree cond_expr
, then_clause
, else_clause
;
3068 tree type
, vectype
, comp_vectype
, itype
= NULL_TREE
, vecitype
;
3069 gimple
*pattern_stmt
, *def_stmt
;
3070 tree orig_type0
= NULL_TREE
, orig_type1
= NULL_TREE
;
3071 gimple
*def_stmt0
= NULL
, *def_stmt1
= NULL
;
3073 tree comp_scalar_type
;
3075 if (!is_gimple_assign (last_stmt
)
3076 || gimple_assign_rhs_code (last_stmt
) != COND_EXPR
3077 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_internal_def
)
3080 cond_expr
= gimple_assign_rhs1 (last_stmt
);
3081 then_clause
= gimple_assign_rhs2 (last_stmt
);
3082 else_clause
= gimple_assign_rhs3 (last_stmt
);
3084 if (!COMPARISON_CLASS_P (cond_expr
))
3087 comp_scalar_type
= TREE_TYPE (TREE_OPERAND (cond_expr
, 0));
3088 comp_vectype
= get_vectype_for_scalar_type (comp_scalar_type
);
3089 if (comp_vectype
== NULL_TREE
)
3092 type
= gimple_expr_type (last_stmt
);
3093 if (types_compatible_p (type
, comp_scalar_type
)
3094 || ((TREE_CODE (then_clause
) != INTEGER_CST
3095 || TREE_CODE (else_clause
) != INTEGER_CST
)
3096 && !INTEGRAL_TYPE_P (comp_scalar_type
))
3097 || !INTEGRAL_TYPE_P (type
))
3100 if ((TREE_CODE (then_clause
) != INTEGER_CST
3101 && !type_conversion_p (then_clause
, stmt_vinfo
, false, &orig_type0
,
3102 &def_stmt0
, &promotion
))
3103 || (TREE_CODE (else_clause
) != INTEGER_CST
3104 && !type_conversion_p (else_clause
, stmt_vinfo
, false, &orig_type1
,
3105 &def_stmt1
, &promotion
)))
3108 if (orig_type0
&& orig_type1
3109 && !types_compatible_p (orig_type0
, orig_type1
))
3114 if (!types_compatible_p (orig_type0
, comp_scalar_type
))
3116 then_clause
= gimple_assign_rhs1 (def_stmt0
);
3122 if (!types_compatible_p (orig_type1
, comp_scalar_type
))
3124 else_clause
= gimple_assign_rhs1 (def_stmt1
);
3129 HOST_WIDE_INT cmp_mode_size
3130 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype
));
3132 scalar_int_mode type_mode
= SCALAR_INT_TYPE_MODE (type
);
3133 if (GET_MODE_BITSIZE (type_mode
) == cmp_mode_size
)
3136 vectype
= get_vectype_for_scalar_type (type
);
3137 if (vectype
== NULL_TREE
)
3140 if (expand_vec_cond_expr_p (vectype
, comp_vectype
, TREE_CODE (cond_expr
)))
3143 if (itype
== NULL_TREE
)
3144 itype
= build_nonstandard_integer_type (cmp_mode_size
,
3145 TYPE_UNSIGNED (type
));
3147 if (itype
== NULL_TREE
3148 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype
)) != cmp_mode_size
)
3151 vecitype
= get_vectype_for_scalar_type (itype
);
3152 if (vecitype
== NULL_TREE
)
3155 if (!expand_vec_cond_expr_p (vecitype
, comp_vectype
, TREE_CODE (cond_expr
)))
3158 if (GET_MODE_BITSIZE (type_mode
) > cmp_mode_size
)
3160 if ((TREE_CODE (then_clause
) == INTEGER_CST
3161 && !int_fits_type_p (then_clause
, itype
))
3162 || (TREE_CODE (else_clause
) == INTEGER_CST
3163 && !int_fits_type_p (else_clause
, itype
)))
3167 def_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
3168 COND_EXPR
, unshare_expr (cond_expr
),
3169 fold_convert (itype
, then_clause
),
3170 fold_convert (itype
, else_clause
));
3171 pattern_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (type
, NULL
),
3172 NOP_EXPR
, gimple_assign_lhs (def_stmt
));
3174 append_pattern_def_seq (stmt_vinfo
, def_stmt
, vecitype
);
3175 *type_out
= vectype
;
3177 vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt
);
3179 return pattern_stmt
;
3183 /* Helper function of vect_recog_bool_pattern. Called recursively, return
3184 true if bool VAR can and should be optimized that way. Assume it shouldn't
3185 in case it's a result of a comparison which can be directly vectorized into
3186 a vector comparison. Fills in STMTS with all stmts visited during the
3190 check_bool_pattern (tree var
, vec_info
*vinfo
, hash_set
<gimple
*> &stmts
)
3193 enum tree_code rhs_code
;
3195 stmt_vec_info def_stmt_info
= vect_get_internal_def (vinfo
, var
);
3199 gassign
*def_stmt
= dyn_cast
<gassign
*> (def_stmt_info
->stmt
);
3203 if (stmts
.contains (def_stmt
))
3206 rhs1
= gimple_assign_rhs1 (def_stmt
);
3207 rhs_code
= gimple_assign_rhs_code (def_stmt
);
3211 if (! check_bool_pattern (rhs1
, vinfo
, stmts
))
3216 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1
)))
3218 if (! check_bool_pattern (rhs1
, vinfo
, stmts
))
3223 if (! check_bool_pattern (rhs1
, vinfo
, stmts
))
3230 if (! check_bool_pattern (rhs1
, vinfo
, stmts
)
3231 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt
), vinfo
, stmts
))
3236 if (TREE_CODE_CLASS (rhs_code
) == tcc_comparison
)
3238 tree vecitype
, comp_vectype
;
3240 /* If the comparison can throw, then is_gimple_condexpr will be
3241 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
3242 if (stmt_could_throw_p (cfun
, def_stmt
))
3245 comp_vectype
= get_vectype_for_scalar_type (TREE_TYPE (rhs1
));
3246 if (comp_vectype
== NULL_TREE
)
3249 tree mask_type
= get_mask_type_for_scalar_type (TREE_TYPE (rhs1
));
3251 && expand_vec_cmp_expr_p (comp_vectype
, mask_type
, rhs_code
))
3254 if (TREE_CODE (TREE_TYPE (rhs1
)) != INTEGER_TYPE
)
3256 scalar_mode mode
= SCALAR_TYPE_MODE (TREE_TYPE (rhs1
));
3258 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode
), 1);
3259 vecitype
= get_vectype_for_scalar_type (itype
);
3260 if (vecitype
== NULL_TREE
)
3264 vecitype
= comp_vectype
;
3265 if (! expand_vec_cond_expr_p (vecitype
, comp_vectype
, rhs_code
))
3273 bool res
= stmts
.add (def_stmt
);
3274 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
3281 /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
3282 stmt (SSA_NAME_DEF_STMT of VAR) adding a cast to STMT_INFOs
3283 pattern sequence. */
3286 adjust_bool_pattern_cast (tree type
, tree var
, stmt_vec_info stmt_info
)
3288 gimple
*cast_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (type
, NULL
),
3290 append_pattern_def_seq (stmt_info
, cast_stmt
,
3291 get_vectype_for_scalar_type (type
));
3292 return gimple_assign_lhs (cast_stmt
);
3295 /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
3296 VAR is an SSA_NAME that should be transformed from bool to a wider integer
3297 type, OUT_TYPE is the desired final integer type of the whole pattern.
3298 STMT_INFO is the info of the pattern root and is where pattern stmts should
3299 be associated with. DEFS is a map of pattern defs. */
3302 adjust_bool_pattern (tree var
, tree out_type
,
3303 stmt_vec_info stmt_info
, hash_map
<tree
, tree
> &defs
)
3305 gimple
*stmt
= SSA_NAME_DEF_STMT (var
);
3306 enum tree_code rhs_code
, def_rhs_code
;
3307 tree itype
, cond_expr
, rhs1
, rhs2
, irhs1
, irhs2
;
3309 gimple
*pattern_stmt
, *def_stmt
;
3310 tree trueval
= NULL_TREE
;
3312 rhs1
= gimple_assign_rhs1 (stmt
);
3313 rhs2
= gimple_assign_rhs2 (stmt
);
3314 rhs_code
= gimple_assign_rhs_code (stmt
);
3315 loc
= gimple_location (stmt
);
3320 irhs1
= *defs
.get (rhs1
);
3321 itype
= TREE_TYPE (irhs1
);
3323 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
3328 irhs1
= *defs
.get (rhs1
);
3329 itype
= TREE_TYPE (irhs1
);
3331 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
3332 BIT_XOR_EXPR
, irhs1
, build_int_cst (itype
, 1));
3336 /* Try to optimize x = y & (a < b ? 1 : 0); into
3337 x = (a < b ? y : 0);
3343 S1 a_b = x1 CMP1 y1;
3344 S2 b_b = x2 CMP2 y2;
3346 S4 d_T = (TYPE) c_b;
3348 we would normally emit:
3350 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3351 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3352 S3' c_T = a_T & b_T;
3355 but we can save one stmt by using the
3356 result of one of the COND_EXPRs in the other COND_EXPR and leave
3357 BIT_AND_EXPR stmt out:
3359 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3360 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3363 At least when VEC_COND_EXPR is implemented using masks
3364 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
3365 computes the comparison masks and ands it, in one case with
3366 all ones vector, in the other case with a vector register.
3367 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
3368 often more expensive. */
3369 def_stmt
= SSA_NAME_DEF_STMT (rhs2
);
3370 def_rhs_code
= gimple_assign_rhs_code (def_stmt
);
3371 if (TREE_CODE_CLASS (def_rhs_code
) == tcc_comparison
)
3373 irhs1
= *defs
.get (rhs1
);
3374 tree def_rhs1
= gimple_assign_rhs1 (def_stmt
);
3375 if (TYPE_PRECISION (TREE_TYPE (irhs1
))
3376 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1
))))
3378 rhs_code
= def_rhs_code
;
3380 rhs2
= gimple_assign_rhs2 (def_stmt
);
3385 irhs2
= *defs
.get (rhs2
);
3388 def_stmt
= SSA_NAME_DEF_STMT (rhs1
);
3389 def_rhs_code
= gimple_assign_rhs_code (def_stmt
);
3390 if (TREE_CODE_CLASS (def_rhs_code
) == tcc_comparison
)
3392 irhs2
= *defs
.get (rhs2
);
3393 tree def_rhs1
= gimple_assign_rhs1 (def_stmt
);
3394 if (TYPE_PRECISION (TREE_TYPE (irhs2
))
3395 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1
))))
3397 rhs_code
= def_rhs_code
;
3399 rhs2
= gimple_assign_rhs2 (def_stmt
);
3404 irhs1
= *defs
.get (rhs1
);
3410 irhs1
= *defs
.get (rhs1
);
3411 irhs2
= *defs
.get (rhs2
);
3413 if (TYPE_PRECISION (TREE_TYPE (irhs1
))
3414 != TYPE_PRECISION (TREE_TYPE (irhs2
)))
3416 int prec1
= TYPE_PRECISION (TREE_TYPE (irhs1
));
3417 int prec2
= TYPE_PRECISION (TREE_TYPE (irhs2
));
3418 int out_prec
= TYPE_PRECISION (out_type
);
3419 if (absu_hwi (out_prec
- prec1
) < absu_hwi (out_prec
- prec2
))
3420 irhs2
= adjust_bool_pattern_cast (TREE_TYPE (irhs1
), irhs2
,
3422 else if (absu_hwi (out_prec
- prec1
) > absu_hwi (out_prec
- prec2
))
3423 irhs1
= adjust_bool_pattern_cast (TREE_TYPE (irhs2
), irhs1
,
3427 irhs1
= adjust_bool_pattern_cast (out_type
, irhs1
, stmt_info
);
3428 irhs2
= adjust_bool_pattern_cast (out_type
, irhs2
, stmt_info
);
3431 itype
= TREE_TYPE (irhs1
);
3433 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
3434 rhs_code
, irhs1
, irhs2
);
3439 gcc_assert (TREE_CODE_CLASS (rhs_code
) == tcc_comparison
);
3440 if (TREE_CODE (TREE_TYPE (rhs1
)) != INTEGER_TYPE
3441 || !TYPE_UNSIGNED (TREE_TYPE (rhs1
))
3442 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1
)),
3443 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1
)))))
3445 scalar_mode mode
= SCALAR_TYPE_MODE (TREE_TYPE (rhs1
));
3447 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode
), 1);
3450 itype
= TREE_TYPE (rhs1
);
3451 cond_expr
= build2_loc (loc
, rhs_code
, itype
, rhs1
, rhs2
);
3452 if (trueval
== NULL_TREE
)
3453 trueval
= build_int_cst (itype
, 1);
3455 gcc_checking_assert (useless_type_conversion_p (itype
,
3456 TREE_TYPE (trueval
)));
3458 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
3459 COND_EXPR
, cond_expr
, trueval
,
3460 build_int_cst (itype
, 0));
3464 gimple_set_location (pattern_stmt
, loc
);
3465 append_pattern_def_seq (stmt_info
, pattern_stmt
,
3466 get_vectype_for_scalar_type (itype
));
3467 defs
.put (var
, gimple_assign_lhs (pattern_stmt
));
3470 /* Comparison function to qsort a vector of gimple stmts after UID. */
3473 sort_after_uid (const void *p1
, const void *p2
)
3475 const gimple
*stmt1
= *(const gimple
* const *)p1
;
3476 const gimple
*stmt2
= *(const gimple
* const *)p2
;
3477 return gimple_uid (stmt1
) - gimple_uid (stmt2
);
3480 /* Create pattern stmts for all stmts participating in the bool pattern
3481 specified by BOOL_STMT_SET and its root STMT_INFO with the desired type
3482 OUT_TYPE. Return the def of the pattern root. */
3485 adjust_bool_stmts (hash_set
<gimple
*> &bool_stmt_set
,
3486 tree out_type
, stmt_vec_info stmt_info
)
3488 /* Gather original stmts in the bool pattern in their order of appearance
3490 auto_vec
<gimple
*> bool_stmts (bool_stmt_set
.elements ());
3491 for (hash_set
<gimple
*>::iterator i
= bool_stmt_set
.begin ();
3492 i
!= bool_stmt_set
.end (); ++i
)
3493 bool_stmts
.quick_push (*i
);
3494 bool_stmts
.qsort (sort_after_uid
);
3496 /* Now process them in that order, producing pattern stmts. */
3497 hash_map
<tree
, tree
> defs
;
3498 for (unsigned i
= 0; i
< bool_stmts
.length (); ++i
)
3499 adjust_bool_pattern (gimple_assign_lhs (bool_stmts
[i
]),
3500 out_type
, stmt_info
, defs
);
3502 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
3503 gimple
*pattern_stmt
3504 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
));
3505 return gimple_assign_lhs (pattern_stmt
);
3508 /* Helper for search_type_for_mask. */
3511 search_type_for_mask_1 (tree var
, vec_info
*vinfo
,
3512 hash_map
<gimple
*, tree
> &cache
)
3515 enum tree_code rhs_code
;
3516 tree res
= NULL_TREE
, res2
;
3518 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var
)))
3521 stmt_vec_info def_stmt_info
= vect_get_internal_def (vinfo
, var
);
3525 gassign
*def_stmt
= dyn_cast
<gassign
*> (def_stmt_info
->stmt
);
3529 tree
*c
= cache
.get (def_stmt
);
3533 rhs_code
= gimple_assign_rhs_code (def_stmt
);
3534 rhs1
= gimple_assign_rhs1 (def_stmt
);
3541 res
= search_type_for_mask_1 (rhs1
, vinfo
, cache
);
3547 res
= search_type_for_mask_1 (rhs1
, vinfo
, cache
);
3548 res2
= search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt
), vinfo
,
3550 if (!res
|| (res2
&& TYPE_PRECISION (res
) > TYPE_PRECISION (res2
)))
3555 if (TREE_CODE_CLASS (rhs_code
) == tcc_comparison
)
3557 tree comp_vectype
, mask_type
;
3559 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1
)))
3561 res
= search_type_for_mask_1 (rhs1
, vinfo
, cache
);
3562 res2
= search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt
),
3564 if (!res
|| (res2
&& TYPE_PRECISION (res
) > TYPE_PRECISION (res2
)))
3569 comp_vectype
= get_vectype_for_scalar_type (TREE_TYPE (rhs1
));
3570 if (comp_vectype
== NULL_TREE
)
3576 mask_type
= get_mask_type_for_scalar_type (TREE_TYPE (rhs1
));
3578 || !expand_vec_cmp_expr_p (comp_vectype
, mask_type
, rhs_code
))
3584 if (TREE_CODE (TREE_TYPE (rhs1
)) != INTEGER_TYPE
3585 || !TYPE_UNSIGNED (TREE_TYPE (rhs1
)))
3587 scalar_mode mode
= SCALAR_TYPE_MODE (TREE_TYPE (rhs1
));
3588 res
= build_nonstandard_integer_type (GET_MODE_BITSIZE (mode
), 1);
3591 res
= TREE_TYPE (rhs1
);
3595 cache
.put (def_stmt
, res
);
3599 /* Return the proper type for converting bool VAR into
3600 an integer value or NULL_TREE if no such type exists.
3601 The type is chosen so that converted value has the
3602 same number of elements as VAR's vector type. */
3605 search_type_for_mask (tree var
, vec_info
*vinfo
)
3607 hash_map
<gimple
*, tree
> cache
;
3608 return search_type_for_mask_1 (var
, vinfo
, cache
);
3611 /* Function vect_recog_bool_pattern
3613 Try to find pattern like following:
3615 bool a_b, b_b, c_b, d_b, e_b;
3618 S1 a_b = x1 CMP1 y1;
3619 S2 b_b = x2 CMP2 y2;
3621 S4 d_b = x3 CMP3 y3;
3623 S6 f_T = (TYPE) e_b;
3625 where type 'TYPE' is an integral type. Or a similar pattern
3628 S6 f_Y = e_b ? r_Y : s_Y;
3630 as results from if-conversion of a complex condition.
3634 * STMT_VINFO: The stmt at the end from which the pattern
3635 search begins, i.e. cast of a bool to
3640 * TYPE_OUT: The type of the output of this pattern.
3642 * Return value: A new stmt that will be used to replace the pattern.
3644 Assuming size of TYPE is the same as size of all comparisons
3645 (otherwise some casts would be added where needed), the above
3646 sequence we create related pattern stmts:
3647 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3648 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3649 S4' d_T = x3 CMP3 y3 ? 1 : 0;
3650 S5' e_T = c_T | d_T;
3653 Instead of the above S3' we could emit:
3654 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3655 S3' c_T = a_T | b_T;
3656 but the above is more efficient. */
3659 vect_recog_bool_pattern (stmt_vec_info stmt_vinfo
, tree
*type_out
)
3661 gimple
*last_stmt
= stmt_vinfo
->stmt
;
3662 enum tree_code rhs_code
;
3663 tree var
, lhs
, rhs
, vectype
;
3664 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
3665 gimple
*pattern_stmt
;
3667 if (!is_gimple_assign (last_stmt
))
3670 var
= gimple_assign_rhs1 (last_stmt
);
3671 lhs
= gimple_assign_lhs (last_stmt
);
3673 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var
)))
3676 hash_set
<gimple
*> bool_stmts
;
3678 rhs_code
= gimple_assign_rhs_code (last_stmt
);
3679 if (CONVERT_EXPR_CODE_P (rhs_code
))
3681 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
3682 || TYPE_PRECISION (TREE_TYPE (lhs
)) == 1)
3684 vectype
= get_vectype_for_scalar_type (TREE_TYPE (lhs
));
3685 if (vectype
== NULL_TREE
)
3688 if (check_bool_pattern (var
, vinfo
, bool_stmts
))
3690 rhs
= adjust_bool_stmts (bool_stmts
, TREE_TYPE (lhs
), stmt_vinfo
);
3691 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
3692 if (useless_type_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (rhs
)))
3693 pattern_stmt
= gimple_build_assign (lhs
, SSA_NAME
, rhs
);
3696 = gimple_build_assign (lhs
, NOP_EXPR
, rhs
);
3700 tree type
= search_type_for_mask (var
, vinfo
);
3701 tree cst0
, cst1
, tmp
;
3706 /* We may directly use cond with narrowed type to avoid
3707 multiple cond exprs with following result packing and
3708 perform single cond with packed mask instead. In case
3709 of widening we better make cond first and then extract
3711 if (TYPE_MODE (type
) == TYPE_MODE (TREE_TYPE (lhs
)))
3712 type
= TREE_TYPE (lhs
);
3714 cst0
= build_int_cst (type
, 0);
3715 cst1
= build_int_cst (type
, 1);
3716 tmp
= vect_recog_temp_ssa_var (type
, NULL
);
3717 pattern_stmt
= gimple_build_assign (tmp
, COND_EXPR
, var
, cst1
, cst0
);
3719 if (!useless_type_conversion_p (type
, TREE_TYPE (lhs
)))
3721 tree new_vectype
= get_vectype_for_scalar_type (type
);
3722 append_pattern_def_seq (stmt_vinfo
, pattern_stmt
, new_vectype
);
3724 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
3725 pattern_stmt
= gimple_build_assign (lhs
, CONVERT_EXPR
, tmp
);
3729 *type_out
= vectype
;
3730 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt
);
3732 return pattern_stmt
;
3734 else if (rhs_code
== COND_EXPR
3735 && TREE_CODE (var
) == SSA_NAME
)
3737 vectype
= get_vectype_for_scalar_type (TREE_TYPE (lhs
));
3738 if (vectype
== NULL_TREE
)
3741 /* Build a scalar type for the boolean result that when
3742 vectorized matches the vector type of the result in
3743 size and number of elements. */
3745 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype
)),
3746 TYPE_VECTOR_SUBPARTS (vectype
));
3749 = build_nonstandard_integer_type (prec
,
3750 TYPE_UNSIGNED (TREE_TYPE (var
)));
3751 if (get_vectype_for_scalar_type (type
) == NULL_TREE
)
3754 if (!check_bool_pattern (var
, vinfo
, bool_stmts
))
3757 rhs
= adjust_bool_stmts (bool_stmts
, type
, stmt_vinfo
);
3759 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
3761 = gimple_build_assign (lhs
, COND_EXPR
,
3762 build2 (NE_EXPR
, boolean_type_node
,
3763 rhs
, build_int_cst (type
, 0)),
3764 gimple_assign_rhs2 (last_stmt
),
3765 gimple_assign_rhs3 (last_stmt
));
3766 *type_out
= vectype
;
3767 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt
);
3769 return pattern_stmt
;
3771 else if (rhs_code
== SSA_NAME
3772 && STMT_VINFO_DATA_REF (stmt_vinfo
))
3774 stmt_vec_info pattern_stmt_info
;
3775 vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
3776 gcc_assert (vectype
!= NULL_TREE
);
3777 if (!VECTOR_MODE_P (TYPE_MODE (vectype
)))
3780 if (check_bool_pattern (var
, vinfo
, bool_stmts
))
3781 rhs
= adjust_bool_stmts (bool_stmts
, TREE_TYPE (vectype
), stmt_vinfo
);
3784 tree type
= search_type_for_mask (var
, vinfo
);
3785 tree cst0
, cst1
, new_vectype
;
3790 if (TYPE_MODE (type
) == TYPE_MODE (TREE_TYPE (vectype
)))
3791 type
= TREE_TYPE (vectype
);
3793 cst0
= build_int_cst (type
, 0);
3794 cst1
= build_int_cst (type
, 1);
3795 new_vectype
= get_vectype_for_scalar_type (type
);
3797 rhs
= vect_recog_temp_ssa_var (type
, NULL
);
3798 pattern_stmt
= gimple_build_assign (rhs
, COND_EXPR
, var
, cst1
, cst0
);
3799 append_pattern_def_seq (stmt_vinfo
, pattern_stmt
, new_vectype
);
3802 lhs
= build1 (VIEW_CONVERT_EXPR
, TREE_TYPE (vectype
), lhs
);
3803 if (!useless_type_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (rhs
)))
3805 tree rhs2
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
3806 gimple
*cast_stmt
= gimple_build_assign (rhs2
, NOP_EXPR
, rhs
);
3807 append_pattern_def_seq (stmt_vinfo
, cast_stmt
);
3810 pattern_stmt
= gimple_build_assign (lhs
, SSA_NAME
, rhs
);
3811 pattern_stmt_info
= vinfo
->add_stmt (pattern_stmt
);
3812 vinfo
->move_dr (pattern_stmt_info
, stmt_vinfo
);
3813 *type_out
= vectype
;
3814 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt
);
3816 return pattern_stmt
;
3823 /* A helper for vect_recog_mask_conversion_pattern. Build
3824 conversion of MASK to a type suitable for masking VECTYPE.
3825 Built statement gets required vectype and is appended to
3826 a pattern sequence of STMT_VINFO.
3828 Return converted mask. */
3831 build_mask_conversion (tree mask
, tree vectype
, stmt_vec_info stmt_vinfo
)
3836 masktype
= build_same_sized_truth_vector_type (vectype
);
3837 tmp
= vect_recog_temp_ssa_var (TREE_TYPE (masktype
), NULL
);
3838 stmt
= gimple_build_assign (tmp
, CONVERT_EXPR
, mask
);
3839 append_pattern_def_seq (stmt_vinfo
, stmt
, masktype
);
3845 /* Function vect_recog_mask_conversion_pattern
3847 Try to find statements which require boolean type
3848 converison. Additional conversion statements are
3849 added to handle such cases. For example:
3859 S4 c_1 = m_3 ? c_2 : c_3;
3861 Will be transformed into:
3865 S3'' m_2' = (_Bool[bitsize=32])m_2
3866 S3' m_3' = m_1 & m_2';
3867 S4'' m_3'' = (_Bool[bitsize=8])m_3'
3868 S4' c_1' = m_3'' ? c_2 : c_3; */
3871 vect_recog_mask_conversion_pattern (stmt_vec_info stmt_vinfo
, tree
*type_out
)
3873 gimple
*last_stmt
= stmt_vinfo
->stmt
;
3874 enum tree_code rhs_code
;
3875 tree lhs
= NULL_TREE
, rhs1
, rhs2
, tmp
, rhs1_type
, rhs2_type
;
3876 tree vectype1
, vectype2
;
3877 stmt_vec_info pattern_stmt_info
;
3878 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
3880 /* Check for MASK_LOAD ans MASK_STORE calls requiring mask conversion. */
3881 if (is_gimple_call (last_stmt
)
3882 && gimple_call_internal_p (last_stmt
))
3884 gcall
*pattern_stmt
;
3886 internal_fn ifn
= gimple_call_internal_fn (last_stmt
);
3887 int mask_argno
= internal_fn_mask_index (ifn
);
3891 bool store_p
= internal_store_fn_p (ifn
);
3894 int rhs_index
= internal_fn_stored_value_index (ifn
);
3895 tree rhs
= gimple_call_arg (last_stmt
, rhs_index
);
3896 vectype1
= get_vectype_for_scalar_type (TREE_TYPE (rhs
));
3900 lhs
= gimple_call_lhs (last_stmt
);
3901 vectype1
= get_vectype_for_scalar_type (TREE_TYPE (lhs
));
3904 tree mask_arg
= gimple_call_arg (last_stmt
, mask_argno
);
3905 tree mask_arg_type
= search_type_for_mask (mask_arg
, vinfo
);
3908 vectype2
= get_mask_type_for_scalar_type (mask_arg_type
);
3910 if (!vectype1
|| !vectype2
3911 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1
),
3912 TYPE_VECTOR_SUBPARTS (vectype2
)))
3915 tmp
= build_mask_conversion (mask_arg
, vectype1
, stmt_vinfo
);
3917 auto_vec
<tree
, 8> args
;
3918 unsigned int nargs
= gimple_call_num_args (last_stmt
);
3919 args
.safe_grow (nargs
);
3920 for (unsigned int i
= 0; i
< nargs
; ++i
)
3921 args
[i
] = ((int) i
== mask_argno
3923 : gimple_call_arg (last_stmt
, i
));
3924 pattern_stmt
= gimple_build_call_internal_vec (ifn
, args
);
3928 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
3929 gimple_call_set_lhs (pattern_stmt
, lhs
);
3931 gimple_call_set_nothrow (pattern_stmt
, true);
3933 pattern_stmt_info
= vinfo
->add_stmt (pattern_stmt
);
3934 if (STMT_VINFO_DATA_REF (stmt_vinfo
))
3935 vinfo
->move_dr (pattern_stmt_info
, stmt_vinfo
);
3937 *type_out
= vectype1
;
3938 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt
);
3940 return pattern_stmt
;
3943 if (!is_gimple_assign (last_stmt
))
3946 gimple
*pattern_stmt
;
3947 lhs
= gimple_assign_lhs (last_stmt
);
3948 rhs1
= gimple_assign_rhs1 (last_stmt
);
3949 rhs_code
= gimple_assign_rhs_code (last_stmt
);
3951 /* Check for cond expression requiring mask conversion. */
3952 if (rhs_code
== COND_EXPR
)
3954 vectype1
= get_vectype_for_scalar_type (TREE_TYPE (lhs
));
3956 if (TREE_CODE (rhs1
) == SSA_NAME
)
3958 rhs1_type
= search_type_for_mask (rhs1
, vinfo
);
3962 else if (COMPARISON_CLASS_P (rhs1
))
3964 /* Check whether we're comparing scalar booleans and (if so)
3965 whether a better mask type exists than the mask associated
3966 with boolean-sized elements. This avoids unnecessary packs
3967 and unpacks if the booleans are set from comparisons of
3968 wider types. E.g. in:
3970 int x1, x2, x3, x4, y1, y1;
3972 bool b1 = (x1 == x2);
3973 bool b2 = (x3 == x4);
3974 ... = b1 == b2 ? y1 : y2;
3976 it is better for b1 and b2 to use the mask type associated
3977 with int elements rather bool (byte) elements. */
3978 rhs1_type
= search_type_for_mask (TREE_OPERAND (rhs1
, 0), vinfo
);
3980 rhs1_type
= TREE_TYPE (TREE_OPERAND (rhs1
, 0));
3985 vectype2
= get_mask_type_for_scalar_type (rhs1_type
);
3987 if (!vectype1
|| !vectype2
)
3990 /* Continue if a conversion is needed. Also continue if we have
3991 a comparison whose vector type would normally be different from
3992 VECTYPE2 when considered in isolation. In that case we'll
3993 replace the comparison with an SSA name (so that we can record
3994 its vector type) and behave as though the comparison was an SSA
3995 name from the outset. */
3996 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1
),
3997 TYPE_VECTOR_SUBPARTS (vectype2
))
3998 && (TREE_CODE (rhs1
) == SSA_NAME
3999 || rhs1_type
== TREE_TYPE (TREE_OPERAND (rhs1
, 0))))
4002 /* If rhs1 is invariant and we can promote it leave the COND_EXPR
4003 in place, we can handle it in vectorizable_condition. This avoids
4004 unnecessary promotion stmts and increased vectorization factor. */
4005 if (COMPARISON_CLASS_P (rhs1
)
4006 && INTEGRAL_TYPE_P (rhs1_type
)
4007 && known_le (TYPE_VECTOR_SUBPARTS (vectype1
),
4008 TYPE_VECTOR_SUBPARTS (vectype2
)))
4010 enum vect_def_type dt
;
4011 if (vect_is_simple_use (TREE_OPERAND (rhs1
, 0), vinfo
, &dt
)
4012 && dt
== vect_external_def
4013 && vect_is_simple_use (TREE_OPERAND (rhs1
, 1), vinfo
, &dt
)
4014 && (dt
== vect_external_def
4015 || dt
== vect_constant_def
))
4017 tree wide_scalar_type
= build_nonstandard_integer_type
4018 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype1
))),
4019 TYPE_UNSIGNED (rhs1_type
));
4020 tree vectype3
= get_vectype_for_scalar_type (wide_scalar_type
);
4021 if (expand_vec_cond_expr_p (vectype1
, vectype3
, TREE_CODE (rhs1
)))
4026 /* If rhs1 is a comparison we need to move it into a
4027 separate statement. */
4028 if (TREE_CODE (rhs1
) != SSA_NAME
)
4030 tmp
= vect_recog_temp_ssa_var (TREE_TYPE (rhs1
), NULL
);
4031 pattern_stmt
= gimple_build_assign (tmp
, rhs1
);
4033 append_pattern_def_seq (stmt_vinfo
, pattern_stmt
, vectype2
);
4036 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
4037 TYPE_VECTOR_SUBPARTS (vectype2
)))
4038 tmp
= build_mask_conversion (rhs1
, vectype1
, stmt_vinfo
);
4042 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
4043 pattern_stmt
= gimple_build_assign (lhs
, COND_EXPR
, tmp
,
4044 gimple_assign_rhs2 (last_stmt
),
4045 gimple_assign_rhs3 (last_stmt
));
4047 *type_out
= vectype1
;
4048 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt
);
4050 return pattern_stmt
;
4053 /* Now check for binary boolean operations requiring conversion for
4055 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs
)))
4058 if (rhs_code
!= BIT_IOR_EXPR
4059 && rhs_code
!= BIT_XOR_EXPR
4060 && rhs_code
!= BIT_AND_EXPR
4061 && TREE_CODE_CLASS (rhs_code
) != tcc_comparison
)
4064 rhs2
= gimple_assign_rhs2 (last_stmt
);
4066 rhs1_type
= search_type_for_mask (rhs1
, vinfo
);
4067 rhs2_type
= search_type_for_mask (rhs2
, vinfo
);
4069 if (!rhs1_type
|| !rhs2_type
4070 || TYPE_PRECISION (rhs1_type
) == TYPE_PRECISION (rhs2_type
))
4073 if (TYPE_PRECISION (rhs1_type
) < TYPE_PRECISION (rhs2_type
))
4075 vectype1
= get_mask_type_for_scalar_type (rhs1_type
);
4078 rhs2
= build_mask_conversion (rhs2
, vectype1
, stmt_vinfo
);
4082 vectype1
= get_mask_type_for_scalar_type (rhs2_type
);
4085 rhs1
= build_mask_conversion (rhs1
, vectype1
, stmt_vinfo
);
4088 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
4089 pattern_stmt
= gimple_build_assign (lhs
, rhs_code
, rhs1
, rhs2
);
4091 *type_out
= vectype1
;
4092 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt
);
4094 return pattern_stmt
;
4097 /* STMT_INFO is a load or store. If the load or store is conditional, return
4098 the boolean condition under which it occurs, otherwise return null. */
4101 vect_get_load_store_mask (stmt_vec_info stmt_info
)
4103 if (gassign
*def_assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
4105 gcc_assert (gimple_assign_single_p (def_assign
));
4109 if (gcall
*def_call
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
4111 internal_fn ifn
= gimple_call_internal_fn (def_call
);
4112 int mask_index
= internal_fn_mask_index (ifn
);
4113 return gimple_call_arg (def_call
, mask_index
);
4119 /* Return the scalar offset type that an internal gather/scatter function
4120 should use. GS_INFO describes the gather/scatter operation. */
4123 vect_get_gather_scatter_offset_type (gather_scatter_info
*gs_info
)
4125 tree offset_type
= TREE_TYPE (gs_info
->offset
);
4126 unsigned int element_bits
= tree_to_uhwi (TYPE_SIZE (gs_info
->element_type
));
4128 /* Enforced by vect_check_gather_scatter. */
4129 unsigned int offset_bits
= TYPE_PRECISION (offset_type
);
4130 gcc_assert (element_bits
>= offset_bits
);
4132 /* If the offset is narrower than the elements, extend it according
4134 if (element_bits
> offset_bits
)
4135 return build_nonstandard_integer_type (element_bits
,
4136 TYPE_UNSIGNED (offset_type
));
4141 /* Return MASK if MASK is suitable for masking an operation on vectors
4142 of type VECTYPE, otherwise convert it into such a form and return
4143 the result. Associate any conversion statements with STMT_INFO's
4147 vect_convert_mask_for_vectype (tree mask
, tree vectype
,
4148 stmt_vec_info stmt_info
, vec_info
*vinfo
)
4150 tree mask_type
= search_type_for_mask (mask
, vinfo
);
4153 tree mask_vectype
= get_mask_type_for_scalar_type (mask_type
);
4155 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype
),
4156 TYPE_VECTOR_SUBPARTS (mask_vectype
)))
4157 mask
= build_mask_conversion (mask
, vectype
, stmt_info
);
4162 /* Return the equivalent of:
4164 fold_convert (TYPE, VALUE)
4166 with the expectation that the operation will be vectorized.
4167 If new statements are needed, add them as pattern statements
4171 vect_add_conversion_to_pattern (tree type
, tree value
, stmt_vec_info stmt_info
)
4173 if (useless_type_conversion_p (type
, TREE_TYPE (value
)))
4176 tree new_value
= vect_recog_temp_ssa_var (type
, NULL
);
4177 gassign
*conversion
= gimple_build_assign (new_value
, CONVERT_EXPR
, value
);
4178 append_pattern_def_seq (stmt_info
, conversion
,
4179 get_vectype_for_scalar_type (type
));
4183 /* Try to convert STMT_INFO into a call to a gather load or scatter store
4184 internal function. Return the final statement on success and set
4185 *TYPE_OUT to the vector type being loaded or stored.
4187 This function only handles gathers and scatters that were recognized
4188 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
4191 vect_recog_gather_scatter_pattern (stmt_vec_info stmt_info
, tree
*type_out
)
4193 /* Currently we only support this for loop vectorization. */
4194 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (stmt_info
->vinfo
);
4198 /* Make sure that we're looking at a gather load or scatter store. */
4199 data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
4200 if (!dr
|| !STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
4203 /* Get the boolean that controls whether the load or store happens.
4204 This is null if the operation is unconditional. */
4205 tree mask
= vect_get_load_store_mask (stmt_info
);
4207 /* Make sure that the target supports an appropriate internal
4208 function for the gather/scatter operation. */
4209 gather_scatter_info gs_info
;
4210 if (!vect_check_gather_scatter (stmt_info
, loop_vinfo
, &gs_info
)
4214 /* Convert the mask to the right form. */
4215 tree gs_vectype
= get_vectype_for_scalar_type (gs_info
.element_type
);
4217 mask
= vect_convert_mask_for_vectype (mask
, gs_vectype
, stmt_info
,
4220 /* Get the invariant base and non-invariant offset, converting the
4221 latter to the same width as the vector elements. */
4222 tree base
= gs_info
.base
;
4223 tree offset_type
= vect_get_gather_scatter_offset_type (&gs_info
);
4224 tree offset
= vect_add_conversion_to_pattern (offset_type
, gs_info
.offset
,
4227 /* Build the new pattern statement. */
4228 tree scale
= size_int (gs_info
.scale
);
4229 gcall
*pattern_stmt
;
4230 if (DR_IS_READ (dr
))
4233 pattern_stmt
= gimple_build_call_internal (gs_info
.ifn
, 4, base
,
4234 offset
, scale
, mask
);
4236 pattern_stmt
= gimple_build_call_internal (gs_info
.ifn
, 3, base
,
4238 tree load_lhs
= vect_recog_temp_ssa_var (gs_info
.element_type
, NULL
);
4239 gimple_call_set_lhs (pattern_stmt
, load_lhs
);
4243 tree rhs
= vect_get_store_rhs (stmt_info
);
4245 pattern_stmt
= gimple_build_call_internal (IFN_MASK_SCATTER_STORE
, 5,
4246 base
, offset
, scale
, rhs
,
4249 pattern_stmt
= gimple_build_call_internal (IFN_SCATTER_STORE
, 4,
4250 base
, offset
, scale
, rhs
);
4252 gimple_call_set_nothrow (pattern_stmt
, true);
4254 /* Copy across relevant vectorization info and associate DR with the
4255 new pattern statement instead of the original statement. */
4256 stmt_vec_info pattern_stmt_info
= loop_vinfo
->add_stmt (pattern_stmt
);
4257 loop_vinfo
->move_dr (pattern_stmt_info
, stmt_info
);
4259 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4260 *type_out
= vectype
;
4261 vect_pattern_detected ("gather/scatter pattern", stmt_info
->stmt
);
4263 return pattern_stmt
;
4266 /* Return true if TYPE is a non-boolean integer type. These are the types
4267 that we want to consider for narrowing. */
4270 vect_narrowable_type_p (tree type
)
4272 return INTEGRAL_TYPE_P (type
) && !VECT_SCALAR_BOOLEAN_TYPE_P (type
);
4275 /* Return true if the operation given by CODE can be truncated to N bits
4276 when only N bits of the output are needed. This is only true if bit N+1
4277 of the inputs has no effect on the low N bits of the result. */
4280 vect_truncatable_operation_p (tree_code code
)
4298 /* Record that STMT_INFO could be changed from operating on TYPE to
4299 operating on a type with the precision and sign given by PRECISION
4300 and SIGN respectively. PRECISION is an arbitrary bit precision;
4301 it might not be a whole number of bytes. */
4304 vect_set_operation_type (stmt_vec_info stmt_info
, tree type
,
4305 unsigned int precision
, signop sign
)
4307 /* Round the precision up to a whole number of bytes. */
4308 precision
= vect_element_precision (precision
);
4309 if (precision
< TYPE_PRECISION (type
)
4310 && (!stmt_info
->operation_precision
4311 || stmt_info
->operation_precision
> precision
))
4313 stmt_info
->operation_precision
= precision
;
4314 stmt_info
->operation_sign
= sign
;
4318 /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
4319 non-boolean inputs, all of which have type TYPE. MIN_INPUT_PRECISION
4320 is an arbitrary bit precision; it might not be a whole number of bytes. */
4323 vect_set_min_input_precision (stmt_vec_info stmt_info
, tree type
,
4324 unsigned int min_input_precision
)
4326 /* This operation in isolation only requires the inputs to have
4327 MIN_INPUT_PRECISION of precision, However, that doesn't mean
4328 that MIN_INPUT_PRECISION is a natural precision for the chain
4329 as a whole. E.g. consider something like:
4331 unsigned short *x, *y;
4332 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4334 The right shift can be done on unsigned chars, and only requires the
4335 result of "*x & 0xf0" to be done on unsigned chars. But taking that
4336 approach would mean turning a natural chain of single-vector unsigned
4337 short operations into one that truncates "*x" and then extends
4338 "(*x & 0xf0) >> 4", with two vectors for each unsigned short
4339 operation and one vector for each unsigned char operation.
4340 This would be a significant pessimization.
4342 Instead only propagate the maximum of this precision and the precision
4343 required by the users of the result. This means that we don't pessimize
4344 the case above but continue to optimize things like:
4348 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4350 Here we would truncate two vectors of *x to a single vector of
4351 unsigned chars and use single-vector unsigned char operations for
4352 everything else, rather than doing two unsigned short copies of
4353 "(*x & 0xf0) >> 4" and then truncating the result. */
4354 min_input_precision
= MAX (min_input_precision
,
4355 stmt_info
->min_output_precision
);
4357 if (min_input_precision
< TYPE_PRECISION (type
)
4358 && (!stmt_info
->min_input_precision
4359 || stmt_info
->min_input_precision
> min_input_precision
))
4360 stmt_info
->min_input_precision
= min_input_precision
;
4363 /* Subroutine of vect_determine_min_output_precision. Return true if
4364 we can calculate a reduced number of output bits for STMT_INFO,
4365 whose result is LHS. */
4368 vect_determine_min_output_precision_1 (stmt_vec_info stmt_info
, tree lhs
)
4370 /* Take the maximum precision required by users of the result. */
4371 vec_info
*vinfo
= stmt_info
->vinfo
;
4372 unsigned int precision
= 0;
4373 imm_use_iterator iter
;
4375 FOR_EACH_IMM_USE_FAST (use
, iter
, lhs
)
4377 gimple
*use_stmt
= USE_STMT (use
);
4378 if (is_gimple_debug (use_stmt
))
4380 stmt_vec_info use_stmt_info
= vinfo
->lookup_stmt (use_stmt
);
4381 if (!use_stmt_info
|| !use_stmt_info
->min_input_precision
)
4383 /* The input precision recorded for COND_EXPRs applies only to the
4384 "then" and "else" values. */
4385 gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
4387 && gimple_assign_rhs_code (assign
) == COND_EXPR
4388 && use
->use
!= gimple_assign_rhs2_ptr (assign
)
4389 && use
->use
!= gimple_assign_rhs3_ptr (assign
))
4391 precision
= MAX (precision
, use_stmt_info
->min_input_precision
);
4394 if (dump_enabled_p ())
4395 dump_printf_loc (MSG_NOTE
, vect_location
,
4396 "only the low %d bits of %T are significant\n",
4398 stmt_info
->min_output_precision
= precision
;
4402 /* Calculate min_output_precision for STMT_INFO. */
4405 vect_determine_min_output_precision (stmt_vec_info stmt_info
)
4407 /* We're only interested in statements with a narrowable result. */
4408 tree lhs
= gimple_get_lhs (stmt_info
->stmt
);
4410 || TREE_CODE (lhs
) != SSA_NAME
4411 || !vect_narrowable_type_p (TREE_TYPE (lhs
)))
4414 if (!vect_determine_min_output_precision_1 (stmt_info
, lhs
))
4415 stmt_info
->min_output_precision
= TYPE_PRECISION (TREE_TYPE (lhs
));
4418 /* Use range information to decide whether STMT (described by STMT_INFO)
4419 could be done in a narrower type. This is effectively a forward
4420 propagation, since it uses context-independent information that applies
4421 to all users of an SSA name. */
4424 vect_determine_precisions_from_range (stmt_vec_info stmt_info
, gassign
*stmt
)
4426 tree lhs
= gimple_assign_lhs (stmt
);
4427 if (!lhs
|| TREE_CODE (lhs
) != SSA_NAME
)
4430 tree type
= TREE_TYPE (lhs
);
4431 if (!vect_narrowable_type_p (type
))
4434 /* First see whether we have any useful range information for the result. */
4435 unsigned int precision
= TYPE_PRECISION (type
);
4436 signop sign
= TYPE_SIGN (type
);
4437 wide_int min_value
, max_value
;
4438 if (!vect_get_range_info (lhs
, &min_value
, &max_value
))
4441 tree_code code
= gimple_assign_rhs_code (stmt
);
4442 unsigned int nops
= gimple_num_ops (stmt
);
4444 if (!vect_truncatable_operation_p (code
))
4445 /* Check that all relevant input operands are compatible, and update
4446 [MIN_VALUE, MAX_VALUE] to include their ranges. */
4447 for (unsigned int i
= 1; i
< nops
; ++i
)
4449 tree op
= gimple_op (stmt
, i
);
4450 if (TREE_CODE (op
) == INTEGER_CST
)
4452 /* Don't require the integer to have RHS_TYPE (which it might
4453 not for things like shift amounts, etc.), but do require it
4455 if (!int_fits_type_p (op
, type
))
4458 min_value
= wi::min (min_value
, wi::to_wide (op
, precision
), sign
);
4459 max_value
= wi::max (max_value
, wi::to_wide (op
, precision
), sign
);
4461 else if (TREE_CODE (op
) == SSA_NAME
)
4463 /* Ignore codes that don't take uniform arguments. */
4464 if (!types_compatible_p (TREE_TYPE (op
), type
))
4467 wide_int op_min_value
, op_max_value
;
4468 if (!vect_get_range_info (op
, &op_min_value
, &op_max_value
))
4471 min_value
= wi::min (min_value
, op_min_value
, sign
);
4472 max_value
= wi::max (max_value
, op_max_value
, sign
);
4478 /* Try to switch signed types for unsigned types if we can.
4479 This is better for two reasons. First, unsigned ops tend
4480 to be cheaper than signed ops. Second, it means that we can
4484 int res = (int) c & 0xff00; // range [0x0000, 0xff00]
4489 unsigned short res_1 = (unsigned short) c & 0xff00;
4490 int res = (int) res_1;
4492 where the intermediate result res_1 has unsigned rather than
4494 if (sign
== SIGNED
&& !wi::neg_p (min_value
))
4497 /* See what precision is required for MIN_VALUE and MAX_VALUE. */
4498 unsigned int precision1
= wi::min_precision (min_value
, sign
);
4499 unsigned int precision2
= wi::min_precision (max_value
, sign
);
4500 unsigned int value_precision
= MAX (precision1
, precision2
);
4501 if (value_precision
>= precision
)
4504 if (dump_enabled_p ())
4505 dump_printf_loc (MSG_NOTE
, vect_location
, "can narrow to %s:%d"
4506 " without loss of precision: %G",
4507 sign
== SIGNED
? "signed" : "unsigned",
4508 value_precision
, stmt
);
4510 vect_set_operation_type (stmt_info
, type
, value_precision
, sign
);
4511 vect_set_min_input_precision (stmt_info
, type
, value_precision
);
4514 /* Use information about the users of STMT's result to decide whether
4515 STMT (described by STMT_INFO) could be done in a narrower type.
4516 This is effectively a backward propagation. */
4519 vect_determine_precisions_from_users (stmt_vec_info stmt_info
, gassign
*stmt
)
4521 tree_code code
= gimple_assign_rhs_code (stmt
);
4522 unsigned int opno
= (code
== COND_EXPR
? 2 : 1);
4523 tree type
= TREE_TYPE (gimple_op (stmt
, opno
));
4524 if (!vect_narrowable_type_p (type
))
4527 unsigned int precision
= TYPE_PRECISION (type
);
4528 unsigned int operation_precision
, min_input_precision
;
4532 /* Only the bits that contribute to the output matter. Don't change
4533 the precision of the operation itself. */
4534 operation_precision
= precision
;
4535 min_input_precision
= stmt_info
->min_output_precision
;
4541 tree shift
= gimple_assign_rhs2 (stmt
);
4542 if (TREE_CODE (shift
) != INTEGER_CST
4543 || !wi::ltu_p (wi::to_widest (shift
), precision
))
4545 unsigned int const_shift
= TREE_INT_CST_LOW (shift
);
4546 if (code
== LSHIFT_EXPR
)
4548 /* We need CONST_SHIFT fewer bits of the input. */
4549 operation_precision
= stmt_info
->min_output_precision
;
4550 min_input_precision
= (MAX (operation_precision
, const_shift
)
4555 /* We need CONST_SHIFT extra bits to do the operation. */
4556 operation_precision
= (stmt_info
->min_output_precision
4558 min_input_precision
= operation_precision
;
4564 if (vect_truncatable_operation_p (code
))
4566 /* Input bit N has no effect on output bits N-1 and lower. */
4567 operation_precision
= stmt_info
->min_output_precision
;
4568 min_input_precision
= operation_precision
;
4574 if (operation_precision
< precision
)
4576 if (dump_enabled_p ())
4577 dump_printf_loc (MSG_NOTE
, vect_location
, "can narrow to %s:%d"
4578 " without affecting users: %G",
4579 TYPE_UNSIGNED (type
) ? "unsigned" : "signed",
4580 operation_precision
, stmt
);
4581 vect_set_operation_type (stmt_info
, type
, operation_precision
,
4584 vect_set_min_input_precision (stmt_info
, type
, min_input_precision
);
4587 /* Handle vect_determine_precisions for STMT_INFO, given that we
4588 have already done so for the users of its result. */
4591 vect_determine_stmt_precisions (stmt_vec_info stmt_info
)
4593 vect_determine_min_output_precision (stmt_info
);
4594 if (gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
4596 vect_determine_precisions_from_range (stmt_info
, stmt
);
4597 vect_determine_precisions_from_users (stmt_info
, stmt
);
4601 /* Walk backwards through the vectorizable region to determine the
4602 values of these fields:
4604 - min_output_precision
4605 - min_input_precision
4606 - operation_precision
4607 - operation_sign. */
4610 vect_determine_precisions (vec_info
*vinfo
)
4612 DUMP_VECT_SCOPE ("vect_determine_precisions");
4614 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
4616 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
4617 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
4618 unsigned int nbbs
= loop
->num_nodes
;
4620 for (unsigned int i
= 0; i
< nbbs
; i
++)
4622 basic_block bb
= bbs
[nbbs
- i
- 1];
4623 for (gimple_stmt_iterator si
= gsi_last_bb (bb
);
4624 !gsi_end_p (si
); gsi_prev (&si
))
4625 vect_determine_stmt_precisions
4626 (vinfo
->lookup_stmt (gsi_stmt (si
)));
4631 bb_vec_info bb_vinfo
= as_a
<bb_vec_info
> (vinfo
);
4632 gimple_stmt_iterator si
= bb_vinfo
->region_end
;
4637 si
= gsi_last_bb (bb_vinfo
->bb
);
4640 stmt
= gsi_stmt (si
);
4641 stmt_vec_info stmt_info
= vinfo
->lookup_stmt (stmt
);
4642 if (stmt_info
&& STMT_VINFO_VECTORIZABLE (stmt_info
))
4643 vect_determine_stmt_precisions (stmt_info
);
4645 while (stmt
!= gsi_stmt (bb_vinfo
->region_begin
));
4649 typedef gimple
*(*vect_recog_func_ptr
) (stmt_vec_info
, tree
*);
4651 struct vect_recog_func
4653 vect_recog_func_ptr fn
;
4657 /* Note that ordering matters - the first pattern matching on a stmt is
4658 taken which means usually the more complex one needs to preceed the
4659 less comples onex (widen_sum only after dot_prod or sad for example). */
4660 static vect_recog_func vect_vect_recog_func_ptrs
[] = {
4661 { vect_recog_over_widening_pattern
, "over_widening" },
4662 /* Must come after over_widening, which narrows the shift as much as
4663 possible beforehand. */
4664 { vect_recog_average_pattern
, "average" },
4665 { vect_recog_cast_forwprop_pattern
, "cast_forwprop" },
4666 { vect_recog_widen_mult_pattern
, "widen_mult" },
4667 { vect_recog_dot_prod_pattern
, "dot_prod" },
4668 { vect_recog_sad_pattern
, "sad" },
4669 { vect_recog_widen_sum_pattern
, "widen_sum" },
4670 { vect_recog_pow_pattern
, "pow" },
4671 { vect_recog_widen_shift_pattern
, "widen_shift" },
4672 { vect_recog_rotate_pattern
, "rotate" },
4673 { vect_recog_vector_vector_shift_pattern
, "vector_vector_shift" },
4674 { vect_recog_divmod_pattern
, "divmod" },
4675 { vect_recog_mult_pattern
, "mult" },
4676 { vect_recog_mixed_size_cond_pattern
, "mixed_size_cond" },
4677 { vect_recog_bool_pattern
, "bool" },
4678 /* This must come before mask conversion, and includes the parts
4679 of mask conversion that are needed for gather and scatter
4680 internal functions. */
4681 { vect_recog_gather_scatter_pattern
, "gather_scatter" },
4682 { vect_recog_mask_conversion_pattern
, "mask_conversion" }
4685 const unsigned int NUM_PATTERNS
= ARRAY_SIZE (vect_vect_recog_func_ptrs
);
4687 /* Mark statements that are involved in a pattern. */
4690 vect_mark_pattern_stmts (stmt_vec_info orig_stmt_info
, gimple
*pattern_stmt
,
4691 tree pattern_vectype
)
4693 gimple
*def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info
);
4695 gimple
*orig_pattern_stmt
= NULL
;
4696 if (is_pattern_stmt_p (orig_stmt_info
))
4698 /* We're replacing a statement in an existing pattern definition
4700 orig_pattern_stmt
= orig_stmt_info
->stmt
;
4701 if (dump_enabled_p ())
4702 dump_printf_loc (MSG_NOTE
, vect_location
,
4703 "replacing earlier pattern %G", orig_pattern_stmt
);
4705 /* To keep the book-keeping simple, just swap the lhs of the
4706 old and new statements, so that the old one has a valid but
4708 tree old_lhs
= gimple_get_lhs (orig_pattern_stmt
);
4709 gimple_set_lhs (orig_pattern_stmt
, gimple_get_lhs (pattern_stmt
));
4710 gimple_set_lhs (pattern_stmt
, old_lhs
);
4712 if (dump_enabled_p ())
4713 dump_printf_loc (MSG_NOTE
, vect_location
, "with %G", pattern_stmt
);
4715 /* Switch to the statement that ORIG replaces. */
4716 orig_stmt_info
= STMT_VINFO_RELATED_STMT (orig_stmt_info
);
4718 /* We shouldn't be replacing the main pattern statement. */
4719 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info
)->stmt
4720 != orig_pattern_stmt
);
4724 for (gimple_stmt_iterator si
= gsi_start (def_seq
);
4725 !gsi_end_p (si
); gsi_next (&si
))
4726 vect_init_pattern_stmt (gsi_stmt (si
), orig_stmt_info
, pattern_vectype
);
4728 if (orig_pattern_stmt
)
4730 vect_init_pattern_stmt (pattern_stmt
, orig_stmt_info
, pattern_vectype
);
4732 /* Insert all the new pattern statements before the original one. */
4733 gimple_seq
*orig_def_seq
= &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info
);
4734 gimple_stmt_iterator gsi
= gsi_for_stmt (orig_pattern_stmt
,
4736 gsi_insert_seq_before_without_update (&gsi
, def_seq
, GSI_SAME_STMT
);
4737 gsi_insert_before_without_update (&gsi
, pattern_stmt
, GSI_SAME_STMT
);
4739 /* Remove the pattern statement that this new pattern replaces. */
4740 gsi_remove (&gsi
, false);
4743 vect_set_pattern_stmt (pattern_stmt
, orig_stmt_info
, pattern_vectype
);
4746 /* Function vect_pattern_recog_1
4749 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
4750 computation pattern.
4751 STMT_INFO: A stmt from which the pattern search should start.
4753 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
4754 a sequence of statements that has the same functionality and can be
4755 used to replace STMT_INFO. It returns the last statement in the sequence
4756 and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
4757 PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
4758 statement, having first checked that the target supports the new operation
4761 This function also does some bookkeeping, as explained in the documentation
4762 for vect_recog_pattern. */
4765 vect_pattern_recog_1 (vect_recog_func
*recog_func
, stmt_vec_info stmt_info
)
4767 vec_info
*vinfo
= stmt_info
->vinfo
;
4768 gimple
*pattern_stmt
;
4769 loop_vec_info loop_vinfo
;
4770 tree pattern_vectype
;
4772 /* If this statement has already been replaced with pattern statements,
4773 leave the original statement alone, since the first match wins.
4774 Instead try to match against the definition statements that feed
4775 the main pattern statement. */
4776 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
4778 gimple_stmt_iterator gsi
;
4779 for (gsi
= gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
));
4780 !gsi_end_p (gsi
); gsi_next (&gsi
))
4781 vect_pattern_recog_1 (recog_func
, vinfo
->lookup_stmt (gsi_stmt (gsi
)));
4785 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
));
4786 pattern_stmt
= recog_func
->fn (stmt_info
, &pattern_vectype
);
4789 /* Clear any half-formed pattern definition sequence. */
4790 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
) = NULL
;
4794 loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4795 gcc_assert (pattern_vectype
);
4797 /* Found a vectorizable pattern. */
4798 if (dump_enabled_p ())
4799 dump_printf_loc (MSG_NOTE
, vect_location
,
4800 "%s pattern recognized: %G",
4801 recog_func
->name
, pattern_stmt
);
4803 /* Mark the stmts that are involved in the pattern. */
4804 vect_mark_pattern_stmts (stmt_info
, pattern_stmt
, pattern_vectype
);
4806 /* Patterns cannot be vectorized using SLP, because they change the order of
4811 stmt_vec_info
*elem_ptr
;
4812 VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo
), ix
, ix2
,
4813 elem_ptr
, *elem_ptr
== stmt_info
);
4818 /* Function vect_pattern_recog
4821 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
4824 Output - for each computation idiom that is detected we create a new stmt
4825 that provides the same functionality and that can be vectorized. We
4826 also record some information in the struct_stmt_info of the relevant
4827 stmts, as explained below:
4829 At the entry to this function we have the following stmts, with the
4830 following initial value in the STMT_VINFO fields:
4832 stmt in_pattern_p related_stmt vec_stmt
4833 S1: a_i = .... - - -
4834 S2: a_2 = ..use(a_i).. - - -
4835 S3: a_1 = ..use(a_2).. - - -
4836 S4: a_0 = ..use(a_1).. - - -
4837 S5: ... = ..use(a_0).. - - -
4839 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
4840 represented by a single stmt. We then:
4841 - create a new stmt S6 equivalent to the pattern (the stmt is not
4842 inserted into the code)
4843 - fill in the STMT_VINFO fields as follows:
4845 in_pattern_p related_stmt vec_stmt
4846 S1: a_i = .... - - -
4847 S2: a_2 = ..use(a_i).. - - -
4848 S3: a_1 = ..use(a_2).. - - -
4849 S4: a_0 = ..use(a_1).. true S6 -
4850 '---> S6: a_new = .... - S4 -
4851 S5: ... = ..use(a_0).. - - -
4853 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
4854 to each other through the RELATED_STMT field).
4856 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
4857 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
4858 remain irrelevant unless used by stmts other than S4.
4860 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
4861 (because they are marked as irrelevant). It will vectorize S6, and record
4862 a pointer to the new vector stmt VS6 from S6 (as usual).
4863 S4 will be skipped, and S5 will be vectorized as usual:
4865 in_pattern_p related_stmt vec_stmt
4866 S1: a_i = .... - - -
4867 S2: a_2 = ..use(a_i).. - - -
4868 S3: a_1 = ..use(a_2).. - - -
4869 > VS6: va_new = .... - - -
4870 S4: a_0 = ..use(a_1).. true S6 VS6
4871 '---> S6: a_new = .... - S4 VS6
4872 > VS5: ... = ..vuse(va_new).. - - -
4873 S5: ... = ..use(a_0).. - - -
4875 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
4876 elsewhere), and we'll end up with:
4879 VS5: ... = ..vuse(va_new)..
4881 In case of more than one pattern statements, e.g., widen-mult with
4885 S2 a_T = (TYPE) a_t;
4886 '--> S3: a_it = (interm_type) a_t;
4887 S4 prod_T = a_T * CONST;
4888 '--> S5: prod_T' = a_it w* CONST;
4890 there may be other users of a_T outside the pattern. In that case S2 will
4891 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
4892 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
4893 be recorded in S3. */
4896 vect_pattern_recog (vec_info
*vinfo
)
4901 gimple_stmt_iterator si
;
4904 vect_determine_precisions (vinfo
);
4906 DUMP_VECT_SCOPE ("vect_pattern_recog");
4908 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
4910 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
4911 bbs
= LOOP_VINFO_BBS (loop_vinfo
);
4912 nbbs
= loop
->num_nodes
;
4914 /* Scan through the loop stmts, applying the pattern recognition
4915 functions starting at each stmt visited: */
4916 for (i
= 0; i
< nbbs
; i
++)
4918 basic_block bb
= bbs
[i
];
4919 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
4921 stmt_vec_info stmt_info
= vinfo
->lookup_stmt (gsi_stmt (si
));
4922 /* Scan over all generic vect_recog_xxx_pattern functions. */
4923 for (j
= 0; j
< NUM_PATTERNS
; j
++)
4924 vect_pattern_recog_1 (&vect_vect_recog_func_ptrs
[j
],
4931 bb_vec_info bb_vinfo
= as_a
<bb_vec_info
> (vinfo
);
4932 for (si
= bb_vinfo
->region_begin
;
4933 gsi_stmt (si
) != gsi_stmt (bb_vinfo
->region_end
); gsi_next (&si
))
4935 gimple
*stmt
= gsi_stmt (si
);
4936 stmt_vec_info stmt_info
= bb_vinfo
->lookup_stmt (stmt
);
4937 if (stmt_info
&& !STMT_VINFO_VECTORIZABLE (stmt_info
))
4940 /* Scan over all generic vect_recog_xxx_pattern functions. */
4941 for (j
= 0; j
< NUM_PATTERNS
; j
++)
4942 vect_pattern_recog_1 (&vect_vect_recog_func_ptrs
[j
], stmt_info
);