/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006-2018 Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "ssa.h"
#include "expmed.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "dumpfile.h"
#include "builtins.h"
#include "internal-fn.h"
#include "case-cfn-macros.h"
#include "fold-const-call.h"
#include "attribs.h"
#include "cgraph.h"
#include "omp-simd-clone.h"
#include "predict.h"
50 /* Return true if we have a useful VR_RANGE range for VAR, storing it
51 in *MIN_VALUE and *MAX_VALUE if so. Note the range in the dump files. */
54 vect_get_range_info (tree var
, wide_int
*min_value
, wide_int
*max_value
)
56 value_range_type vr_type
= get_range_info (var
, min_value
, max_value
);
57 wide_int nonzero
= get_nonzero_bits (var
);
58 signop sgn
= TYPE_SIGN (TREE_TYPE (var
));
59 if (intersect_range_with_nonzero_bits (vr_type
, min_value
, max_value
,
60 nonzero
, sgn
) == VR_RANGE
)
62 if (dump_enabled_p ())
64 dump_generic_expr_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, var
);
65 dump_printf (MSG_NOTE
, " has range [");
66 dump_hex (MSG_NOTE
, *min_value
);
67 dump_printf (MSG_NOTE
, ", ");
68 dump_hex (MSG_NOTE
, *max_value
);
69 dump_printf (MSG_NOTE
, "]\n");
75 if (dump_enabled_p ())
77 dump_generic_expr_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, var
);
78 dump_printf (MSG_NOTE
, " has no range info\n");
84 /* Report that we've found an instance of pattern PATTERN in
88 vect_pattern_detected (const char *name
, gimple
*stmt
)
90 if (dump_enabled_p ())
92 dump_printf_loc (MSG_NOTE
, vect_location
, "%s: detected: ", name
);
93 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
97 /* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
98 return the pattern statement's stmt_vec_info. Set its vector type to
99 VECTYPE if it doesn't have one already. */
102 vect_init_pattern_stmt (gimple
*pattern_stmt
, stmt_vec_info orig_stmt_info
,
105 vec_info
*vinfo
= orig_stmt_info
->vinfo
;
106 stmt_vec_info pattern_stmt_info
= vinfo
->lookup_stmt (pattern_stmt
);
107 if (pattern_stmt_info
== NULL_STMT_VEC_INFO
)
108 pattern_stmt_info
= orig_stmt_info
->vinfo
->add_stmt (pattern_stmt
);
109 gimple_set_bb (pattern_stmt
, gimple_bb (orig_stmt_info
->stmt
));
111 STMT_VINFO_RELATED_STMT (pattern_stmt_info
) = orig_stmt_info
;
112 STMT_VINFO_DEF_TYPE (pattern_stmt_info
)
113 = STMT_VINFO_DEF_TYPE (orig_stmt_info
);
114 if (!STMT_VINFO_VECTYPE (pattern_stmt_info
))
115 STMT_VINFO_VECTYPE (pattern_stmt_info
) = vectype
;
116 return pattern_stmt_info
;
119 /* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
120 Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
124 vect_set_pattern_stmt (gimple
*pattern_stmt
, stmt_vec_info orig_stmt_info
,
127 STMT_VINFO_IN_PATTERN_P (orig_stmt_info
) = true;
128 STMT_VINFO_RELATED_STMT (orig_stmt_info
)
129 = vect_init_pattern_stmt (pattern_stmt
, orig_stmt_info
, vectype
);
132 /* Add NEW_STMT to STMT_INFO's pattern definition statements. If VECTYPE
133 is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
134 be different from the vector type of the final pattern statement. */
137 append_pattern_def_seq (stmt_vec_info stmt_info
, gimple
*new_stmt
,
138 tree vectype
= NULL_TREE
)
140 vec_info
*vinfo
= stmt_info
->vinfo
;
143 stmt_vec_info new_stmt_info
= vinfo
->add_stmt (new_stmt
);
144 STMT_VINFO_VECTYPE (new_stmt_info
) = vectype
;
146 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
),
150 /* The caller wants to perform new operations on vect_external variable
151 VAR, so that the result of the operations would also be vect_external.
152 Return the edge on which the operations can be performed, if one exists.
153 Return null if the operations should instead be treated as part of
154 the pattern that needs them. */
157 vect_get_external_def_edge (vec_info
*vinfo
, tree var
)
160 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
162 e
= loop_preheader_edge (loop_vinfo
->loop
);
163 if (!SSA_NAME_IS_DEFAULT_DEF (var
))
165 basic_block bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
167 || !dominated_by_p (CDI_DOMINATORS
, e
->dest
, bb
))
174 /* Return true if the target supports a vector version of CODE,
175 where CODE is known to map to a direct optab. ITYPE specifies
176 the type of (some of) the scalar inputs and OTYPE specifies the
177 type of the scalar result.
179 If CODE allows the inputs and outputs to have different type
180 (such as for WIDEN_SUM_EXPR), it is the input mode rather
181 than the output mode that determines the appropriate target pattern.
182 Operand 0 of the target pattern then specifies the mode that the output
185 When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
186 Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
190 vect_supportable_direct_optab_p (tree otype
, tree_code code
,
191 tree itype
, tree
*vecotype_out
,
192 tree
*vecitype_out
= NULL
)
194 tree vecitype
= get_vectype_for_scalar_type (itype
);
198 tree vecotype
= get_vectype_for_scalar_type (otype
);
202 optab optab
= optab_for_tree_code (code
, vecitype
, optab_default
);
206 insn_code icode
= optab_handler (optab
, TYPE_MODE (vecitype
));
207 if (icode
== CODE_FOR_nothing
208 || insn_data
[icode
].operand
[0].mode
!= TYPE_MODE (vecotype
))
211 *vecotype_out
= vecotype
;
213 *vecitype_out
= vecitype
;
217 /* Round bit precision PRECISION up to a full element. */
220 vect_element_precision (unsigned int precision
)
222 precision
= 1 << ceil_log2 (precision
);
223 return MAX (precision
, BITS_PER_UNIT
);
226 /* If OP is defined by a statement that's being considered for vectorization,
227 return information about that statement, otherwise return NULL. */
230 vect_get_internal_def (vec_info
*vinfo
, tree op
)
232 stmt_vec_info def_stmt_info
= vinfo
->lookup_def (op
);
234 && STMT_VINFO_DEF_TYPE (def_stmt_info
) == vect_internal_def
)
235 return def_stmt_info
;
239 /* Check whether NAME, an ssa-name used in STMT_VINFO,
240 is a result of a type promotion, such that:
241 DEF_STMT: NAME = NOP (name0)
242 If CHECK_SIGN is TRUE, check that either both types are signed or both are
246 type_conversion_p (tree name
, stmt_vec_info stmt_vinfo
, bool check_sign
,
247 tree
*orig_type
, gimple
**def_stmt
, bool *promotion
)
249 tree type
= TREE_TYPE (name
);
251 enum vect_def_type dt
;
253 stmt_vec_info def_stmt_info
;
254 if (!vect_is_simple_use (name
, stmt_vinfo
->vinfo
, &dt
, &def_stmt_info
,
258 if (dt
!= vect_internal_def
259 && dt
!= vect_external_def
&& dt
!= vect_constant_def
)
265 if (!is_gimple_assign (*def_stmt
))
268 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt
)))
271 oprnd0
= gimple_assign_rhs1 (*def_stmt
);
273 *orig_type
= TREE_TYPE (oprnd0
);
274 if (!INTEGRAL_TYPE_P (type
) || !INTEGRAL_TYPE_P (*orig_type
)
275 || ((TYPE_UNSIGNED (type
) != TYPE_UNSIGNED (*orig_type
)) && check_sign
))
278 if (TYPE_PRECISION (type
) >= (TYPE_PRECISION (*orig_type
) * 2))
283 if (!vect_is_simple_use (oprnd0
, stmt_vinfo
->vinfo
, &dt
))
289 /* Holds information about an input operand after some sign changes
290 and type promotions have been peeled away. */
291 struct vect_unpromoted_value
{
292 vect_unpromoted_value ();
294 void set_op (tree
, vect_def_type
, stmt_vec_info
= NULL
);
296 /* The value obtained after peeling away zero or more casts. */
299 /* The type of OP. */
302 /* The definition type of OP. */
305 /* If OP is the result of peeling at least one cast, and if the cast
306 of OP itself is a vectorizable statement, CASTER identifies that
307 statement, otherwise it is null. */
308 stmt_vec_info caster
;
311 inline vect_unpromoted_value::vect_unpromoted_value ()
314 dt (vect_uninitialized_def
),
319 /* Set the operand to OP_IN, its definition type to DT_IN, and the
320 statement that casts it to CASTER_IN. */
323 vect_unpromoted_value::set_op (tree op_in
, vect_def_type dt_in
,
324 stmt_vec_info caster_in
)
327 type
= TREE_TYPE (op
);
332 /* If OP is a vectorizable SSA name, strip a sequence of integer conversions
333 to reach some vectorizable inner operand OP', continuing as long as it
334 is possible to convert OP' back to OP using a possible sign change
335 followed by a possible promotion P. Return this OP', or null if OP is
336 not a vectorizable SSA name. If there is a promotion P, describe its
337 input in UNPROM, otherwise describe OP' in UNPROM. If SINGLE_USE_P
338 is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
339 have more than one user.
341 A successful return means that it is possible to go from OP' to OP
342 via UNPROM. The cast from OP' to UNPROM is at most a sign change,
343 whereas the cast from UNPROM to OP might be a promotion, a sign
348 signed short *ptr = ...;
349 signed short C = *ptr;
350 unsigned short B = (unsigned short) C; // sign change
351 signed int A = (signed int) B; // unsigned promotion
352 ...possible other uses of A...
353 unsigned int OP = (unsigned int) A; // sign change
355 In this case it's possible to go directly from C to OP using:
357 OP = (unsigned int) (unsigned short) C;
358 +------------+ +--------------+
359 promotion sign change
361 so OP' would be C. The input to the promotion is B, so UNPROM
365 vect_look_through_possible_promotion (vec_info
*vinfo
, tree op
,
366 vect_unpromoted_value
*unprom
,
367 bool *single_use_p
= NULL
)
369 tree res
= NULL_TREE
;
370 tree op_type
= TREE_TYPE (op
);
371 unsigned int orig_precision
= TYPE_PRECISION (op_type
);
372 stmt_vec_info caster
= NULL
;
373 while (TREE_CODE (op
) == SSA_NAME
&& INTEGRAL_TYPE_P (op_type
))
375 /* See whether OP is simple enough to vectorize. */
376 stmt_vec_info def_stmt_info
;
379 if (!vect_is_simple_use (op
, vinfo
, &dt
, &def_stmt_info
, &def_stmt
))
382 /* If OP is the input of a demotion, skip over it to see whether
383 OP is itself the result of a promotion. If so, the combined
384 effect of the promotion and the demotion might fit the required
385 pattern, otherwise neither operation fits.
387 This copes with cases such as the result of an arithmetic
388 operation being truncated before being stored, and where that
389 arithmetic operation has been recognized as an over-widened one. */
390 if (TYPE_PRECISION (op_type
) <= orig_precision
)
392 /* Use OP as the UNPROM described above if we haven't yet
393 found a promotion, or if using the new input preserves the
394 sign of the previous promotion. */
396 || TYPE_PRECISION (unprom
->type
) == orig_precision
397 || TYPE_SIGN (unprom
->type
) == TYPE_SIGN (op_type
))
398 unprom
->set_op (op
, dt
, caster
);
399 /* Stop if we've already seen a promotion and if this
400 conversion does more than change the sign. */
401 else if (TYPE_PRECISION (op_type
)
402 != TYPE_PRECISION (unprom
->type
))
405 /* The sequence now extends to OP. */
409 /* See whether OP is defined by a cast. Record it as CASTER if
410 the cast is potentially vectorizable. */
413 caster
= def_stmt_info
;
415 /* Ignore pattern statements, since we don't link uses for them. */
418 && !STMT_VINFO_RELATED_STMT (caster
)
419 && !has_single_use (res
))
420 *single_use_p
= false;
422 gassign
*assign
= dyn_cast
<gassign
*> (def_stmt
);
423 if (!assign
|| !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt
)))
426 /* Continue with the input to the cast. */
427 op
= gimple_assign_rhs1 (def_stmt
);
428 op_type
= TREE_TYPE (op
);
433 /* OP is an integer operand to an operation that returns TYPE, and we
434 want to treat the operation as a widening one. So far we can treat
435 it as widening from *COMMON_TYPE.
437 Return true if OP is suitable for such a widening operation,
438 either widening from *COMMON_TYPE or from some supertype of it.
439 Update *COMMON_TYPE to the supertype in the latter case.
441 SHIFT_P is true if OP is a shift amount. */
444 vect_joust_widened_integer (tree type
, bool shift_p
, tree op
,
447 /* Calculate the minimum precision required by OP, without changing
448 the sign of either operand. */
449 unsigned int precision
;
452 if (!wi::leu_p (wi::to_widest (op
), TYPE_PRECISION (type
) / 2))
454 precision
= TREE_INT_CST_LOW (op
);
458 precision
= wi::min_precision (wi::to_widest (op
),
459 TYPE_SIGN (*common_type
));
460 if (precision
* 2 > TYPE_PRECISION (type
))
464 /* If OP requires a wider type, switch to that type. The checks
465 above ensure that this is still narrower than the result. */
466 precision
= vect_element_precision (precision
);
467 if (TYPE_PRECISION (*common_type
) < precision
)
468 *common_type
= build_nonstandard_integer_type
469 (precision
, TYPE_UNSIGNED (*common_type
));
473 /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
474 is narrower than type, storing the supertype in *COMMON_TYPE if so. */
477 vect_joust_widened_type (tree type
, tree new_type
, tree
*common_type
)
479 if (types_compatible_p (*common_type
, new_type
))
482 /* See if *COMMON_TYPE can hold all values of NEW_TYPE. */
483 if ((TYPE_PRECISION (new_type
) < TYPE_PRECISION (*common_type
))
484 && (TYPE_UNSIGNED (new_type
) || !TYPE_UNSIGNED (*common_type
)))
487 /* See if NEW_TYPE can hold all values of *COMMON_TYPE. */
488 if (TYPE_PRECISION (*common_type
) < TYPE_PRECISION (new_type
)
489 && (TYPE_UNSIGNED (*common_type
) || !TYPE_UNSIGNED (new_type
)))
491 *common_type
= new_type
;
495 /* We have mismatched signs, with the signed type being
496 no wider than the unsigned type. In this case we need
497 a wider signed type. */
498 unsigned int precision
= MAX (TYPE_PRECISION (*common_type
),
499 TYPE_PRECISION (new_type
));
501 if (precision
* 2 > TYPE_PRECISION (type
))
504 *common_type
= build_nonstandard_integer_type (precision
, false);
508 /* Check whether STMT_INFO can be viewed as a tree of integer operations
509 in which each node either performs CODE or WIDENED_CODE, and where
510 each leaf operand is narrower than the result of STMT_INFO. MAX_NOPS
511 specifies the maximum number of leaf operands. SHIFT_P says whether
512 CODE and WIDENED_CODE are some sort of shift.
514 If STMT_INFO is such a tree, return the number of leaf operands
515 and describe them in UNPROM[0] onwards. Also set *COMMON_TYPE
516 to a type that (a) is narrower than the result of STMT_INFO and
517 (b) can hold all leaf operand values.
519 Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
523 vect_widened_op_tree (stmt_vec_info stmt_info
, tree_code code
,
524 tree_code widened_code
, bool shift_p
,
525 unsigned int max_nops
,
526 vect_unpromoted_value
*unprom
, tree
*common_type
)
528 /* Check for an integer operation with the right code. */
529 vec_info
*vinfo
= stmt_info
->vinfo
;
530 gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
534 tree_code rhs_code
= gimple_assign_rhs_code (assign
);
535 if (rhs_code
!= code
&& rhs_code
!= widened_code
)
538 tree type
= gimple_expr_type (assign
);
539 if (!INTEGRAL_TYPE_P (type
))
542 /* Assume that both operands will be leaf operands. */
545 /* Check the operands. */
546 unsigned int next_op
= 0;
547 for (unsigned int i
= 0; i
< 2; ++i
)
549 vect_unpromoted_value
*this_unprom
= &unprom
[next_op
];
550 unsigned int nops
= 1;
551 tree op
= gimple_op (assign
, i
+ 1);
552 if (i
== 1 && TREE_CODE (op
) == INTEGER_CST
)
554 /* We already have a common type from earlier operands.
555 Update it to account for OP. */
556 this_unprom
->set_op (op
, vect_constant_def
);
557 if (!vect_joust_widened_integer (type
, shift_p
, op
, common_type
))
562 /* Only allow shifts by constants. */
563 if (shift_p
&& i
== 1)
566 if (!vect_look_through_possible_promotion (stmt_info
->vinfo
, op
,
570 if (TYPE_PRECISION (this_unprom
->type
) == TYPE_PRECISION (type
))
572 /* The operand isn't widened. If STMT_INFO has the code
573 for an unwidened operation, recursively check whether
574 this operand is a node of the tree. */
577 || this_unprom
->dt
!= vect_internal_def
)
580 /* Give back the leaf slot allocated above now that we're
581 not treating this as a leaf operand. */
584 /* Recursively process the definition of the operand. */
585 stmt_vec_info def_stmt_info
586 = vinfo
->lookup_def (this_unprom
->op
);
587 nops
= vect_widened_op_tree (def_stmt_info
, code
, widened_code
,
588 shift_p
, max_nops
, this_unprom
,
597 /* Make sure that the operand is narrower than the result. */
598 if (TYPE_PRECISION (this_unprom
->type
) * 2
599 > TYPE_PRECISION (type
))
602 /* Update COMMON_TYPE for the new operand. */
604 *common_type
= this_unprom
->type
;
605 else if (!vect_joust_widened_type (type
, this_unprom
->type
,
615 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
616 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
619 vect_recog_temp_ssa_var (tree type
, gimple
*stmt
)
621 return make_temp_ssa_name (type
, stmt
, "patt");
624 /* STMT2_INFO describes a type conversion that could be split into STMT1
625 followed by a version of STMT2_INFO that takes NEW_RHS as its first
626 input. Try to do this using pattern statements, returning true on
630 vect_split_statement (stmt_vec_info stmt2_info
, tree new_rhs
,
631 gimple
*stmt1
, tree vectype
)
633 if (is_pattern_stmt_p (stmt2_info
))
635 /* STMT2_INFO is part of a pattern. Get the statement to which
636 the pattern is attached. */
637 stmt_vec_info orig_stmt2_info
= STMT_VINFO_RELATED_STMT (stmt2_info
);
638 vect_init_pattern_stmt (stmt1
, orig_stmt2_info
, vectype
);
640 if (dump_enabled_p ())
642 dump_printf_loc (MSG_NOTE
, vect_location
,
643 "Splitting pattern statement: ");
644 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt2_info
->stmt
, 0);
647 /* Since STMT2_INFO is a pattern statement, we can change it
648 in-situ without worrying about changing the code for the
650 gimple_assign_set_rhs1 (stmt2_info
->stmt
, new_rhs
);
652 if (dump_enabled_p ())
654 dump_printf_loc (MSG_NOTE
, vect_location
, "into: ");
655 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt1
, 0);
656 dump_printf_loc (MSG_NOTE
, vect_location
, "and: ");
657 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt2_info
->stmt
, 0);
660 gimple_seq
*def_seq
= &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info
);
661 if (STMT_VINFO_RELATED_STMT (orig_stmt2_info
) == stmt2_info
)
662 /* STMT2_INFO is the actual pattern statement. Add STMT1
663 to the end of the definition sequence. */
664 gimple_seq_add_stmt_without_update (def_seq
, stmt1
);
667 /* STMT2_INFO belongs to the definition sequence. Insert STMT1
669 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt2_info
->stmt
, def_seq
);
670 gsi_insert_before_without_update (&gsi
, stmt1
, GSI_SAME_STMT
);
676 /* STMT2_INFO doesn't yet have a pattern. Try to create a
677 two-statement pattern now. */
678 gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info
));
679 tree lhs_type
= TREE_TYPE (gimple_get_lhs (stmt2_info
->stmt
));
680 tree lhs_vectype
= get_vectype_for_scalar_type (lhs_type
);
684 if (dump_enabled_p ())
686 dump_printf_loc (MSG_NOTE
, vect_location
,
687 "Splitting statement: ");
688 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt2_info
->stmt
, 0);
691 /* Add STMT1 as a singleton pattern definition sequence. */
692 gimple_seq
*def_seq
= &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info
);
693 vect_init_pattern_stmt (stmt1
, stmt2_info
, vectype
);
694 gimple_seq_add_stmt_without_update (def_seq
, stmt1
);
696 /* Build the second of the two pattern statements. */
697 tree new_lhs
= vect_recog_temp_ssa_var (lhs_type
, NULL
);
698 gassign
*new_stmt2
= gimple_build_assign (new_lhs
, NOP_EXPR
, new_rhs
);
699 vect_set_pattern_stmt (new_stmt2
, stmt2_info
, lhs_vectype
);
701 if (dump_enabled_p ())
703 dump_printf_loc (MSG_NOTE
, vect_location
,
704 "into pattern statements: ");
705 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt1
, 0);
706 dump_printf_loc (MSG_NOTE
, vect_location
, "and: ");
707 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, new_stmt2
, 0);
714 /* Convert UNPROM to TYPE and return the result, adding new statements
715 to STMT_INFO's pattern definition statements if no better way is
716 available. VECTYPE is the vector form of TYPE. */
719 vect_convert_input (stmt_vec_info stmt_info
, tree type
,
720 vect_unpromoted_value
*unprom
, tree vectype
)
722 /* Check for a no-op conversion. */
723 if (types_compatible_p (type
, TREE_TYPE (unprom
->op
)))
726 /* Allow the caller to create constant vect_unpromoted_values. */
727 if (TREE_CODE (unprom
->op
) == INTEGER_CST
)
728 return wide_int_to_tree (type
, wi::to_widest (unprom
->op
));
730 /* See if we can reuse an existing result. */
733 tree lhs
= gimple_get_lhs (unprom
->caster
->stmt
);
734 if (types_compatible_p (TREE_TYPE (lhs
), type
))
738 /* We need a new conversion statement. */
739 tree new_op
= vect_recog_temp_ssa_var (type
, NULL
);
740 gassign
*new_stmt
= gimple_build_assign (new_op
, NOP_EXPR
, unprom
->op
);
742 /* If the operation is the input to a vectorizable cast, try splitting
743 that cast into two, taking the required result as a mid-way point. */
746 tree lhs
= gimple_get_lhs (unprom
->caster
->stmt
);
747 if (TYPE_PRECISION (TREE_TYPE (lhs
)) > TYPE_PRECISION (type
)
748 && TYPE_PRECISION (type
) > TYPE_PRECISION (unprom
->type
)
749 && (TYPE_UNSIGNED (unprom
->type
) || !TYPE_UNSIGNED (type
))
750 && vect_split_statement (unprom
->caster
, new_op
, new_stmt
, vectype
))
754 /* If OP is an external value, see if we can insert the new statement
755 on an incoming edge. */
756 if (unprom
->dt
== vect_external_def
)
757 if (edge e
= vect_get_external_def_edge (stmt_info
->vinfo
, unprom
->op
))
759 basic_block new_bb
= gsi_insert_on_edge_immediate (e
, new_stmt
);
760 gcc_assert (!new_bb
);
764 /* As a (common) last resort, add the statement to the pattern itself. */
765 append_pattern_def_seq (stmt_info
, new_stmt
, vectype
);
769 /* Invoke vect_convert_input for N elements of UNPROM and store the
770 result in the corresponding elements of RESULT. */
773 vect_convert_inputs (stmt_vec_info stmt_info
, unsigned int n
,
774 tree
*result
, tree type
, vect_unpromoted_value
*unprom
,
777 for (unsigned int i
= 0; i
< n
; ++i
)
780 for (j
= 0; j
< i
; ++j
)
781 if (unprom
[j
].op
== unprom
[i
].op
)
784 result
[i
] = result
[j
];
786 result
[i
] = vect_convert_input (stmt_info
, type
, &unprom
[i
], vectype
);
790 /* The caller has created a (possibly empty) sequence of pattern definition
791 statements followed by a single statement PATTERN_STMT. Cast the result
792 of this final statement to TYPE. If a new statement is needed, add
793 PATTERN_STMT to the end of STMT_INFO's pattern definition statements
794 and return the new statement, otherwise return PATTERN_STMT as-is.
795 VECITYPE is the vector form of PATTERN_STMT's result type. */
798 vect_convert_output (stmt_vec_info stmt_info
, tree type
, gimple
*pattern_stmt
,
801 tree lhs
= gimple_get_lhs (pattern_stmt
);
802 if (!types_compatible_p (type
, TREE_TYPE (lhs
)))
804 append_pattern_def_seq (stmt_info
, pattern_stmt
, vecitype
);
805 tree cast_var
= vect_recog_temp_ssa_var (type
, NULL
);
806 pattern_stmt
= gimple_build_assign (cast_var
, NOP_EXPR
, lhs
);
811 /* Return true if STMT_VINFO describes a reduction for which reassociation
812 is allowed. If STMT_INFO is part of a group, assume that it's part of
813 a reduction chain and optimistically assume that all statements
814 except the last allow reassociation. */
817 vect_reassociating_reduction_p (stmt_vec_info stmt_vinfo
)
819 return (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
820 ? STMT_VINFO_REDUC_TYPE (stmt_vinfo
) != FOLD_LEFT_REDUCTION
821 : REDUC_GROUP_FIRST_ELEMENT (stmt_vinfo
) != NULL_STMT_VEC_INFO
);
824 /* As above, but also require it to have code CODE and to be a reduction
825 in the outermost loop. When returning true, store the operands in
826 *OP0_OUT and *OP1_OUT. */
829 vect_reassociating_reduction_p (stmt_vec_info stmt_info
, tree_code code
,
830 tree
*op0_out
, tree
*op1_out
)
832 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_info
);
836 gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
837 if (!assign
|| gimple_assign_rhs_code (assign
) != code
)
840 /* We don't allow changing the order of the computation in the inner-loop
841 when doing outer-loop vectorization. */
842 struct loop
*loop
= LOOP_VINFO_LOOP (loop_info
);
843 if (loop
&& nested_in_vect_loop_p (loop
, stmt_info
))
846 if (!vect_reassociating_reduction_p (stmt_info
))
849 *op0_out
= gimple_assign_rhs1 (assign
);
850 *op1_out
= gimple_assign_rhs2 (assign
);
854 /* Function vect_recog_dot_prod_pattern
856 Try to find the following pattern:
862 sum_0 = phi <init, sum_1>
865 S3 x_T = (TYPE1) x_t;
866 S4 y_T = (TYPE1) y_t;
868 [S6 prod = (TYPE2) prod; #optional]
869 S7 sum_1 = prod + sum_0;
871 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
872 same size of 'TYPE1' or bigger. This is a special case of a reduction
877 * STMT_VINFO: The stmt from which the pattern search begins. In the
878 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
883 * TYPE_OUT: The type of the output of this pattern.
885 * Return value: A new stmt that will be used to replace the sequence of
886 stmts that constitute the pattern. In this case it will be:
887 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
889 Note: The dot-prod idiom is a widening reduction pattern that is
890 vectorized without preserving all the intermediate results. It
891 produces only N/2 (widened) results (by summing up pairs of
892 intermediate results) rather than all N results. Therefore, we
893 cannot allow this pattern when we want to get all the results and in
894 the correct order (as is the case when this computation is in an
895 inner-loop nested in an outer-loop that us being vectorized). */
898 vect_recog_dot_prod_pattern (stmt_vec_info stmt_vinfo
, tree
*type_out
)
901 gimple
*last_stmt
= stmt_vinfo
->stmt
;
902 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
903 tree type
, half_type
;
904 gimple
*pattern_stmt
;
907 /* Look for the following pattern
911 DDPROD = (TYPE2) DPROD;
912 sum_1 = DDPROD + sum_0;
914 - DX is double the size of X
915 - DY is double the size of Y
916 - DX, DY, DPROD all have the same type
917 - sum is the same size of DPROD or bigger
918 - sum has been recognized as a reduction variable.
920 This is equivalent to:
921 DPROD = X w* Y; #widen mult
922 sum_1 = DPROD w+ sum_0; #widen summation
924 DPROD = X w* Y; #widen mult
925 sum_1 = DPROD + sum_0; #summation
928 /* Starting from LAST_STMT, follow the defs of its uses in search
929 of the above pattern. */
931 if (!vect_reassociating_reduction_p (stmt_vinfo
, PLUS_EXPR
,
935 type
= gimple_expr_type (last_stmt
);
937 vect_unpromoted_value unprom_mult
;
938 oprnd0
= vect_look_through_possible_promotion (vinfo
, oprnd0
, &unprom_mult
);
940 /* So far so good. Since last_stmt was detected as a (summation) reduction,
941 we know that oprnd1 is the reduction variable (defined by a loop-header
942 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
943 Left to check that oprnd0 is defined by a (widen_)mult_expr */
947 stmt_vec_info mult_vinfo
= vect_get_internal_def (vinfo
, oprnd0
);
951 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
952 inside the loop (in case we are analyzing an outer-loop). */
953 vect_unpromoted_value unprom0
[2];
954 if (!vect_widened_op_tree (mult_vinfo
, MULT_EXPR
, WIDEN_MULT_EXPR
,
955 false, 2, unprom0
, &half_type
))
958 /* If there are two widening operations, make sure they agree on
959 the sign of the extension. */
960 if (TYPE_PRECISION (unprom_mult
.type
) != TYPE_PRECISION (type
)
961 && TYPE_SIGN (unprom_mult
.type
) != TYPE_SIGN (half_type
))
964 vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt
);
967 if (!vect_supportable_direct_optab_p (type
, DOT_PROD_EXPR
, half_type
,
968 type_out
, &half_vectype
))
971 /* Get the inputs in the appropriate types. */
973 vect_convert_inputs (stmt_vinfo
, 2, mult_oprnd
, half_type
,
974 unprom0
, half_vectype
);
976 var
= vect_recog_temp_ssa_var (type
, NULL
);
977 pattern_stmt
= gimple_build_assign (var
, DOT_PROD_EXPR
,
978 mult_oprnd
[0], mult_oprnd
[1], oprnd1
);
984 /* Function vect_recog_sad_pattern
986 Try to find the following Sum of Absolute Difference (SAD) pattern:
989 signed TYPE1 diff, abs_diff;
992 sum_0 = phi <init, sum_1>
995 S3 x_T = (TYPE1) x_t;
996 S4 y_T = (TYPE1) y_t;
998 S6 abs_diff = ABS_EXPR <diff>;
999 [S7 abs_diff = (TYPE2) abs_diff; #optional]
1000 S8 sum_1 = abs_diff + sum_0;
1002 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
1003 same size of 'TYPE1' or bigger. This is a special case of a reduction
1008 * STMT_VINFO: The stmt from which the pattern search begins. In the
1009 example, when this function is called with S8, the pattern
1010 {S3,S4,S5,S6,S7,S8} will be detected.
1014 * TYPE_OUT: The type of the output of this pattern.
1016 * Return value: A new stmt that will be used to replace the sequence of
1017 stmts that constitute the pattern. In this case it will be:
1018 SAD_EXPR <x_t, y_t, sum_0>
1022 vect_recog_sad_pattern (stmt_vec_info stmt_vinfo
, tree
*type_out
)
1024 gimple
*last_stmt
= stmt_vinfo
->stmt
;
1025 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
1028 /* Look for the following pattern
1032 DAD = ABS_EXPR <DDIFF>;
1033 DDPROD = (TYPE2) DPROD;
1034 sum_1 = DAD + sum_0;
1036 - DX is at least double the size of X
1037 - DY is at least double the size of Y
1038 - DX, DY, DDIFF, DAD all have the same type
1039 - sum is the same size of DAD or bigger
1040 - sum has been recognized as a reduction variable.
1042 This is equivalent to:
1043 DDIFF = X w- Y; #widen sub
1044 DAD = ABS_EXPR <DDIFF>;
1045 sum_1 = DAD w+ sum_0; #widen summation
1047 DDIFF = X w- Y; #widen sub
1048 DAD = ABS_EXPR <DDIFF>;
1049 sum_1 = DAD + sum_0; #summation
1052 /* Starting from LAST_STMT, follow the defs of its uses in search
1053 of the above pattern. */
1055 tree plus_oprnd0
, plus_oprnd1
;
1056 if (!vect_reassociating_reduction_p (stmt_vinfo
, PLUS_EXPR
,
1057 &plus_oprnd0
, &plus_oprnd1
))
1060 tree sum_type
= gimple_expr_type (last_stmt
);
1062 /* Any non-truncating sequence of conversions is OK here, since
1063 with a successful match, the result of the ABS(U) is known to fit
1064 within the nonnegative range of the result type. (It cannot be the
1065 negative of the minimum signed value due to the range of the widening
1067 vect_unpromoted_value unprom_abs
;
1068 plus_oprnd0
= vect_look_through_possible_promotion (vinfo
, plus_oprnd0
,
1071 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1072 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
1073 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
1074 Then check that plus_oprnd0 is defined by an abs_expr. */
1079 stmt_vec_info abs_stmt_vinfo
= vect_get_internal_def (vinfo
, plus_oprnd0
);
1080 if (!abs_stmt_vinfo
)
1083 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
1084 inside the loop (in case we are analyzing an outer-loop). */
1085 gassign
*abs_stmt
= dyn_cast
<gassign
*> (abs_stmt_vinfo
->stmt
);
1087 || (gimple_assign_rhs_code (abs_stmt
) != ABS_EXPR
1088 && gimple_assign_rhs_code (abs_stmt
) != ABSU_EXPR
))
1091 tree abs_oprnd
= gimple_assign_rhs1 (abs_stmt
);
1092 tree abs_type
= TREE_TYPE (abs_oprnd
);
1093 if (TYPE_UNSIGNED (abs_type
))
1096 /* Peel off conversions from the ABS input. This can involve sign
1097 changes (e.g. from an unsigned subtraction to a signed ABS input)
1098 or signed promotion, but it can't include unsigned promotion.
1099 (Note that ABS of an unsigned promotion should have been folded
1100 away before now anyway.) */
1101 vect_unpromoted_value unprom_diff
;
1102 abs_oprnd
= vect_look_through_possible_promotion (vinfo
, abs_oprnd
,
1106 if (TYPE_PRECISION (unprom_diff
.type
) != TYPE_PRECISION (abs_type
)
1107 && TYPE_UNSIGNED (unprom_diff
.type
))
1110 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
1111 stmt_vec_info diff_stmt_vinfo
= vect_get_internal_def (vinfo
, abs_oprnd
);
1112 if (!diff_stmt_vinfo
)
1115 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
1116 inside the loop (in case we are analyzing an outer-loop). */
1117 vect_unpromoted_value unprom
[2];
1118 if (!vect_widened_op_tree (diff_stmt_vinfo
, MINUS_EXPR
, MINUS_EXPR
,
1119 false, 2, unprom
, &half_type
))
1122 vect_pattern_detected ("vect_recog_sad_pattern", last_stmt
);
1125 if (!vect_supportable_direct_optab_p (sum_type
, SAD_EXPR
, half_type
,
1126 type_out
, &half_vectype
))
1129 /* Get the inputs to the SAD_EXPR in the appropriate types. */
1131 vect_convert_inputs (stmt_vinfo
, 2, sad_oprnd
, half_type
,
1132 unprom
, half_vectype
);
1134 tree var
= vect_recog_temp_ssa_var (sum_type
, NULL
);
1135 gimple
*pattern_stmt
= gimple_build_assign (var
, SAD_EXPR
, sad_oprnd
[0],
1136 sad_oprnd
[1], plus_oprnd1
);
1138 return pattern_stmt
;
1141 /* Recognize an operation that performs ORIG_CODE on widened inputs,
1142 so that it can be treated as though it had the form:
1146 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1147 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1148 | RES_TYPE a_extend = (RES_TYPE) a_cast; // promotion from HALF_TYPE
1149 | RES_TYPE b_extend = (RES_TYPE) b_cast; // promotion from HALF_TYPE
1150 | RES_TYPE res = a_extend ORIG_CODE b_extend;
1152 Try to replace the pattern with:
1156 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1157 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1158 | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
1159 | RES_TYPE res = (EXT_TYPE) ext; // possible no-op
1161 where EXT_TYPE is wider than HALF_TYPE but has the same signedness.
1163 SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts. NAME is the
1164 name of the pattern being matched, for dump purposes. */
1167 vect_recog_widen_op_pattern (stmt_vec_info last_stmt_info
, tree
*type_out
,
1168 tree_code orig_code
, tree_code wide_code
,
1169 bool shift_p
, const char *name
)
/* NOTE(review): this extraction is missing several original lines (braces,
   early "return NULL;" exits).  Only comments were added below; the code
   text itself is untouched.  */
1171 gimple
*last_stmt
= last_stmt_info
->stmt
;
1173 vect_unpromoted_value unprom
[2];
/* Require LAST_STMT to be a tree of ORIG_CODE operations whose inputs are
   all widened from a common narrower type HALF_TYPE.  */
1175 if (!vect_widened_op_tree (last_stmt_info
, orig_code
, orig_code
,
1176 shift_p
, 2, unprom
, &half_type
))
1179 /* Pattern detected. */
1180 vect_pattern_detected (name
, last_stmt
);
1182 tree type
= gimple_expr_type (last_stmt
);
/* Perform the operation in a type exactly twice the precision of HALF_TYPE
   and with the same signedness; build such a type if TYPE doesn't fit.  */
1184 if (TYPE_PRECISION (type
) != TYPE_PRECISION (half_type
) * 2
1185 || TYPE_UNSIGNED (type
) != TYPE_UNSIGNED (half_type
))
1186 itype
= build_nonstandard_integer_type (TYPE_PRECISION (half_type
) * 2,
1187 TYPE_UNSIGNED (half_type
));
1189 /* Check target support */
1190 tree vectype
= get_vectype_for_scalar_type (half_type
);
1191 tree vecitype
= get_vectype_for_scalar_type (itype
);
1192 enum tree_code dummy_code
;
1194 auto_vec
<tree
> dummy_vec
;
/* Ask the target whether WIDE_CODE is supported for these vector types;
   the dummy outputs are unused here.  */
1197 || !supportable_widening_operation (wide_code
, last_stmt_info
,
1199 &dummy_code
, &dummy_code
,
1200 &dummy_int
, &dummy_vec
))
/* Record the vector type of the original (wide) result for the caller.  */
1203 *type_out
= get_vectype_for_scalar_type (type
);
1208 vect_convert_inputs (last_stmt_info
, 2, oprnd
, half_type
, unprom
, vectype
);
/* Build the widening operation on the converted inputs, then convert the
   result back to TYPE (a no-op when ITYPE == TYPE).  */
1210 tree var
= vect_recog_temp_ssa_var (itype
, NULL
);
1211 gimple
*pattern_stmt
= gimple_build_assign (var
, wide_code
,
1212 oprnd
[0], oprnd
[1]);
1214 return vect_convert_output (last_stmt_info
, type
, pattern_stmt
, vecitype
);
1217 /* Try to detect multiplication on widened inputs, converting MULT_EXPR
1218 to WIDEN_MULT_EXPR. See vect_recog_widen_op_pattern for details. */
1221 vect_recog_widen_mult_pattern (stmt_vec_info last_stmt_info
, tree
*type_out
)
/* Thin wrapper: delegate to the generic widening-op matcher, mapping a
   MULT_EXPR on promoted inputs to WIDEN_MULT_EXPR (not a shift).  */
1223 return vect_recog_widen_op_pattern (last_stmt_info
, type_out
, MULT_EXPR
,
1224 WIDEN_MULT_EXPR
, false,
1225 "vect_recog_widen_mult_pattern");
1228 /* Function vect_recog_pow_pattern
1230 Try to find the following pattern:
1234 with POW being one of pow, powf, powi, powif and N being
1239 * STMT_VINFO: The stmt from which the pattern search begins.
1243 * TYPE_OUT: The type of the output of this pattern.
1245 * Return value: A new stmt that will be used to replace the sequence of
1246 stmts that constitute the pattern. In this case it will be:
1253 vect_recog_pow_pattern (stmt_vec_info stmt_vinfo
, tree
*type_out
)
/* NOTE(review): this extraction dropped several original lines (case
   labels of the switches, braces, early returns).  Comments only added;
   code text untouched.  */
1255 gimple
*last_stmt
= stmt_vinfo
->stmt
;
/* Only calls with a used result are interesting.  */
1260 if (!is_gimple_call (last_stmt
) || gimple_call_lhs (last_stmt
) == NULL
)
/* Dispatch on the combined (built-in or internal) function being called;
   the case labels are among the dropped lines.  */
1263 switch (gimple_call_combined_fn (last_stmt
))
1273 base
= gimple_call_arg (last_stmt
, 0);
1274 exp
= gimple_call_arg (last_stmt
, 1);
/* The exponent must be compile-time constant (real or integer).  */
1275 if (TREE_CODE (exp
) != REAL_CST
1276 && TREE_CODE (exp
) != INTEGER_CST
)
/* pow (C, x) with constant base C: under -funsafe-math-optimizations try
   rewriting as exp (log (C) * x) so a SIMD clone of exp can be used.  */
1278 if (flag_unsafe_math_optimizations
1279 && TREE_CODE (base
) == REAL_CST
1280 && !gimple_call_internal_p (last_stmt
))
1282 combined_fn log_cfn
;
1283 built_in_function exp_bfn
;
/* Pick the log/exp pair matching the precision of the pow variant.  */
1284 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt
)))
1287 log_cfn
= CFN_BUILT_IN_LOG
;
1288 exp_bfn
= BUILT_IN_EXP
;
1291 log_cfn
= CFN_BUILT_IN_LOGF
;
1292 exp_bfn
= BUILT_IN_EXPF
;
1295 log_cfn
= CFN_BUILT_IN_LOGL
;
1296 exp_bfn
= BUILT_IN_EXPL
;
/* Fold log (C) at compile time; bail out (in dropped lines) unless it
   folds to a REAL_CST and exp has an "omp declare simd" clone.  */
1301 tree logc
= fold_const_call (log_cfn
, TREE_TYPE (base
), base
);
1302 tree exp_decl
= builtin_decl_implicit (exp_bfn
);
1303 /* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd
1304 does that, but if C is a power of 2, we want to use
1305 exp2 (log2 (C) * x) in the non-vectorized version, but for
1306 vectorization we don't have vectorized exp2. */
1308 && TREE_CODE (logc
) == REAL_CST
1310 && lookup_attribute ("omp declare simd",
1311 DECL_ATTRIBUTES (exp_decl
)))
1313 cgraph_node
*node
= cgraph_node::get_create (exp_decl
);
/* Make sure SIMD clones of exp exist (or can be created) before
   committing to the rewrite.  */
1314 if (node
->simd_clones
== NULL
)
1316 if (targetm
.simd_clone
.compute_vecsize_and_simdlen
== NULL
1317 || node
->definition
)
1319 expand_simd_clones (node
);
1320 if (node
->simd_clones
== NULL
)
1323 *type_out
= get_vectype_for_scalar_type (TREE_TYPE (base
));
/* Emit: def = exp_arg = x * log(C); res = exp (def).  */
1326 tree def
= vect_recog_temp_ssa_var (TREE_TYPE (base
), NULL
);
1327 gimple
*g
= gimple_build_assign (def
, MULT_EXPR
, exp
, logc
);
1328 append_pattern_def_seq (stmt_vinfo
, g
);
1329 tree res
= vect_recog_temp_ssa_var (TREE_TYPE (base
), NULL
);
1330 g
= gimple_build_call (exp_decl
, 1, def
);
1331 gimple_call_set_lhs (g
, res
);
1339 /* We now have a pow or powi builtin function call with a constant
1342 /* Catch squaring. */
1343 if ((tree_fits_shwi_p (exp
)
1344 && tree_to_shwi (exp
) == 2)
1345 || (TREE_CODE (exp
) == REAL_CST
1346 && real_equal (&TREE_REAL_CST (exp
), &dconst2
)))
1348 if (!vect_supportable_direct_optab_p (TREE_TYPE (base
), MULT_EXPR
,
1349 TREE_TYPE (base
), type_out
))
/* pow (x, 2) -> x * x.  */
1352 var
= vect_recog_temp_ssa_var (TREE_TYPE (base
), NULL
);
1353 stmt
= gimple_build_assign (var
, MULT_EXPR
, base
, base
);
1357 /* Catch square root. */
1358 if (TREE_CODE (exp
) == REAL_CST
1359 && real_equal (&TREE_REAL_CST (exp
), &dconsthalf
))
1361 *type_out
= get_vectype_for_scalar_type (TREE_TYPE (base
))
;
/* pow (x, 0.5) -> .SQRT (x), if the target supports vector sqrt.  */
1363 && direct_internal_fn_supported_p (IFN_SQRT
, *type_out
,
1364 OPTIMIZE_FOR_SPEED
))
1366 gcall
*stmt
= gimple_build_call_internal (IFN_SQRT
, 1, base
);
1367 var
= vect_recog_temp_ssa_var (TREE_TYPE (base
), stmt
);
1368 gimple_call_set_lhs (stmt
, var
);
1369 gimple_call_set_nothrow (stmt
, true);
1378 /* Function vect_recog_widen_sum_pattern
1380 Try to find the following pattern:
1383 TYPE x_T, sum = init;
1385 sum_0 = phi <init, sum_1>
1387 S2 x_T = (TYPE) x_t;
1388 S3 sum_1 = x_T + sum_0;
1390 where type 'TYPE' is at least double the size of type 'type', i.e - we're
1391 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1392 a special case of a reduction computation.
1396 * STMT_VINFO: The stmt from which the pattern search begins. In the example,
1397 when this function is called with S3, the pattern {S2,S3} will be detected.
1401 * TYPE_OUT: The type of the output of this pattern.
1403 * Return value: A new stmt that will be used to replace the sequence of
1404 stmts that constitute the pattern. In this case it will be:
1405 WIDEN_SUM <x_t, sum_0>
1407 Note: The widening-sum idiom is a widening reduction pattern that is
1408 vectorized without preserving all the intermediate results. It
1409 produces only N/2 (widened) results (by summing up pairs of
1410 intermediate results) rather than all N results. Therefore, we
1411 cannot allow this pattern when we want to get all the results and in
1412 the correct order (as is the case when this computation is in an
1413 inner-loop nested in an outer-loop that is being vectorized). */
1416 vect_recog_widen_sum_pattern (stmt_vec_info stmt_vinfo
, tree
*type_out
)
/* NOTE(review): some original lines (braces, early returns, the
   reassociation-check operands) were dropped by the extraction.  */
1418 gimple
*last_stmt
= stmt_vinfo
->stmt
;
1419 tree oprnd0
, oprnd1
;
1420 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
1422 gimple
*pattern_stmt
;
1425 /* Look for the following pattern
1428 In which DX is at least double the size of X, and sum_1 has been
1429 recognized as a reduction variable.
1432 /* Starting from LAST_STMT, follow the defs of its uses in search
1433 of the above pattern. */
1435 if (!vect_reassociating_reduction_p (stmt_vinfo
, PLUS_EXPR
,
1439 type
= gimple_expr_type (last_stmt
);
1441 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1442 we know that oprnd1 is the reduction variable (defined by a loop-header
1443 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1444 Left to check that oprnd0 is defined by a cast from type 'type' to type
1447 vect_unpromoted_value unprom0
;
/* OPRND0 must be extended from a type at most half as wide as TYPE.  */
1448 if (!vect_look_through_possible_promotion (vinfo
, oprnd0
, &unprom0
)
1449 || TYPE_PRECISION (unprom0
.type
) * 2 > TYPE_PRECISION (type
))
1452 vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt
);
/* Check the target has a direct optab for WIDEN_SUM_EXPR.  */
1454 if (!vect_supportable_direct_optab_p (type
, WIDEN_SUM_EXPR
, unprom0
.type
,
/* Emit WIDEN_SUM <unpromoted input, reduction accumulator>.  */
1458 var
= vect_recog_temp_ssa_var (type
, NULL
);
1459 pattern_stmt
= gimple_build_assign (var
, WIDEN_SUM_EXPR
, unprom0
.op
, oprnd1
);
1461 return pattern_stmt
;
1464 /* Recognize cases in which an operation is performed in one type WTYPE
1465 but could be done more efficiently in a narrower type NTYPE. For example,
1468 ATYPE a; // narrower than NTYPE
1469 BTYPE b; // narrower than NTYPE
1470 WTYPE aw = (WTYPE) a;
1471 WTYPE bw = (WTYPE) b;
1472 WTYPE res = aw + bw; // only uses of aw and bw
1474 then it would be more efficient to do:
1476 NTYPE an = (NTYPE) a;
1477 NTYPE bn = (NTYPE) b;
1478 NTYPE resn = an + bn;
1479 WTYPE res = (WTYPE) resn;
1481 Other situations include things like:
1483 ATYPE a; // NTYPE or narrower
1484 WTYPE aw = (WTYPE) a;
1487 when only "(NTYPE) res" is significant. In that case it's more efficient
1488 to truncate "b" and do the operation on NTYPE instead:
1490 NTYPE an = (NTYPE) a;
1491 NTYPE bn = (NTYPE) b; // truncation
1492 NTYPE resn = an + bn;
1493 WTYPE res = (WTYPE) resn;
1495 All users of "res" should then use "resn" instead, making the final
1496 statement dead (not marked as relevant). The final statement is still
1497 needed to maintain the type correctness of the IR.
1499 vect_determine_precisions has already determined the minimum
1500 precision of the operation and the minimum precision required
1501 by users of the result. */
1504 vect_recog_over_widening_pattern (stmt_vec_info last_stmt_info
, tree
*type_out
)
/* NOTE(review): the extraction dropped some lines here (braces, early
   returns, the `ops' array declaration used below).  Comments only.  */
1506 gassign
*last_stmt
= dyn_cast
<gassign
*> (last_stmt_info
->stmt
);
1510 /* See whether we have found that this operation can be done on a
1511 narrower type without changing its semantics. */
1512 unsigned int new_precision
= last_stmt_info
->operation_precision
;
1516 vec_info
*vinfo
= last_stmt_info
->vinfo
;
1517 tree lhs
= gimple_assign_lhs (last_stmt
);
1518 tree type
= TREE_TYPE (lhs
);
1519 tree_code code
= gimple_assign_rhs_code (last_stmt
);
1521 /* Keep the first operand of a COND_EXPR as-is: only the other two
1522 operands are interesting. */
1523 unsigned int first_op
= (code
== COND_EXPR
? 2 : 1);
1525 /* Check the operands. */
1526 unsigned int nops
= gimple_num_ops (last_stmt
) - first_op
;
1527 auto_vec
<vect_unpromoted_value
, 3> unprom (nops
);
1528 unprom
.quick_grow (nops
);
1529 unsigned int min_precision
= 0;
1530 bool single_use_p
= false;
/* Gather the unpromoted form of each interesting operand and track
   whether any SSA operand is single-use.  */
1531 for (unsigned int i
= 0; i
< nops
; ++i
)
1533 tree op
= gimple_op (last_stmt
, first_op
+ i
);
1534 if (TREE_CODE (op
) == INTEGER_CST
)
1535 unprom
[i
].set_op (op
, vect_constant_def
);
1536 else if (TREE_CODE (op
) == SSA_NAME
)
1538 bool op_single_use_p
= true;
1539 if (!vect_look_through_possible_promotion (vinfo
, op
, &unprom
[i
],
1544 (1) N bits of the result are needed;
1545 (2) all inputs are widened from M<N bits; and
1546 (3) one operand OP is a single-use SSA name
1548 we can shift the M->N widening from OP to the output
1549 without changing the number or type of extensions involved.
1550 This then reduces the number of copies of STMT_INFO.
1552 If instead of (3) more than one operand is a single-use SSA name,
1553 shifting the extension to the output is even more of a win.
1557 (1) N bits of the result are needed;
1558 (2) one operand OP2 is widened from M2<N bits;
1559 (3) another operand OP1 is widened from M1<M2 bits; and
1560 (4) both OP1 and OP2 are single-use
1562 the choice is between:
1564 (a) truncating OP2 to M1, doing the operation on M1,
1565 and then widening the result to N
1567 (b) widening OP1 to M2, doing the operation on M2, and then
1568 widening the result to N
1570 Both shift the M2->N widening of the inputs to the output.
1571 (a) additionally shifts the M1->M2 widening to the output;
1572 it requires fewer copies of STMT_INFO but requires an extra
1575 Which is better will depend on the complexity and cost of
1576 STMT_INFO, which is hard to predict at this stage. However,
1577 a clear tie-breaker in favor of (b) is the fact that the
1578 truncation in (a) increases the length of the operation chain.
1580 If instead of (4) only one of OP1 or OP2 is single-use,
1581 (b) is still a win over doing the operation in N bits:
1582 it still shifts the M2->N widening on the single-use operand
1583 to the output and reduces the number of STMT_INFO copies.
1585 If neither operand is single-use then operating on fewer than
1586 N bits might lead to more extensions overall. Whether it does
1587 or not depends on global information about the vectorization
1588 region, and whether that's a good trade-off would again
1589 depend on the complexity and cost of the statements involved,
1590 as well as things like register pressure that are not normally
1591 modelled at this stage. We therefore ignore these cases
1592 and just optimize the clear single-use wins above.
1594 Thus we take the maximum precision of the unpromoted operands
1595 and record whether any operand is single-use. */
1596 if (unprom
[i
].dt
== vect_internal_def
)
1598 min_precision
= MAX (min_precision
,
1599 TYPE_PRECISION (unprom
[i
].type
));
1600 single_use_p
|= op_single_use_p
;
1605 /* Although the operation could be done in operation_precision, we have
1606 to balance that against introducing extra truncations or extensions.
1607 Calculate the minimum precision that can be handled efficiently.
1609 The loop above determined that the operation could be handled
1610 efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
1611 extension from the inputs to the output without introducing more
1612 instructions, and would reduce the number of instructions required
1613 for STMT_INFO itself.
1615 vect_determine_precisions has also determined that the result only
1616 needs min_output_precision bits. Truncating by a factor of N times
1617 requires a tree of N - 1 instructions, so if TYPE is N times wider
1618 than min_output_precision, doing the operation in TYPE and truncating
1619 the result requires N + (N - 1) = 2N - 1 instructions per output vector.
1622 - truncating the input to a unary operation and doing the operation
1623 in the new type requires at most N - 1 + 1 = N instructions per
1626 - doing the same for a binary operation requires at most
1627 (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
1629 Both unary and binary operations require fewer instructions than
1630 this if the operands were extended from a suitable truncated form.
1631 Thus there is usually nothing to lose by doing operations in
1632 min_output_precision bits, but there can be something to gain. */
1634 min_precision
= last_stmt_info
->min_output_precision
;
1636 min_precision
= MIN (min_precision
, last_stmt_info
->min_output_precision
);
1638 /* Apply the minimum efficient precision we just calculated. */
1639 if (new_precision
< min_precision
)
1640 new_precision
= min_precision
;
/* If no narrowing is possible, the pattern doesn't apply.  */
1641 if (new_precision
>= TYPE_PRECISION (type
))
1644 vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt
);
1646 *type_out
= get_vectype_for_scalar_type (type
);
1650 /* We've found a viable pattern. Get the new type of the operation. */
1651 bool unsigned_p
= (last_stmt_info
->operation_sign
== UNSIGNED
);
1652 tree new_type
= build_nonstandard_integer_type (new_precision
, unsigned_p
);
1654 /* We specifically don't check here whether the target supports the
1655 new operation, since it might be something that a later pattern
1656 wants to rewrite anyway. If targets have a minimum element size
1657 for some optabs, we should pattern-match smaller ops to larger ops
1658 where beneficial. */
1659 tree new_vectype
= get_vectype_for_scalar_type (new_type
);
1663 if (dump_enabled_p ())
1665 dump_printf_loc (MSG_NOTE
, vect_location
, "demoting ");
1666 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, type
);
1667 dump_printf (MSG_NOTE
, " to ");
1668 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, new_type
);
1669 dump_printf (MSG_NOTE
, "\n");
1672 /* Calculate the rhs operands for an operation on NEW_TYPE. */
1674 for (unsigned int i
= 1; i
< first_op
; ++i
)
1675 ops
[i
- 1] = gimple_op (last_stmt
, i
);
1676 vect_convert_inputs (last_stmt_info
, nops
, &ops
[first_op
- 1],
1677 new_type
, &unprom
[0], new_vectype
);
1679 /* Use the operation to produce a result of type NEW_TYPE. */
1680 tree new_var
= vect_recog_temp_ssa_var (new_type
, NULL
);
1681 gimple
*pattern_stmt
= gimple_build_assign (new_var
, code
,
1682 ops
[0], ops
[1], ops
[2]);
1683 gimple_set_location (pattern_stmt
, gimple_location (last_stmt
));
1685 if (dump_enabled_p ())
1687 dump_printf_loc (MSG_NOTE
, vect_location
,
1688 "created pattern stmt: ");
1689 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
/* Convert the narrow result back to the original TYPE.  */
1692 pattern_stmt
= vect_convert_output (last_stmt_info
, type
,
1693 pattern_stmt
, new_vectype
);
1695 return pattern_stmt
;
1698 /* Recognize the patterns:
1700 ATYPE a; // narrower than TYPE
1701 BTYPE b; // narrower than TYPE
1702 (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
1703 or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
1705 where only the bottom half of avg is used. Try to transform them into:
1707 (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
1708 or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
1712 TYPE avg = (TYPE) avg';
1714 where NTYPE is no wider than half of TYPE. Since only the bottom half
1715 of avg is used, all or part of the cast of avg' should become redundant. */
1718 vect_recog_average_pattern (stmt_vec_info last_stmt_info
, tree
*type_out
)
/* NOTE(review): the extraction dropped some lines here (braces, early
   returns, the `new_type'/`new_ops' declarations used below).  */
1720 /* Check for a shift right by one bit. */
1721 gassign
*last_stmt
= dyn_cast
<gassign
*> (last_stmt_info
->stmt
);
1722 vec_info
*vinfo
= last_stmt_info
->vinfo
;
1724 || gimple_assign_rhs_code (last_stmt
) != RSHIFT_EXPR
1725 || !integer_onep (gimple_assign_rhs2 (last_stmt
)))
1728 /* Check that the shift result is wider than the users of the
1729 result need (i.e. that narrowing would be a natural choice). */
1730 tree lhs
= gimple_assign_lhs (last_stmt
);
1731 tree type
= TREE_TYPE (lhs
);
1732 unsigned int target_precision
1733 = vect_element_precision (last_stmt_info
->min_output_precision
);
1734 if (!INTEGRAL_TYPE_P (type
) || target_precision
>= TYPE_PRECISION (type
))
1737 /* Get the definition of the shift input. */
1738 tree rshift_rhs
= gimple_assign_rhs1 (last_stmt
);
1739 stmt_vec_info plus_stmt_info
= vect_get_internal_def (vinfo
, rshift_rhs
);
1740 if (!plus_stmt_info
)
1743 /* Check whether the shift input can be seen as a tree of additions on
1744 2 or 3 widened inputs.
1746 Note that the pattern should be a win even if the result of one or
1747 more additions is reused elsewhere: if the pattern matches, we'd be
1748 replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s. */
1749 internal_fn ifn
= IFN_AVG_FLOOR
;
1750 vect_unpromoted_value unprom
[3];
1752 unsigned int nops
= vect_widened_op_tree (plus_stmt_info
, PLUS_EXPR
,
1753 PLUS_EXPR
, false, 3,
/* Three addends means the "+ 1" rounding form: switch to IFN_AVG_CEIL
   (in lines dropped by the extraction) after finding the 1 operand.  */
1759 /* Check that one operand is 1. */
1761 for (i
= 0; i
< 3; ++i
)
1762 if (integer_onep (unprom
[i
].op
))
1766 /* Throw away the 1 operand and keep the other two. */
1768 unprom
[i
] = unprom
[2];
1772 vect_pattern_detected ("vect_recog_average_pattern", last_stmt
);
1776 (a) the operation can be viewed as:
1778 TYPE widened0 = (TYPE) UNPROM[0];
1779 TYPE widened1 = (TYPE) UNPROM[1];
1780 TYPE tmp1 = widened0 + widened1 {+ 1};
1781 TYPE tmp2 = tmp1 >> 1; // LAST_STMT_INFO
1783 (b) the first two statements are equivalent to:
1785 TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
1786 TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
1788 (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
1791 (d) all the operations can be performed correctly at twice the width of
1792 NEW_TYPE, due to the nature of the average operation; and
1794 (e) users of the result of the right shift need only TARGET_PRECISION
1795 bits, where TARGET_PRECISION is no more than half of TYPE's
1798 Under these circumstances, the only situation in which NEW_TYPE
1799 could be narrower than TARGET_PRECISION is if widened0, widened1
1800 and an addition result are all used more than once. Thus we can
1801 treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
1802 as "free", whereas widening the result of the average instruction
1803 from NEW_TYPE to TARGET_PRECISION would be a new operation. It's
1804 therefore better not to go narrower than TARGET_PRECISION. */
1805 if (TYPE_PRECISION (new_type
) < target_precision
)
1806 new_type
= build_nonstandard_integer_type (target_precision
,
1807 TYPE_UNSIGNED (new_type
));
1809 /* Check for target support. */
1810 tree new_vectype
= get_vectype_for_scalar_type (new_type
);
1812 || !direct_internal_fn_supported_p (ifn
, new_vectype
,
1813 OPTIMIZE_FOR_SPEED
))
1816 /* The IR requires a valid vector type for the cast result, even though
1817 it's likely to be discarded. */
1818 *type_out
= get_vectype_for_scalar_type (type
);
1822 /* Generate the IFN_AVG* call. */
1823 tree new_var
= vect_recog_temp_ssa_var (new_type
, NULL
);
/* new_ops holds the two narrowed inputs; its declaration is among the
   lines dropped by the extraction.  */
1825 vect_convert_inputs (last_stmt_info
, 2, new_ops
, new_type
,
1826 unprom
, new_vectype
);
1827 gcall
*average_stmt
= gimple_build_call_internal (ifn
, 2, new_ops
[0],
1829 gimple_call_set_lhs (average_stmt
, new_var
);
1830 gimple_set_location (average_stmt
, gimple_location (last_stmt
));
1832 if (dump_enabled_p ())
1834 dump_printf_loc (MSG_NOTE
, vect_location
,
1835 "created pattern stmt: ");
1836 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, average_stmt
, 0);
/* Cast the NEW_TYPE average back to the original TYPE.  */
1839 return vect_convert_output (last_stmt_info
, type
, average_stmt
, new_vectype
);
1842 /* Recognize cases in which the input to a cast is wider than its
1843 output, and the input is fed by a widening operation. Fold this
1844 by removing the unnecessary intermediate widening. E.g.:
1847 unsigned int b = (unsigned int) a;
1848 unsigned short c = (unsigned short) b;
1852 unsigned short c = (unsigned short) a;
1854 Although this is rare in input IR, it is an expected side-effect
1855 of the over-widening pattern above.
1857 This is beneficial also for integer-to-float conversions, if the
1858 widened integer has more bits than the float, and if the unwidened
1862 vect_recog_cast_forwprop_pattern (stmt_vec_info last_stmt_info
, tree
*type_out
)
/* NOTE(review): some lines (braces, early returns) are missing from this
   extraction; comments only added.  */
1864 /* Check for a cast, including an integer-to-float conversion. */
1865 gassign
*last_stmt
= dyn_cast
<gassign
*> (last_stmt_info
->stmt
);
1868 tree_code code
= gimple_assign_rhs_code (last_stmt
);
1869 if (!CONVERT_EXPR_CODE_P (code
) && code
!= FLOAT_EXPR
)
1872 /* Make sure that the rhs is a scalar with a natural bitsize. */
1873 tree lhs
= gimple_assign_lhs (last_stmt
);
1876 tree lhs_type
= TREE_TYPE (lhs
);
1877 scalar_mode lhs_mode
;
1878 if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type
)
1879 || !is_a
<scalar_mode
> (TYPE_MODE (lhs_type
), &lhs_mode
))
1882 /* Check for a narrowing operation (from a vector point of view). */
1883 tree rhs
= gimple_assign_rhs1 (last_stmt
);
1884 tree rhs_type
= TREE_TYPE (rhs
);
1885 if (!INTEGRAL_TYPE_P (rhs_type
)
1886 || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type
)
1887 || TYPE_PRECISION (rhs_type
) <= GET_MODE_BITSIZE (lhs_mode
))
1890 /* Try to find an unpromoted input. */
1891 vec_info
*vinfo
= last_stmt_info
->vinfo
;
1892 vect_unpromoted_value unprom
;
1893 if (!vect_look_through_possible_promotion (vinfo
, rhs
, &unprom
)
1894 || TYPE_PRECISION (unprom
.type
) >= TYPE_PRECISION (rhs_type
))
1897 /* If the bits above RHS_TYPE matter, make sure that they're the
1898 same when extending from UNPROM as they are when extending from RHS. */
1899 if (!INTEGRAL_TYPE_P (lhs_type
)
1900 && TYPE_SIGN (rhs_type
) != TYPE_SIGN (unprom
.type
))
1903 /* We can get the same result by casting UNPROM directly, to avoid
1904 the unnecessary widening and narrowing. */
1905 vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt
);
1907 *type_out
= get_vectype_for_scalar_type (lhs_type
);
/* Replace the widen-then-narrow pair by one direct cast of the
   unpromoted value.  */
1911 tree new_var
= vect_recog_temp_ssa_var (lhs_type
, NULL
);
1912 gimple
*pattern_stmt
= gimple_build_assign (new_var
, code
, unprom
.op
);
1913 gimple_set_location (pattern_stmt
, gimple_location (last_stmt
));
1915 return pattern_stmt
;
1918 /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
1919 to WIDEN_LSHIFT_EXPR. See vect_recog_widen_op_pattern for details. */
1922 vect_recog_widen_shift_pattern (stmt_vec_info last_stmt_info
, tree
*type_out
)
/* Thin wrapper: delegate to the generic widening-op matcher, mapping an
   LSHIFT_EXPR of a promoted input to WIDEN_LSHIFT_EXPR (shift_p = true).  */
1924 return vect_recog_widen_op_pattern (last_stmt_info
, type_out
, LSHIFT_EXPR
,
1925 WIDEN_LSHIFT_EXPR
, true,
1926 "vect_recog_widen_shift_pattern");
1929 /* Detect a rotate pattern that wouldn't otherwise be vectorized:
1933 S0 a_t = b_t r<< c_t;
1937 * STMT_VINFO: The stmt from which the pattern search begins,
1938 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1942 S2 e_t = d_t & (B - 1);
1943 S3 f_t = b_t << c_t;
1944 S4 g_t = b_t >> e_t;
1947 where B is element bitsize of type.
1951 * TYPE_OUT: The type of the output of this pattern.
1953 * Return value: A new stmt that will be used to replace the rotate
1957 vect_recog_rotate_pattern (stmt_vec_info stmt_vinfo
, tree
*type_out
)
/* NOTE(review): the extraction dropped a number of original lines here
   (the rhs_code rotate check around orig lines 1972-1980, braces, early
   returns, several shift operands).  Comments only added.  */
1959 gimple
*last_stmt
= stmt_vinfo
->stmt
;
1960 tree oprnd0
, oprnd1
, lhs
, var
, var1
, var2
, vectype
, type
, stype
, def
, def2
;
1961 gimple
*pattern_stmt
, *def_stmt
;
1962 enum tree_code rhs_code
;
1963 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
1964 enum vect_def_type dt
;
1965 optab optab1
, optab2
;
1966 edge ext_def
= NULL
;
1968 if (!is_gimple_assign (last_stmt
))
1971 rhs_code
= gimple_assign_rhs_code (last_stmt
);
/* Only unsigned integral rotates whose result keeps the precision of the
   first operand are handled.  */
1981 lhs
= gimple_assign_lhs (last_stmt
);
1982 oprnd0
= gimple_assign_rhs1 (last_stmt
);
1983 type
= TREE_TYPE (oprnd0
);
1984 oprnd1
= gimple_assign_rhs2 (last_stmt
);
1985 if (TREE_CODE (oprnd0
) != SSA_NAME
1986 || TYPE_PRECISION (TREE_TYPE (lhs
)) != TYPE_PRECISION (type
)
1987 || !INTEGRAL_TYPE_P (type
)
1988 || !TYPE_UNSIGNED (type
))
/* Classify the rotate amount's definition (internal/constant/external).  */
1991 stmt_vec_info def_stmt_info
;
1992 if (!vect_is_simple_use (oprnd1
, vinfo
, &dt
, &def_stmt_info
, &def_stmt
))
1995 if (dt
!= vect_internal_def
1996 && dt
!= vect_constant_def
1997 && dt
!= vect_external_def
)
2000 vectype
= get_vectype_for_scalar_type (type
);
2001 if (vectype
== NULL_TREE
)
2004 /* If vector/vector or vector/scalar rotate is supported by the target,
2005 don't do anything here. */
2006 optab1
= optab_for_tree_code (rhs_code
, vectype
, optab_vector
);
2008 && optab_handler (optab1
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
2011 if (is_a
<bb_vec_info
> (vinfo
) || dt
!= vect_internal_def
)
2013 optab2
= optab_for_tree_code (rhs_code
, vectype
, optab_scalar
);
2015 && optab_handler (optab2
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
2019 /* If vector/vector or vector/scalar shifts aren't supported by the target,
2020 don't do anything here either. */
2021 optab1
= optab_for_tree_code (LSHIFT_EXPR
, vectype
, optab_vector
);
2022 optab2
= optab_for_tree_code (RSHIFT_EXPR
, vectype
, optab_vector
);
2024 || optab_handler (optab1
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
2026 || optab_handler (optab2
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
2028 if (! is_a
<bb_vec_info
> (vinfo
) && dt
== vect_internal_def
)
2030 optab1
= optab_for_tree_code (LSHIFT_EXPR
, vectype
, optab_scalar
);
2031 optab2
= optab_for_tree_code (RSHIFT_EXPR
, vectype
, optab_scalar
);
2033 || optab_handler (optab1
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
2035 || optab_handler (optab2
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
2039 *type_out
= vectype
;
/* For an externally-defined shift amount, find the preheader edge so the
   helper statements can be inserted outside the loop.  */
2041 if (dt
== vect_external_def
2042 && TREE_CODE (oprnd1
) == SSA_NAME
)
2043 ext_def
= vect_get_external_def_edge (vinfo
, oprnd1
);
/* If OPRND1 doesn't already have the element mode of TYPE, look through a
   mode-preserving cast or insert a NOP_EXPR conversion into DEF.  */
2046 scalar_int_mode mode
= SCALAR_INT_TYPE_MODE (type
);
2047 if (TREE_CODE (oprnd1
) == INTEGER_CST
2048 || TYPE_MODE (TREE_TYPE (oprnd1
)) == mode
)
2050 else if (def_stmt
&& gimple_assign_cast_p (def_stmt
))
2052 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
2053 if (TYPE_MODE (TREE_TYPE (rhs1
)) == mode
2054 && TYPE_PRECISION (TREE_TYPE (rhs1
))
2055 == TYPE_PRECISION (type
))
2059 if (def
== NULL_TREE
)
2061 def
= vect_recog_temp_ssa_var (type
, NULL
);
2062 def_stmt
= gimple_build_assign (def
, NOP_EXPR
, oprnd1
);
2066 = gsi_insert_on_edge_immediate (ext_def
, def_stmt
);
2067 gcc_assert (!new_bb
);
2070 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2072 stype
= TREE_TYPE (def
);
2073 scalar_int_mode smode
= SCALAR_INT_TYPE_MODE (stype
);
/* Compute the complementary shift amount DEF2.  For a constant amount it
   is PRECISION - DEF; a zero or out-of-range constant is rejected.  */
2075 if (TREE_CODE (def
) == INTEGER_CST
)
2077 if (!tree_fits_uhwi_p (def
)
2078 || tree_to_uhwi (def
) >= GET_MODE_PRECISION (mode
)
2079 || integer_zerop (def
))
2081 def2
= build_int_cst (stype
,
2082 GET_MODE_PRECISION (mode
) - tree_to_uhwi (def
));
/* For a variable amount, negate DEF and mask with PRECISION - 1, which
   gives the same complementary amount modulo the element width.  */
2086 tree vecstype
= get_vectype_for_scalar_type (stype
);
2088 if (vecstype
== NULL_TREE
)
2090 def2
= vect_recog_temp_ssa_var (stype
, NULL
);
2091 def_stmt
= gimple_build_assign (def2
, NEGATE_EXPR
, def
);
2095 = gsi_insert_on_edge_immediate (ext_def
, def_stmt
);
2096 gcc_assert (!new_bb
);
2099 append_pattern_def_seq (stmt_vinfo
, def_stmt
, vecstype
);
2101 def2
= vect_recog_temp_ssa_var (stype
, NULL
);
2102 tree mask
= build_int_cst (stype
, GET_MODE_PRECISION (smode
) - 1);
2103 def_stmt
= gimple_build_assign (def2
, BIT_AND_EXPR
,
2104 gimple_assign_lhs (def_stmt
), mask
);
2108 = gsi_insert_on_edge_immediate (ext_def
, def_stmt
);
2109 gcc_assert (!new_bb
);
2112 append_pattern_def_seq (stmt_vinfo
, def_stmt
, vecstype
);
/* Build the two opposing shifts; their direction depends on whether the
   original statement was a left or right rotate.  */
2115 var1
= vect_recog_temp_ssa_var (type
, NULL
);
2116 def_stmt
= gimple_build_assign (var1
, rhs_code
== LROTATE_EXPR
2117 ? LSHIFT_EXPR
: RSHIFT_EXPR
,
2119 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2121 var2
= vect_recog_temp_ssa_var (type
, NULL
);
2122 def_stmt
= gimple_build_assign (var2
, rhs_code
== LROTATE_EXPR
2123 ? RSHIFT_EXPR
: LSHIFT_EXPR
,
2125 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2127 /* Pattern detected. */
2128 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt
);
2130 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2131 var
= vect_recog_temp_ssa_var (type
, NULL
);
2132 pattern_stmt
= gimple_build_assign (var
, BIT_IOR_EXPR
, var1
, var2
);
2134 return pattern_stmt
;
2137 /* Detect a vector by vector shift pattern that wouldn't be otherwise
2145 S3 res_T = b_T op a_t;
2147 where type 'TYPE' is a type with different size than 'type',
2148 and op is <<, >> or rotate.
2153 TYPE b_T, c_T, res_T;
2156 S1 a_t = (type) c_T;
2158 S3 res_T = b_T op a_t;
2162 * STMT_VINFO: The stmt from which the pattern search begins,
2163 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2164 with a shift/rotate which has same type on both operands, in the
2165 second case just b_T op c_T, in the first case with added cast
2166 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2170 * TYPE_OUT: The type of the output of this pattern.
2172 * Return value: A new stmt that will be used to replace the shift/rotate
2176 vect_recog_vector_vector_shift_pattern (stmt_vec_info stmt_vinfo
,
2179 gimple
*last_stmt
= stmt_vinfo
->stmt
;
2180 tree oprnd0
, oprnd1
, lhs
, var
;
2181 gimple
*pattern_stmt
;
2182 enum tree_code rhs_code
;
2183 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
2185 if (!is_gimple_assign (last_stmt
))
2188 rhs_code
= gimple_assign_rhs_code (last_stmt
);
2200 lhs
= gimple_assign_lhs (last_stmt
);
2201 oprnd0
= gimple_assign_rhs1 (last_stmt
);
2202 oprnd1
= gimple_assign_rhs2 (last_stmt
);
2203 if (TREE_CODE (oprnd0
) != SSA_NAME
2204 || TREE_CODE (oprnd1
) != SSA_NAME
2205 || TYPE_MODE (TREE_TYPE (oprnd0
)) == TYPE_MODE (TREE_TYPE (oprnd1
))
2206 || !type_has_mode_precision_p (TREE_TYPE (oprnd1
))
2207 || TYPE_PRECISION (TREE_TYPE (lhs
))
2208 != TYPE_PRECISION (TREE_TYPE (oprnd0
)))
2211 stmt_vec_info def_vinfo
= vect_get_internal_def (vinfo
, oprnd1
);
2215 *type_out
= get_vectype_for_scalar_type (TREE_TYPE (oprnd0
));
2216 if (*type_out
== NULL_TREE
)
2219 tree def
= NULL_TREE
;
2220 gassign
*def_stmt
= dyn_cast
<gassign
*> (def_vinfo
->stmt
);
2221 if (def_stmt
&& gimple_assign_cast_p (def_stmt
))
2223 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
2224 if (TYPE_MODE (TREE_TYPE (rhs1
)) == TYPE_MODE (TREE_TYPE (oprnd0
))
2225 && TYPE_PRECISION (TREE_TYPE (rhs1
))
2226 == TYPE_PRECISION (TREE_TYPE (oprnd0
)))
2228 if (TYPE_PRECISION (TREE_TYPE (oprnd1
))
2229 >= TYPE_PRECISION (TREE_TYPE (rhs1
)))
2234 = build_low_bits_mask (TREE_TYPE (rhs1
),
2235 TYPE_PRECISION (TREE_TYPE (oprnd1
)));
2236 def
= vect_recog_temp_ssa_var (TREE_TYPE (rhs1
), NULL
);
2237 def_stmt
= gimple_build_assign (def
, BIT_AND_EXPR
, rhs1
, mask
);
2238 tree vecstype
= get_vectype_for_scalar_type (TREE_TYPE (rhs1
));
2239 append_pattern_def_seq (stmt_vinfo
, def_stmt
, vecstype
);
2244 if (def
== NULL_TREE
)
2246 def
= vect_recog_temp_ssa_var (TREE_TYPE (oprnd0
), NULL
);
2247 def_stmt
= gimple_build_assign (def
, NOP_EXPR
, oprnd1
);
2248 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2251 /* Pattern detected. */
2252 vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt
);
2254 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2255 var
= vect_recog_temp_ssa_var (TREE_TYPE (oprnd0
), NULL
);
2256 pattern_stmt
= gimple_build_assign (var
, rhs_code
, oprnd0
, def
);
2258 return pattern_stmt
;
2261 /* Return true iff the target has a vector optab implementing the operation
2262 CODE on type VECTYPE. */
2265 target_has_vecop_for_code (tree_code code
, tree vectype
)
2267 optab voptab
= optab_for_tree_code (code
, vectype
, optab_vector
);
2269 && optab_handler (voptab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
;
2272 /* Verify that the target has optabs of VECTYPE to perform all the steps
2273 needed by the multiplication-by-immediate synthesis algorithm described by
2274 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
2275 present. Return true iff the target supports all the steps. */
2278 target_supports_mult_synth_alg (struct algorithm
*alg
, mult_variant var
,
2279 tree vectype
, bool synth_shift_p
)
2281 if (alg
->op
[0] != alg_zero
&& alg
->op
[0] != alg_m
)
2284 bool supports_vminus
= target_has_vecop_for_code (MINUS_EXPR
, vectype
);
2285 bool supports_vplus
= target_has_vecop_for_code (PLUS_EXPR
, vectype
);
2287 if (var
== negate_variant
2288 && !target_has_vecop_for_code (NEGATE_EXPR
, vectype
))
2291 /* If we must synthesize shifts with additions make sure that vector
2292 addition is available. */
2293 if ((var
== add_variant
|| synth_shift_p
) && !supports_vplus
)
2296 for (int i
= 1; i
< alg
->ops
; i
++)
2304 case alg_add_factor
:
2305 if (!supports_vplus
)
2310 case alg_sub_factor
:
2311 if (!supports_vminus
)
2317 case alg_impossible
:
2327 /* Synthesize a left shift of OP by AMNT bits using a series of additions and
2328 putting the final result in DEST. Append all statements but the last into
2329 VINFO. Return the last statement. */
2332 synth_lshift_by_additions (tree dest
, tree op
, HOST_WIDE_INT amnt
,
2333 stmt_vec_info vinfo
)
2336 tree itype
= TREE_TYPE (op
);
2338 gcc_assert (amnt
>= 0);
2339 for (i
= 0; i
< amnt
; i
++)
2341 tree tmp_var
= (i
< amnt
- 1) ? vect_recog_temp_ssa_var (itype
, NULL
)
2344 = gimple_build_assign (tmp_var
, PLUS_EXPR
, prev_res
, prev_res
);
2347 append_pattern_def_seq (vinfo
, stmt
);
2355 /* Helper for vect_synth_mult_by_constant. Apply a binary operation
2356 CODE to operands OP1 and OP2, creating a new temporary SSA var in
2357 the process if necessary. Append the resulting assignment statements
2358 to the sequence in STMT_VINFO. Return the SSA variable that holds the
2359 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
2360 left shifts using additions. */
2363 apply_binop_and_append_stmt (tree_code code
, tree op1
, tree op2
,
2364 stmt_vec_info stmt_vinfo
, bool synth_shift_p
)
2366 if (integer_zerop (op2
)
2367 && (code
== LSHIFT_EXPR
2368 || code
== PLUS_EXPR
))
2370 gcc_assert (TREE_CODE (op1
) == SSA_NAME
);
2375 tree itype
= TREE_TYPE (op1
);
2376 tree tmp_var
= vect_recog_temp_ssa_var (itype
, NULL
);
2378 if (code
== LSHIFT_EXPR
2381 stmt
= synth_lshift_by_additions (tmp_var
, op1
, TREE_INT_CST_LOW (op2
),
2383 append_pattern_def_seq (stmt_vinfo
, stmt
);
2387 stmt
= gimple_build_assign (tmp_var
, code
, op1
, op2
);
2388 append_pattern_def_seq (stmt_vinfo
, stmt
);
2392 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
2393 and simple arithmetic operations to be vectorized. Record the statements
2394 produced in STMT_VINFO and return the last statement in the sequence or
2395 NULL if it's not possible to synthesize such a multiplication.
2396 This function mirrors the behavior of expand_mult_const in expmed.c but
2397 works on tree-ssa form. */
2400 vect_synth_mult_by_constant (tree op
, tree val
,
2401 stmt_vec_info stmt_vinfo
)
2403 tree itype
= TREE_TYPE (op
);
2404 machine_mode mode
= TYPE_MODE (itype
);
2405 struct algorithm alg
;
2406 mult_variant variant
;
2407 if (!tree_fits_shwi_p (val
))
2410 /* Multiplication synthesis by shifts, adds and subs can introduce
2411 signed overflow where the original operation didn't. Perform the
2412 operations on an unsigned type and cast back to avoid this.
2413 In the future we may want to relax this for synthesis algorithms
2414 that we can prove do not cause unexpected overflow. */
2415 bool cast_to_unsigned_p
= !TYPE_OVERFLOW_WRAPS (itype
);
2417 tree multtype
= cast_to_unsigned_p
? unsigned_type_for (itype
) : itype
;
2419 /* Targets that don't support vector shifts but support vector additions
2420 can synthesize shifts that way. */
2421 bool synth_shift_p
= !vect_supportable_shift (LSHIFT_EXPR
, multtype
);
2423 HOST_WIDE_INT hwval
= tree_to_shwi (val
);
2424 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
2425 The vectorizer's benefit analysis will decide whether it's beneficial
2427 bool possible
= choose_mult_variant (mode
, hwval
, &alg
,
2428 &variant
, MAX_COST
);
2432 tree vectype
= get_vectype_for_scalar_type (multtype
);
2435 || !target_supports_mult_synth_alg (&alg
, variant
,
2436 vectype
, synth_shift_p
))
2441 /* Clear out the sequence of statements so we can populate it below. */
2442 gimple
*stmt
= NULL
;
2444 if (cast_to_unsigned_p
)
2446 tree tmp_op
= vect_recog_temp_ssa_var (multtype
, NULL
);
2447 stmt
= gimple_build_assign (tmp_op
, CONVERT_EXPR
, op
);
2448 append_pattern_def_seq (stmt_vinfo
, stmt
);
2452 if (alg
.op
[0] == alg_zero
)
2453 accumulator
= build_int_cst (multtype
, 0);
2457 bool needs_fixup
= (variant
== negate_variant
)
2458 || (variant
== add_variant
);
2460 for (int i
= 1; i
< alg
.ops
; i
++)
2462 tree shft_log
= build_int_cst (multtype
, alg
.log
[i
]);
2463 tree accum_tmp
= vect_recog_temp_ssa_var (multtype
, NULL
);
2464 tree tmp_var
= NULL_TREE
;
2471 = synth_lshift_by_additions (accum_tmp
, accumulator
, alg
.log
[i
],
2474 stmt
= gimple_build_assign (accum_tmp
, LSHIFT_EXPR
, accumulator
,
2479 = apply_binop_and_append_stmt (LSHIFT_EXPR
, op
, shft_log
,
2480 stmt_vinfo
, synth_shift_p
);
2481 stmt
= gimple_build_assign (accum_tmp
, PLUS_EXPR
, accumulator
,
2485 tmp_var
= apply_binop_and_append_stmt (LSHIFT_EXPR
, op
,
2486 shft_log
, stmt_vinfo
,
2488 /* In some algorithms the first step involves zeroing the
2489 accumulator. If subtracting from such an accumulator
2490 just emit the negation directly. */
2491 if (integer_zerop (accumulator
))
2492 stmt
= gimple_build_assign (accum_tmp
, NEGATE_EXPR
, tmp_var
);
2494 stmt
= gimple_build_assign (accum_tmp
, MINUS_EXPR
, accumulator
,
2499 = apply_binop_and_append_stmt (LSHIFT_EXPR
, accumulator
, shft_log
,
2500 stmt_vinfo
, synth_shift_p
);
2501 stmt
= gimple_build_assign (accum_tmp
, PLUS_EXPR
, tmp_var
, op
);
2505 = apply_binop_and_append_stmt (LSHIFT_EXPR
, accumulator
, shft_log
,
2506 stmt_vinfo
, synth_shift_p
);
2507 stmt
= gimple_build_assign (accum_tmp
, MINUS_EXPR
, tmp_var
, op
);
2509 case alg_add_factor
:
2511 = apply_binop_and_append_stmt (LSHIFT_EXPR
, accumulator
, shft_log
,
2512 stmt_vinfo
, synth_shift_p
);
2513 stmt
= gimple_build_assign (accum_tmp
, PLUS_EXPR
, accumulator
,
2516 case alg_sub_factor
:
2518 = apply_binop_and_append_stmt (LSHIFT_EXPR
, accumulator
, shft_log
,
2519 stmt_vinfo
, synth_shift_p
);
2520 stmt
= gimple_build_assign (accum_tmp
, MINUS_EXPR
, tmp_var
,
2526 /* We don't want to append the last stmt in the sequence to stmt_vinfo
2527 but rather return it directly. */
2529 if ((i
< alg
.ops
- 1) || needs_fixup
|| cast_to_unsigned_p
)
2530 append_pattern_def_seq (stmt_vinfo
, stmt
);
2531 accumulator
= accum_tmp
;
2533 if (variant
== negate_variant
)
2535 tree accum_tmp
= vect_recog_temp_ssa_var (multtype
, NULL
);
2536 stmt
= gimple_build_assign (accum_tmp
, NEGATE_EXPR
, accumulator
);
2537 accumulator
= accum_tmp
;
2538 if (cast_to_unsigned_p
)
2539 append_pattern_def_seq (stmt_vinfo
, stmt
);
2541 else if (variant
== add_variant
)
2543 tree accum_tmp
= vect_recog_temp_ssa_var (multtype
, NULL
);
2544 stmt
= gimple_build_assign (accum_tmp
, PLUS_EXPR
, accumulator
, op
);
2545 accumulator
= accum_tmp
;
2546 if (cast_to_unsigned_p
)
2547 append_pattern_def_seq (stmt_vinfo
, stmt
);
2549 /* Move back to a signed if needed. */
2550 if (cast_to_unsigned_p
)
2552 tree accum_tmp
= vect_recog_temp_ssa_var (itype
, NULL
);
2553 stmt
= gimple_build_assign (accum_tmp
, CONVERT_EXPR
, accumulator
);
2559 /* Detect multiplication by constant and convert it into a sequence of
2560 shifts and additions, subtractions, negations. We reuse the
2561 choose_mult_variant algorithms from expmed.c
2565 STMT_VINFO: The stmt from which the pattern search begins,
2570 * TYPE_OUT: The type of the output of this pattern.
2572 * Return value: A new stmt that will be used to replace
2573 the multiplication. */
2576 vect_recog_mult_pattern (stmt_vec_info stmt_vinfo
, tree
*type_out
)
2578 gimple
*last_stmt
= stmt_vinfo
->stmt
;
2579 tree oprnd0
, oprnd1
, vectype
, itype
;
2580 gimple
*pattern_stmt
;
2582 if (!is_gimple_assign (last_stmt
))
2585 if (gimple_assign_rhs_code (last_stmt
) != MULT_EXPR
)
2588 oprnd0
= gimple_assign_rhs1 (last_stmt
);
2589 oprnd1
= gimple_assign_rhs2 (last_stmt
);
2590 itype
= TREE_TYPE (oprnd0
);
2592 if (TREE_CODE (oprnd0
) != SSA_NAME
2593 || TREE_CODE (oprnd1
) != INTEGER_CST
2594 || !INTEGRAL_TYPE_P (itype
)
2595 || !type_has_mode_precision_p (itype
))
2598 vectype
= get_vectype_for_scalar_type (itype
);
2599 if (vectype
== NULL_TREE
)
2602 /* If the target can handle vectorized multiplication natively,
2603 don't attempt to optimize this. */
2604 optab mul_optab
= optab_for_tree_code (MULT_EXPR
, vectype
, optab_default
);
2605 if (mul_optab
!= unknown_optab
)
2607 machine_mode vec_mode
= TYPE_MODE (vectype
);
2608 int icode
= (int) optab_handler (mul_optab
, vec_mode
);
2609 if (icode
!= CODE_FOR_nothing
)
2613 pattern_stmt
= vect_synth_mult_by_constant (oprnd0
, oprnd1
, stmt_vinfo
);
2617 /* Pattern detected. */
2618 vect_pattern_detected ("vect_recog_mult_pattern", last_stmt
);
2620 *type_out
= vectype
;
2622 return pattern_stmt
;
2625 /* Detect a signed division by a constant that wouldn't be
2626 otherwise vectorized:
2632 where type 'type' is an integral type and N is a constant.
2634 Similarly handle modulo by a constant:
2640 * STMT_VINFO: The stmt from which the pattern search begins,
2641 i.e. the division stmt. S1 is replaced by if N is a power
2642 of two constant and type is signed:
2643 S3 y_t = b_t < 0 ? N - 1 : 0;
2645 S1' a_t = x_t >> log2 (N);
2647 S4 is replaced if N is a power of two constant and
2648 type is signed by (where *_T temporaries have unsigned type):
2649 S9 y_T = b_t < 0 ? -1U : 0U;
2650 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2651 S7 z_t = (type) z_T;
2653 S5 x_t = w_t & (N - 1);
2654 S4' a_t = x_t - z_t;
2658 * TYPE_OUT: The type of the output of this pattern.
2660 * Return value: A new stmt that will be used to replace the division
2661 S1 or modulo S4 stmt. */
2664 vect_recog_divmod_pattern (stmt_vec_info stmt_vinfo
, tree
*type_out
)
2666 gimple
*last_stmt
= stmt_vinfo
->stmt
;
2667 tree oprnd0
, oprnd1
, vectype
, itype
, cond
;
2668 gimple
*pattern_stmt
, *def_stmt
;
2669 enum tree_code rhs_code
;
2672 int dummy_int
, prec
;
2674 if (!is_gimple_assign (last_stmt
))
2677 rhs_code
= gimple_assign_rhs_code (last_stmt
);
2680 case TRUNC_DIV_EXPR
:
2681 case EXACT_DIV_EXPR
:
2682 case TRUNC_MOD_EXPR
:
2688 oprnd0
= gimple_assign_rhs1 (last_stmt
);
2689 oprnd1
= gimple_assign_rhs2 (last_stmt
);
2690 itype
= TREE_TYPE (oprnd0
);
2691 if (TREE_CODE (oprnd0
) != SSA_NAME
2692 || TREE_CODE (oprnd1
) != INTEGER_CST
2693 || TREE_CODE (itype
) != INTEGER_TYPE
2694 || !type_has_mode_precision_p (itype
))
2697 scalar_int_mode itype_mode
= SCALAR_INT_TYPE_MODE (itype
);
2698 vectype
= get_vectype_for_scalar_type (itype
);
2699 if (vectype
== NULL_TREE
)
2702 if (optimize_bb_for_size_p (gimple_bb (last_stmt
)))
2704 /* If the target can handle vectorized division or modulo natively,
2705 don't attempt to optimize this, since native division is likely
2706 to give smaller code. */
2707 optab
= optab_for_tree_code (rhs_code
, vectype
, optab_default
);
2708 if (optab
!= unknown_optab
)
2710 machine_mode vec_mode
= TYPE_MODE (vectype
);
2711 int icode
= (int) optab_handler (optab
, vec_mode
);
2712 if (icode
!= CODE_FOR_nothing
)
2717 prec
= TYPE_PRECISION (itype
);
2718 if (integer_pow2p (oprnd1
))
2720 if (TYPE_UNSIGNED (itype
) || tree_int_cst_sgn (oprnd1
) != 1)
2723 /* Pattern detected. */
2724 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt
);
2726 cond
= build2 (LT_EXPR
, boolean_type_node
, oprnd0
,
2727 build_int_cst (itype
, 0));
2728 if (rhs_code
== TRUNC_DIV_EXPR
2729 || rhs_code
== EXACT_DIV_EXPR
)
2731 tree var
= vect_recog_temp_ssa_var (itype
, NULL
);
2734 = gimple_build_assign (var
, COND_EXPR
, cond
,
2735 fold_build2 (MINUS_EXPR
, itype
, oprnd1
,
2736 build_int_cst (itype
, 1)),
2737 build_int_cst (itype
, 0));
2738 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2739 var
= vect_recog_temp_ssa_var (itype
, NULL
);
2741 = gimple_build_assign (var
, PLUS_EXPR
, oprnd0
,
2742 gimple_assign_lhs (def_stmt
));
2743 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2745 shift
= build_int_cst (itype
, tree_log2 (oprnd1
));
2747 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2748 RSHIFT_EXPR
, var
, shift
);
2753 if (compare_tree_int (oprnd1
, 2) == 0)
2755 signmask
= vect_recog_temp_ssa_var (itype
, NULL
);
2756 def_stmt
= gimple_build_assign (signmask
, COND_EXPR
, cond
,
2757 build_int_cst (itype
, 1),
2758 build_int_cst (itype
, 0));
2759 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2764 = build_nonstandard_integer_type (prec
, 1);
2765 tree vecutype
= get_vectype_for_scalar_type (utype
);
2767 = build_int_cst (utype
, GET_MODE_BITSIZE (itype_mode
)
2768 - tree_log2 (oprnd1
));
2769 tree var
= vect_recog_temp_ssa_var (utype
, NULL
);
2771 def_stmt
= gimple_build_assign (var
, COND_EXPR
, cond
,
2772 build_int_cst (utype
, -1),
2773 build_int_cst (utype
, 0));
2774 append_pattern_def_seq (stmt_vinfo
, def_stmt
, vecutype
);
2775 var
= vect_recog_temp_ssa_var (utype
, NULL
);
2776 def_stmt
= gimple_build_assign (var
, RSHIFT_EXPR
,
2777 gimple_assign_lhs (def_stmt
),
2779 append_pattern_def_seq (stmt_vinfo
, def_stmt
, vecutype
);
2780 signmask
= vect_recog_temp_ssa_var (itype
, NULL
);
2782 = gimple_build_assign (signmask
, NOP_EXPR
, var
);
2783 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2786 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2787 PLUS_EXPR
, oprnd0
, signmask
);
2788 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2790 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2791 BIT_AND_EXPR
, gimple_assign_lhs (def_stmt
),
2792 fold_build2 (MINUS_EXPR
, itype
, oprnd1
,
2793 build_int_cst (itype
, 1)));
2794 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2797 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2798 MINUS_EXPR
, gimple_assign_lhs (def_stmt
),
2802 *type_out
= vectype
;
2803 return pattern_stmt
;
2806 if (prec
> HOST_BITS_PER_WIDE_INT
2807 || integer_zerop (oprnd1
))
2810 if (!can_mult_highpart_p (TYPE_MODE (vectype
), TYPE_UNSIGNED (itype
)))
2813 if (TYPE_UNSIGNED (itype
))
2815 unsigned HOST_WIDE_INT mh
, ml
;
2816 int pre_shift
, post_shift
;
2817 unsigned HOST_WIDE_INT d
= (TREE_INT_CST_LOW (oprnd1
)
2818 & GET_MODE_MASK (itype_mode
));
2819 tree t1
, t2
, t3
, t4
;
2821 if (d
>= (HOST_WIDE_INT_1U
<< (prec
- 1)))
2822 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2825 /* Find a suitable multiplier and right shift count
2826 instead of multiplying with D. */
2827 mh
= choose_multiplier (d
, prec
, prec
, &ml
, &post_shift
, &dummy_int
);
2829 /* If the suggested multiplier is more than SIZE bits, we can do better
2830 for even divisors, using an initial right shift. */
2831 if (mh
!= 0 && (d
& 1) == 0)
2833 pre_shift
= ctz_or_zero (d
);
2834 mh
= choose_multiplier (d
>> pre_shift
, prec
, prec
- pre_shift
,
2835 &ml
, &post_shift
, &dummy_int
);
2843 if (post_shift
- 1 >= prec
)
2846 /* t1 = oprnd0 h* ml;
2850 q = t4 >> (post_shift - 1); */
2851 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
2852 def_stmt
= gimple_build_assign (t1
, MULT_HIGHPART_EXPR
, oprnd0
,
2853 build_int_cst (itype
, ml
));
2854 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2856 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
2858 = gimple_build_assign (t2
, MINUS_EXPR
, oprnd0
, t1
);
2859 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2861 t3
= vect_recog_temp_ssa_var (itype
, NULL
);
2863 = gimple_build_assign (t3
, RSHIFT_EXPR
, t2
, integer_one_node
);
2864 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2866 t4
= vect_recog_temp_ssa_var (itype
, NULL
);
2868 = gimple_build_assign (t4
, PLUS_EXPR
, t1
, t3
);
2870 if (post_shift
!= 1)
2872 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2874 q
= vect_recog_temp_ssa_var (itype
, NULL
);
2876 = gimple_build_assign (q
, RSHIFT_EXPR
, t4
,
2877 build_int_cst (itype
, post_shift
- 1));
2882 pattern_stmt
= def_stmt
;
2887 if (pre_shift
>= prec
|| post_shift
>= prec
)
2890 /* t1 = oprnd0 >> pre_shift;
2892 q = t2 >> post_shift; */
2895 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
2897 = gimple_build_assign (t1
, RSHIFT_EXPR
, oprnd0
,
2898 build_int_cst (NULL
, pre_shift
));
2899 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2904 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
2905 def_stmt
= gimple_build_assign (t2
, MULT_HIGHPART_EXPR
, t1
,
2906 build_int_cst (itype
, ml
));
2910 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2912 q
= vect_recog_temp_ssa_var (itype
, NULL
);
2914 = gimple_build_assign (q
, RSHIFT_EXPR
, t2
,
2915 build_int_cst (itype
, post_shift
));
2920 pattern_stmt
= def_stmt
;
2925 unsigned HOST_WIDE_INT ml
;
2927 HOST_WIDE_INT d
= TREE_INT_CST_LOW (oprnd1
);
2928 unsigned HOST_WIDE_INT abs_d
;
2930 tree t1
, t2
, t3
, t4
;
2932 /* Give up for -1. */
2936 /* Since d might be INT_MIN, we have to cast to
2937 unsigned HOST_WIDE_INT before negating to avoid
2938 undefined signed overflow. */
2940 ? (unsigned HOST_WIDE_INT
) d
2941 : - (unsigned HOST_WIDE_INT
) d
);
2943 /* n rem d = n rem -d */
2944 if (rhs_code
== TRUNC_MOD_EXPR
&& d
< 0)
2947 oprnd1
= build_int_cst (itype
, abs_d
);
2949 else if (HOST_BITS_PER_WIDE_INT
>= prec
2950 && abs_d
== HOST_WIDE_INT_1U
<< (prec
- 1))
2951 /* This case is not handled correctly below. */
2954 choose_multiplier (abs_d
, prec
, prec
- 1, &ml
, &post_shift
, &dummy_int
);
2955 if (ml
>= HOST_WIDE_INT_1U
<< (prec
- 1))
2958 ml
|= HOST_WIDE_INT_M1U
<< (prec
- 1);
2960 if (post_shift
>= prec
)
2963 /* t1 = oprnd0 h* ml; */
2964 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
2965 def_stmt
= gimple_build_assign (t1
, MULT_HIGHPART_EXPR
, oprnd0
,
2966 build_int_cst (itype
, ml
));
2970 /* t2 = t1 + oprnd0; */
2971 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2972 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
2973 def_stmt
= gimple_build_assign (t2
, PLUS_EXPR
, t1
, oprnd0
);
2980 /* t3 = t2 >> post_shift; */
2981 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2982 t3
= vect_recog_temp_ssa_var (itype
, NULL
);
2983 def_stmt
= gimple_build_assign (t3
, RSHIFT_EXPR
, t2
,
2984 build_int_cst (itype
, post_shift
));
2989 wide_int oprnd0_min
, oprnd0_max
;
2991 if (get_range_info (oprnd0
, &oprnd0_min
, &oprnd0_max
) == VR_RANGE
)
2993 if (!wi::neg_p (oprnd0_min
, TYPE_SIGN (itype
)))
2995 else if (wi::neg_p (oprnd0_max
, TYPE_SIGN (itype
)))
2999 if (msb
== 0 && d
>= 0)
3003 pattern_stmt
= def_stmt
;
3007 /* t4 = oprnd0 >> (prec - 1);
3008 or if we know from VRP that oprnd0 >= 0
3010 or if we know from VRP that oprnd0 < 0
3012 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
3013 t4
= vect_recog_temp_ssa_var (itype
, NULL
);
3015 def_stmt
= gimple_build_assign (t4
, INTEGER_CST
,
3016 build_int_cst (itype
, msb
));
3018 def_stmt
= gimple_build_assign (t4
, RSHIFT_EXPR
, oprnd0
,
3019 build_int_cst (itype
, prec
- 1));
3020 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
3022 /* q = t3 - t4; or q = t4 - t3; */
3023 q
= vect_recog_temp_ssa_var (itype
, NULL
);
3024 pattern_stmt
= gimple_build_assign (q
, MINUS_EXPR
, d
< 0 ? t4
: t3
,
3029 if (rhs_code
== TRUNC_MOD_EXPR
)
3033 /* We divided. Now finish by:
3036 append_pattern_def_seq (stmt_vinfo
, pattern_stmt
);
3038 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
3039 def_stmt
= gimple_build_assign (t1
, MULT_EXPR
, q
, oprnd1
);
3040 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
3042 r
= vect_recog_temp_ssa_var (itype
, NULL
);
3043 pattern_stmt
= gimple_build_assign (r
, MINUS_EXPR
, oprnd0
, t1
);
3046 /* Pattern detected. */
3047 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt
);
3049 *type_out
= vectype
;
3050 return pattern_stmt
;
3053 /* Function vect_recog_mixed_size_cond_pattern
3055 Try to find the following pattern:
3060 S1 a_T = x_t CMP y_t ? b_T : c_T;
3062 where type 'TYPE' is an integral type which has different size
3063 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
3064 than 'type', the constants need to fit into an integer type
3065 with the same width as 'type') or results of conversion from 'type'.
3069 * STMT_VINFO: The stmt from which the pattern search begins.
3073 * TYPE_OUT: The type of the output of this pattern.
3075 * Return value: A new stmt that will be used to replace the pattern.
3076 Additionally a def_stmt is added.
3078 a_it = x_t CMP y_t ? b_it : c_it;
3079 a_T = (TYPE) a_it; */
3082 vect_recog_mixed_size_cond_pattern (stmt_vec_info stmt_vinfo
, tree
*type_out
)
3084 gimple
*last_stmt
= stmt_vinfo
->stmt
;
3085 tree cond_expr
, then_clause
, else_clause
;
3086 tree type
, vectype
, comp_vectype
, itype
= NULL_TREE
, vecitype
;
3087 gimple
*pattern_stmt
, *def_stmt
;
3088 tree orig_type0
= NULL_TREE
, orig_type1
= NULL_TREE
;
3089 gimple
*def_stmt0
= NULL
, *def_stmt1
= NULL
;
3091 tree comp_scalar_type
;
3093 if (!is_gimple_assign (last_stmt
)
3094 || gimple_assign_rhs_code (last_stmt
) != COND_EXPR
3095 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_internal_def
)
3098 cond_expr
= gimple_assign_rhs1 (last_stmt
);
3099 then_clause
= gimple_assign_rhs2 (last_stmt
);
3100 else_clause
= gimple_assign_rhs3 (last_stmt
);
3102 if (!COMPARISON_CLASS_P (cond_expr
))
3105 comp_scalar_type
= TREE_TYPE (TREE_OPERAND (cond_expr
, 0));
3106 comp_vectype
= get_vectype_for_scalar_type (comp_scalar_type
);
3107 if (comp_vectype
== NULL_TREE
)
3110 type
= gimple_expr_type (last_stmt
);
3111 if (types_compatible_p (type
, comp_scalar_type
)
3112 || ((TREE_CODE (then_clause
) != INTEGER_CST
3113 || TREE_CODE (else_clause
) != INTEGER_CST
)
3114 && !INTEGRAL_TYPE_P (comp_scalar_type
))
3115 || !INTEGRAL_TYPE_P (type
))
3118 if ((TREE_CODE (then_clause
) != INTEGER_CST
3119 && !type_conversion_p (then_clause
, stmt_vinfo
, false, &orig_type0
,
3120 &def_stmt0
, &promotion
))
3121 || (TREE_CODE (else_clause
) != INTEGER_CST
3122 && !type_conversion_p (else_clause
, stmt_vinfo
, false, &orig_type1
,
3123 &def_stmt1
, &promotion
)))
3126 if (orig_type0
&& orig_type1
3127 && !types_compatible_p (orig_type0
, orig_type1
))
3132 if (!types_compatible_p (orig_type0
, comp_scalar_type
))
3134 then_clause
= gimple_assign_rhs1 (def_stmt0
);
3140 if (!types_compatible_p (orig_type1
, comp_scalar_type
))
3142 else_clause
= gimple_assign_rhs1 (def_stmt1
);
3147 HOST_WIDE_INT cmp_mode_size
3148 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype
));
3150 scalar_int_mode type_mode
= SCALAR_INT_TYPE_MODE (type
);
3151 if (GET_MODE_BITSIZE (type_mode
) == cmp_mode_size
)
3154 vectype
= get_vectype_for_scalar_type (type
);
3155 if (vectype
== NULL_TREE
)
3158 if (expand_vec_cond_expr_p (vectype
, comp_vectype
, TREE_CODE (cond_expr
)))
3161 if (itype
== NULL_TREE
)
3162 itype
= build_nonstandard_integer_type (cmp_mode_size
,
3163 TYPE_UNSIGNED (type
));
3165 if (itype
== NULL_TREE
3166 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype
)) != cmp_mode_size
)
3169 vecitype
= get_vectype_for_scalar_type (itype
);
3170 if (vecitype
== NULL_TREE
)
3173 if (!expand_vec_cond_expr_p (vecitype
, comp_vectype
, TREE_CODE (cond_expr
)))
3176 if (GET_MODE_BITSIZE (type_mode
) > cmp_mode_size
)
3178 if ((TREE_CODE (then_clause
) == INTEGER_CST
3179 && !int_fits_type_p (then_clause
, itype
))
3180 || (TREE_CODE (else_clause
) == INTEGER_CST
3181 && !int_fits_type_p (else_clause
, itype
)))
3185 def_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
3186 COND_EXPR
, unshare_expr (cond_expr
),
3187 fold_convert (itype
, then_clause
),
3188 fold_convert (itype
, else_clause
));
3189 pattern_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (type
, NULL
),
3190 NOP_EXPR
, gimple_assign_lhs (def_stmt
));
3192 append_pattern_def_seq (stmt_vinfo
, def_stmt
, vecitype
);
3193 *type_out
= vectype
;
3195 vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt
);
3197 return pattern_stmt
;
3201 /* Helper function of vect_recog_bool_pattern. Called recursively, return
3202 true if bool VAR can and should be optimized that way. Assume it shouldn't
3203 in case it's a result of a comparison which can be directly vectorized into
3204 a vector comparison. Fills in STMTS with all stmts visited during the
3208 check_bool_pattern (tree var
, vec_info
*vinfo
, hash_set
<gimple
*> &stmts
)
3211 enum tree_code rhs_code
;
3213 stmt_vec_info def_stmt_info
= vect_get_internal_def (vinfo
, var
);
3217 gassign
*def_stmt
= dyn_cast
<gassign
*> (def_stmt_info
->stmt
);
3221 if (stmts
.contains (def_stmt
))
3224 rhs1
= gimple_assign_rhs1 (def_stmt
);
3225 rhs_code
= gimple_assign_rhs_code (def_stmt
);
3229 if (! check_bool_pattern (rhs1
, vinfo
, stmts
))
3234 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1
)))
3236 if (! check_bool_pattern (rhs1
, vinfo
, stmts
))
3241 if (! check_bool_pattern (rhs1
, vinfo
, stmts
))
3248 if (! check_bool_pattern (rhs1
, vinfo
, stmts
)
3249 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt
), vinfo
, stmts
))
3254 if (TREE_CODE_CLASS (rhs_code
) == tcc_comparison
)
3256 tree vecitype
, comp_vectype
;
3258 /* If the comparison can throw, then is_gimple_condexpr will be
3259 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
3260 if (stmt_could_throw_p (def_stmt
))
3263 comp_vectype
= get_vectype_for_scalar_type (TREE_TYPE (rhs1
));
3264 if (comp_vectype
== NULL_TREE
)
3267 tree mask_type
= get_mask_type_for_scalar_type (TREE_TYPE (rhs1
));
3269 && expand_vec_cmp_expr_p (comp_vectype
, mask_type
, rhs_code
))
3272 if (TREE_CODE (TREE_TYPE (rhs1
)) != INTEGER_TYPE
)
3274 scalar_mode mode
= SCALAR_TYPE_MODE (TREE_TYPE (rhs1
));
3276 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode
), 1);
3277 vecitype
= get_vectype_for_scalar_type (itype
);
3278 if (vecitype
== NULL_TREE
)
3282 vecitype
= comp_vectype
;
3283 if (! expand_vec_cond_expr_p (vecitype
, comp_vectype
, rhs_code
))
3291 bool res
= stmts
.add (def_stmt
);
3292 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
3299 /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
3300 stmt (SSA_NAME_DEF_STMT of VAR) adding a cast to STMT_INFOs
3301 pattern sequence. */
3304 adjust_bool_pattern_cast (tree type
, tree var
, stmt_vec_info stmt_info
)
3306 gimple
*cast_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (type
, NULL
),
3308 append_pattern_def_seq (stmt_info
, cast_stmt
,
3309 get_vectype_for_scalar_type (type
));
3310 return gimple_assign_lhs (cast_stmt
);
3313 /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
3314 VAR is an SSA_NAME that should be transformed from bool to a wider integer
3315 type, OUT_TYPE is the desired final integer type of the whole pattern.
3316 STMT_INFO is the info of the pattern root and is where pattern stmts should
3317 be associated with. DEFS is a map of pattern defs. */
3320 adjust_bool_pattern (tree var
, tree out_type
,
3321 stmt_vec_info stmt_info
, hash_map
<tree
, tree
> &defs
)
3323 gimple
*stmt
= SSA_NAME_DEF_STMT (var
);
3324 enum tree_code rhs_code
, def_rhs_code
;
3325 tree itype
, cond_expr
, rhs1
, rhs2
, irhs1
, irhs2
;
3327 gimple
*pattern_stmt
, *def_stmt
;
3328 tree trueval
= NULL_TREE
;
3330 rhs1
= gimple_assign_rhs1 (stmt
);
3331 rhs2
= gimple_assign_rhs2 (stmt
);
3332 rhs_code
= gimple_assign_rhs_code (stmt
);
3333 loc
= gimple_location (stmt
);
3338 irhs1
= *defs
.get (rhs1
);
3339 itype
= TREE_TYPE (irhs1
);
3341 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
3346 irhs1
= *defs
.get (rhs1
);
3347 itype
= TREE_TYPE (irhs1
);
3349 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
3350 BIT_XOR_EXPR
, irhs1
, build_int_cst (itype
, 1));
3354 /* Try to optimize x = y & (a < b ? 1 : 0); into
3355 x = (a < b ? y : 0);
3361 S1 a_b = x1 CMP1 y1;
3362 S2 b_b = x2 CMP2 y2;
3364 S4 d_T = (TYPE) c_b;
3366 we would normally emit:
3368 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3369 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3370 S3' c_T = a_T & b_T;
3373 but we can save one stmt by using the
3374 result of one of the COND_EXPRs in the other COND_EXPR and leave
3375 BIT_AND_EXPR stmt out:
3377 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3378 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3381 At least when VEC_COND_EXPR is implemented using masks
3382 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
3383 computes the comparison masks and ands it, in one case with
3384 all ones vector, in the other case with a vector register.
3385 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
3386 often more expensive. */
3387 def_stmt
= SSA_NAME_DEF_STMT (rhs2
);
3388 def_rhs_code
= gimple_assign_rhs_code (def_stmt
);
3389 if (TREE_CODE_CLASS (def_rhs_code
) == tcc_comparison
)
3391 irhs1
= *defs
.get (rhs1
);
3392 tree def_rhs1
= gimple_assign_rhs1 (def_stmt
);
3393 if (TYPE_PRECISION (TREE_TYPE (irhs1
))
3394 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1
))))
3396 rhs_code
= def_rhs_code
;
3398 rhs2
= gimple_assign_rhs2 (def_stmt
);
3403 irhs2
= *defs
.get (rhs2
);
3406 def_stmt
= SSA_NAME_DEF_STMT (rhs1
);
3407 def_rhs_code
= gimple_assign_rhs_code (def_stmt
);
3408 if (TREE_CODE_CLASS (def_rhs_code
) == tcc_comparison
)
3410 irhs2
= *defs
.get (rhs2
);
3411 tree def_rhs1
= gimple_assign_rhs1 (def_stmt
);
3412 if (TYPE_PRECISION (TREE_TYPE (irhs2
))
3413 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1
))))
3415 rhs_code
= def_rhs_code
;
3417 rhs2
= gimple_assign_rhs2 (def_stmt
);
3422 irhs1
= *defs
.get (rhs1
);
3428 irhs1
= *defs
.get (rhs1
);
3429 irhs2
= *defs
.get (rhs2
);
3431 if (TYPE_PRECISION (TREE_TYPE (irhs1
))
3432 != TYPE_PRECISION (TREE_TYPE (irhs2
)))
3434 int prec1
= TYPE_PRECISION (TREE_TYPE (irhs1
));
3435 int prec2
= TYPE_PRECISION (TREE_TYPE (irhs2
));
3436 int out_prec
= TYPE_PRECISION (out_type
);
3437 if (absu_hwi (out_prec
- prec1
) < absu_hwi (out_prec
- prec2
))
3438 irhs2
= adjust_bool_pattern_cast (TREE_TYPE (irhs1
), irhs2
,
3440 else if (absu_hwi (out_prec
- prec1
) > absu_hwi (out_prec
- prec2
))
3441 irhs1
= adjust_bool_pattern_cast (TREE_TYPE (irhs2
), irhs1
,
3445 irhs1
= adjust_bool_pattern_cast (out_type
, irhs1
, stmt_info
);
3446 irhs2
= adjust_bool_pattern_cast (out_type
, irhs2
, stmt_info
);
3449 itype
= TREE_TYPE (irhs1
);
3451 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
3452 rhs_code
, irhs1
, irhs2
);
3457 gcc_assert (TREE_CODE_CLASS (rhs_code
) == tcc_comparison
);
3458 if (TREE_CODE (TREE_TYPE (rhs1
)) != INTEGER_TYPE
3459 || !TYPE_UNSIGNED (TREE_TYPE (rhs1
))
3460 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1
)),
3461 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1
)))))
3463 scalar_mode mode
= SCALAR_TYPE_MODE (TREE_TYPE (rhs1
));
3465 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode
), 1);
3468 itype
= TREE_TYPE (rhs1
);
3469 cond_expr
= build2_loc (loc
, rhs_code
, itype
, rhs1
, rhs2
);
3470 if (trueval
== NULL_TREE
)
3471 trueval
= build_int_cst (itype
, 1);
3473 gcc_checking_assert (useless_type_conversion_p (itype
,
3474 TREE_TYPE (trueval
)));
3476 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
3477 COND_EXPR
, cond_expr
, trueval
,
3478 build_int_cst (itype
, 0));
3482 gimple_set_location (pattern_stmt
, loc
);
3483 append_pattern_def_seq (stmt_info
, pattern_stmt
,
3484 get_vectype_for_scalar_type (itype
));
3485 defs
.put (var
, gimple_assign_lhs (pattern_stmt
));
3488 /* Comparison function to qsort a vector of gimple stmts after UID. */
3491 sort_after_uid (const void *p1
, const void *p2
)
3493 const gimple
*stmt1
= *(const gimple
* const *)p1
;
3494 const gimple
*stmt2
= *(const gimple
* const *)p2
;
3495 return gimple_uid (stmt1
) - gimple_uid (stmt2
);
3498 /* Create pattern stmts for all stmts participating in the bool pattern
3499 specified by BOOL_STMT_SET and its root STMT_INFO with the desired type
3500 OUT_TYPE. Return the def of the pattern root. */
3503 adjust_bool_stmts (hash_set
<gimple
*> &bool_stmt_set
,
3504 tree out_type
, stmt_vec_info stmt_info
)
3506 /* Gather original stmts in the bool pattern in their order of appearance
3508 auto_vec
<gimple
*> bool_stmts (bool_stmt_set
.elements ());
3509 for (hash_set
<gimple
*>::iterator i
= bool_stmt_set
.begin ();
3510 i
!= bool_stmt_set
.end (); ++i
)
3511 bool_stmts
.quick_push (*i
);
3512 bool_stmts
.qsort (sort_after_uid
);
3514 /* Now process them in that order, producing pattern stmts. */
3515 hash_map
<tree
, tree
> defs
;
3516 for (unsigned i
= 0; i
< bool_stmts
.length (); ++i
)
3517 adjust_bool_pattern (gimple_assign_lhs (bool_stmts
[i
]),
3518 out_type
, stmt_info
, defs
);
3520 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
3521 gimple
*pattern_stmt
3522 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
));
3523 return gimple_assign_lhs (pattern_stmt
);
3526 /* Helper for search_type_for_mask. */
3529 search_type_for_mask_1 (tree var
, vec_info
*vinfo
,
3530 hash_map
<gimple
*, tree
> &cache
)
3533 enum tree_code rhs_code
;
3534 tree res
= NULL_TREE
, res2
;
3536 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var
)))
3539 stmt_vec_info def_stmt_info
= vect_get_internal_def (vinfo
, var
);
3543 gassign
*def_stmt
= dyn_cast
<gassign
*> (def_stmt_info
->stmt
);
3547 tree
*c
= cache
.get (def_stmt
);
3551 rhs_code
= gimple_assign_rhs_code (def_stmt
);
3552 rhs1
= gimple_assign_rhs1 (def_stmt
);
3559 res
= search_type_for_mask_1 (rhs1
, vinfo
, cache
);
3565 res
= search_type_for_mask_1 (rhs1
, vinfo
, cache
);
3566 res2
= search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt
), vinfo
,
3568 if (!res
|| (res2
&& TYPE_PRECISION (res
) > TYPE_PRECISION (res2
)))
3573 if (TREE_CODE_CLASS (rhs_code
) == tcc_comparison
)
3575 tree comp_vectype
, mask_type
;
3577 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1
)))
3579 res
= search_type_for_mask_1 (rhs1
, vinfo
, cache
);
3580 res2
= search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt
),
3582 if (!res
|| (res2
&& TYPE_PRECISION (res
) > TYPE_PRECISION (res2
)))
3587 comp_vectype
= get_vectype_for_scalar_type (TREE_TYPE (rhs1
));
3588 if (comp_vectype
== NULL_TREE
)
3594 mask_type
= get_mask_type_for_scalar_type (TREE_TYPE (rhs1
));
3596 || !expand_vec_cmp_expr_p (comp_vectype
, mask_type
, rhs_code
))
3602 if (TREE_CODE (TREE_TYPE (rhs1
)) != INTEGER_TYPE
3603 || !TYPE_UNSIGNED (TREE_TYPE (rhs1
)))
3605 scalar_mode mode
= SCALAR_TYPE_MODE (TREE_TYPE (rhs1
));
3606 res
= build_nonstandard_integer_type (GET_MODE_BITSIZE (mode
), 1);
3609 res
= TREE_TYPE (rhs1
);
3613 cache
.put (def_stmt
, res
);
3617 /* Return the proper type for converting bool VAR into
3618 an integer value or NULL_TREE if no such type exists.
3619 The type is chosen so that converted value has the
3620 same number of elements as VAR's vector type. */
3623 search_type_for_mask (tree var
, vec_info
*vinfo
)
3625 hash_map
<gimple
*, tree
> cache
;
3626 return search_type_for_mask_1 (var
, vinfo
, cache
);
3629 /* Function vect_recog_bool_pattern
3631 Try to find pattern like following:
3633 bool a_b, b_b, c_b, d_b, e_b;
3636 S1 a_b = x1 CMP1 y1;
3637 S2 b_b = x2 CMP2 y2;
3639 S4 d_b = x3 CMP3 y3;
3641 S6 f_T = (TYPE) e_b;
3643 where type 'TYPE' is an integral type. Or a similar pattern
3646 S6 f_Y = e_b ? r_Y : s_Y;
3648 as results from if-conversion of a complex condition.
3652 * STMT_VINFO: The stmt at the end from which the pattern
3653 search begins, i.e. cast of a bool to
3658 * TYPE_OUT: The type of the output of this pattern.
3660 * Return value: A new stmt that will be used to replace the pattern.
3662 Assuming size of TYPE is the same as size of all comparisons
3663 (otherwise some casts would be added where needed), the above
3664 sequence we create related pattern stmts:
3665 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3666 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3667 S4' d_T = x3 CMP3 y3 ? 1 : 0;
3668 S5' e_T = c_T | d_T;
3671 Instead of the above S3' we could emit:
3672 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3673 S3' c_T = a_T | b_T;
3674 but the above is more efficient. */
3677 vect_recog_bool_pattern (stmt_vec_info stmt_vinfo
, tree
*type_out
)
3679 gimple
*last_stmt
= stmt_vinfo
->stmt
;
3680 enum tree_code rhs_code
;
3681 tree var
, lhs
, rhs
, vectype
;
3682 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
3683 gimple
*pattern_stmt
;
3685 if (!is_gimple_assign (last_stmt
))
3688 var
= gimple_assign_rhs1 (last_stmt
);
3689 lhs
= gimple_assign_lhs (last_stmt
);
3691 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var
)))
3694 hash_set
<gimple
*> bool_stmts
;
3696 rhs_code
= gimple_assign_rhs_code (last_stmt
);
3697 if (CONVERT_EXPR_CODE_P (rhs_code
))
3699 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
3700 || TYPE_PRECISION (TREE_TYPE (lhs
)) == 1)
3702 vectype
= get_vectype_for_scalar_type (TREE_TYPE (lhs
));
3703 if (vectype
== NULL_TREE
)
3706 if (check_bool_pattern (var
, vinfo
, bool_stmts
))
3708 rhs
= adjust_bool_stmts (bool_stmts
, TREE_TYPE (lhs
), stmt_vinfo
);
3709 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
3710 if (useless_type_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (rhs
)))
3711 pattern_stmt
= gimple_build_assign (lhs
, SSA_NAME
, rhs
);
3714 = gimple_build_assign (lhs
, NOP_EXPR
, rhs
);
3718 tree type
= search_type_for_mask (var
, vinfo
);
3719 tree cst0
, cst1
, tmp
;
3724 /* We may directly use cond with narrowed type to avoid
3725 multiple cond exprs with following result packing and
3726 perform single cond with packed mask instead. In case
3727 of widening we better make cond first and then extract
3729 if (TYPE_MODE (type
) == TYPE_MODE (TREE_TYPE (lhs
)))
3730 type
= TREE_TYPE (lhs
);
3732 cst0
= build_int_cst (type
, 0);
3733 cst1
= build_int_cst (type
, 1);
3734 tmp
= vect_recog_temp_ssa_var (type
, NULL
);
3735 pattern_stmt
= gimple_build_assign (tmp
, COND_EXPR
, var
, cst1
, cst0
);
3737 if (!useless_type_conversion_p (type
, TREE_TYPE (lhs
)))
3739 tree new_vectype
= get_vectype_for_scalar_type (type
);
3740 append_pattern_def_seq (stmt_vinfo
, pattern_stmt
, new_vectype
);
3742 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
3743 pattern_stmt
= gimple_build_assign (lhs
, CONVERT_EXPR
, tmp
);
3747 *type_out
= vectype
;
3748 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt
);
3750 return pattern_stmt
;
3752 else if (rhs_code
== COND_EXPR
3753 && TREE_CODE (var
) == SSA_NAME
)
3755 vectype
= get_vectype_for_scalar_type (TREE_TYPE (lhs
));
3756 if (vectype
== NULL_TREE
)
3759 /* Build a scalar type for the boolean result that when
3760 vectorized matches the vector type of the result in
3761 size and number of elements. */
3763 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype
)),
3764 TYPE_VECTOR_SUBPARTS (vectype
));
3767 = build_nonstandard_integer_type (prec
,
3768 TYPE_UNSIGNED (TREE_TYPE (var
)));
3769 if (get_vectype_for_scalar_type (type
) == NULL_TREE
)
3772 if (!check_bool_pattern (var
, vinfo
, bool_stmts
))
3775 rhs
= adjust_bool_stmts (bool_stmts
, type
, stmt_vinfo
);
3777 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
3779 = gimple_build_assign (lhs
, COND_EXPR
,
3780 build2 (NE_EXPR
, boolean_type_node
,
3781 rhs
, build_int_cst (type
, 0)),
3782 gimple_assign_rhs2 (last_stmt
),
3783 gimple_assign_rhs3 (last_stmt
));
3784 *type_out
= vectype
;
3785 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt
);
3787 return pattern_stmt
;
3789 else if (rhs_code
== SSA_NAME
3790 && STMT_VINFO_DATA_REF (stmt_vinfo
))
3792 stmt_vec_info pattern_stmt_info
;
3793 vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
3794 gcc_assert (vectype
!= NULL_TREE
);
3795 if (!VECTOR_MODE_P (TYPE_MODE (vectype
)))
3798 if (check_bool_pattern (var
, vinfo
, bool_stmts
))
3799 rhs
= adjust_bool_stmts (bool_stmts
, TREE_TYPE (vectype
), stmt_vinfo
);
3802 tree type
= search_type_for_mask (var
, vinfo
);
3803 tree cst0
, cst1
, new_vectype
;
3808 if (TYPE_MODE (type
) == TYPE_MODE (TREE_TYPE (vectype
)))
3809 type
= TREE_TYPE (vectype
);
3811 cst0
= build_int_cst (type
, 0);
3812 cst1
= build_int_cst (type
, 1);
3813 new_vectype
= get_vectype_for_scalar_type (type
);
3815 rhs
= vect_recog_temp_ssa_var (type
, NULL
);
3816 pattern_stmt
= gimple_build_assign (rhs
, COND_EXPR
, var
, cst1
, cst0
);
3817 append_pattern_def_seq (stmt_vinfo
, pattern_stmt
, new_vectype
);
3820 lhs
= build1 (VIEW_CONVERT_EXPR
, TREE_TYPE (vectype
), lhs
);
3821 if (!useless_type_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (rhs
)))
3823 tree rhs2
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
3824 gimple
*cast_stmt
= gimple_build_assign (rhs2
, NOP_EXPR
, rhs
);
3825 append_pattern_def_seq (stmt_vinfo
, cast_stmt
);
3828 pattern_stmt
= gimple_build_assign (lhs
, SSA_NAME
, rhs
);
3829 pattern_stmt_info
= vinfo
->add_stmt (pattern_stmt
);
3830 STMT_VINFO_DATA_REF (pattern_stmt_info
)
3831 = STMT_VINFO_DATA_REF (stmt_vinfo
);
3832 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info
)
3833 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_vinfo
);
3834 *type_out
= vectype
;
3835 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt
);
3837 return pattern_stmt
;
3844 /* A helper for vect_recog_mask_conversion_pattern. Build
3845 conversion of MASK to a type suitable for masking VECTYPE.
3846 Built statement gets required vectype and is appended to
3847 a pattern sequence of STMT_VINFO.
3849 Return converted mask. */
3852 build_mask_conversion (tree mask
, tree vectype
, stmt_vec_info stmt_vinfo
)
3857 masktype
= build_same_sized_truth_vector_type (vectype
);
3858 tmp
= vect_recog_temp_ssa_var (TREE_TYPE (masktype
), NULL
);
3859 stmt
= gimple_build_assign (tmp
, CONVERT_EXPR
, mask
);
3860 append_pattern_def_seq (stmt_vinfo
, stmt
, masktype
);
3866 /* Function vect_recog_mask_conversion_pattern
3868 Try to find statements which require boolean type
3869 converison. Additional conversion statements are
3870 added to handle such cases. For example:
3880 S4 c_1 = m_3 ? c_2 : c_3;
3882 Will be transformed into:
3886 S3'' m_2' = (_Bool[bitsize=32])m_2
3887 S3' m_3' = m_1 & m_2';
3888 S4'' m_3'' = (_Bool[bitsize=8])m_3'
3889 S4' c_1' = m_3'' ? c_2 : c_3; */
3892 vect_recog_mask_conversion_pattern (stmt_vec_info stmt_vinfo
, tree
*type_out
)
3894 gimple
*last_stmt
= stmt_vinfo
->stmt
;
3895 enum tree_code rhs_code
;
3896 tree lhs
= NULL_TREE
, rhs1
, rhs2
, tmp
, rhs1_type
, rhs2_type
;
3897 tree vectype1
, vectype2
;
3898 stmt_vec_info pattern_stmt_info
;
3899 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
3901 /* Check for MASK_LOAD ans MASK_STORE calls requiring mask conversion. */
3902 if (is_gimple_call (last_stmt
)
3903 && gimple_call_internal_p (last_stmt
))
3905 gcall
*pattern_stmt
;
3907 internal_fn ifn
= gimple_call_internal_fn (last_stmt
);
3908 int mask_argno
= internal_fn_mask_index (ifn
);
3912 bool store_p
= internal_store_fn_p (ifn
);
3915 int rhs_index
= internal_fn_stored_value_index (ifn
);
3916 tree rhs
= gimple_call_arg (last_stmt
, rhs_index
);
3917 vectype1
= get_vectype_for_scalar_type (TREE_TYPE (rhs
));
3921 lhs
= gimple_call_lhs (last_stmt
);
3922 vectype1
= get_vectype_for_scalar_type (TREE_TYPE (lhs
));
3925 tree mask_arg
= gimple_call_arg (last_stmt
, mask_argno
);
3926 tree mask_arg_type
= search_type_for_mask (mask_arg
, vinfo
);
3929 vectype2
= get_mask_type_for_scalar_type (mask_arg_type
);
3931 if (!vectype1
|| !vectype2
3932 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1
),
3933 TYPE_VECTOR_SUBPARTS (vectype2
)))
3936 tmp
= build_mask_conversion (mask_arg
, vectype1
, stmt_vinfo
);
3938 auto_vec
<tree
, 8> args
;
3939 unsigned int nargs
= gimple_call_num_args (last_stmt
);
3940 args
.safe_grow (nargs
);
3941 for (unsigned int i
= 0; i
< nargs
; ++i
)
3942 args
[i
] = ((int) i
== mask_argno
3944 : gimple_call_arg (last_stmt
, i
));
3945 pattern_stmt
= gimple_build_call_internal_vec (ifn
, args
);
3949 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
3950 gimple_call_set_lhs (pattern_stmt
, lhs
);
3952 gimple_call_set_nothrow (pattern_stmt
, true);
3954 pattern_stmt_info
= vinfo
->add_stmt (pattern_stmt
);
3955 if (STMT_VINFO_DATA_REF (stmt_vinfo
))
3957 STMT_VINFO_DATA_REF (pattern_stmt_info
)
3958 = STMT_VINFO_DATA_REF (stmt_vinfo
);
3959 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info
)
3960 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_vinfo
);
3961 STMT_VINFO_GATHER_SCATTER_P (pattern_stmt_info
)
3962 = STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo
);
3965 *type_out
= vectype1
;
3966 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt
);
3968 return pattern_stmt
;
3971 if (!is_gimple_assign (last_stmt
))
3974 gimple
*pattern_stmt
;
3975 lhs
= gimple_assign_lhs (last_stmt
);
3976 rhs1
= gimple_assign_rhs1 (last_stmt
);
3977 rhs_code
= gimple_assign_rhs_code (last_stmt
);
3979 /* Check for cond expression requiring mask conversion. */
3980 if (rhs_code
== COND_EXPR
)
3982 vectype1
= get_vectype_for_scalar_type (TREE_TYPE (lhs
));
3984 if (TREE_CODE (rhs1
) == SSA_NAME
)
3986 rhs1_type
= search_type_for_mask (rhs1
, vinfo
);
3990 else if (COMPARISON_CLASS_P (rhs1
))
3992 /* Check whether we're comparing scalar booleans and (if so)
3993 whether a better mask type exists than the mask associated
3994 with boolean-sized elements. This avoids unnecessary packs
3995 and unpacks if the booleans are set from comparisons of
3996 wider types. E.g. in:
3998 int x1, x2, x3, x4, y1, y1;
4000 bool b1 = (x1 == x2);
4001 bool b2 = (x3 == x4);
4002 ... = b1 == b2 ? y1 : y2;
4004 it is better for b1 and b2 to use the mask type associated
4005 with int elements rather bool (byte) elements. */
4006 rhs1_type
= search_type_for_mask (TREE_OPERAND (rhs1
, 0), vinfo
);
4008 rhs1_type
= TREE_TYPE (TREE_OPERAND (rhs1
, 0));
4013 vectype2
= get_mask_type_for_scalar_type (rhs1_type
);
4015 if (!vectype1
|| !vectype2
)
4018 /* Continue if a conversion is needed. Also continue if we have
4019 a comparison whose vector type would normally be different from
4020 VECTYPE2 when considered in isolation. In that case we'll
4021 replace the comparison with an SSA name (so that we can record
4022 its vector type) and behave as though the comparison was an SSA
4023 name from the outset. */
4024 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1
),
4025 TYPE_VECTOR_SUBPARTS (vectype2
))
4026 && (TREE_CODE (rhs1
) == SSA_NAME
4027 || rhs1_type
== TREE_TYPE (TREE_OPERAND (rhs1
, 0))))
4030 /* If rhs1 is invariant and we can promote it leave the COND_EXPR
4031 in place, we can handle it in vectorizable_condition. This avoids
4032 unnecessary promotion stmts and increased vectorization factor. */
4033 if (COMPARISON_CLASS_P (rhs1
)
4034 && INTEGRAL_TYPE_P (rhs1_type
)
4035 && known_le (TYPE_VECTOR_SUBPARTS (vectype1
),
4036 TYPE_VECTOR_SUBPARTS (vectype2
)))
4038 enum vect_def_type dt
;
4039 if (vect_is_simple_use (TREE_OPERAND (rhs1
, 0), vinfo
, &dt
)
4040 && dt
== vect_external_def
4041 && vect_is_simple_use (TREE_OPERAND (rhs1
, 1), vinfo
, &dt
)
4042 && (dt
== vect_external_def
4043 || dt
== vect_constant_def
))
4045 tree wide_scalar_type
= build_nonstandard_integer_type
4046 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype1
))),
4047 TYPE_UNSIGNED (rhs1_type
));
4048 tree vectype3
= get_vectype_for_scalar_type (wide_scalar_type
);
4049 if (expand_vec_cond_expr_p (vectype1
, vectype3
, TREE_CODE (rhs1
)))
4054 /* If rhs1 is a comparison we need to move it into a
4055 separate statement. */
4056 if (TREE_CODE (rhs1
) != SSA_NAME
)
4058 tmp
= vect_recog_temp_ssa_var (TREE_TYPE (rhs1
), NULL
);
4059 pattern_stmt
= gimple_build_assign (tmp
, rhs1
);
4061 append_pattern_def_seq (stmt_vinfo
, pattern_stmt
, vectype2
);
4064 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
4065 TYPE_VECTOR_SUBPARTS (vectype2
)))
4066 tmp
= build_mask_conversion (rhs1
, vectype1
, stmt_vinfo
);
4070 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
4071 pattern_stmt
= gimple_build_assign (lhs
, COND_EXPR
, tmp
,
4072 gimple_assign_rhs2 (last_stmt
),
4073 gimple_assign_rhs3 (last_stmt
));
4075 *type_out
= vectype1
;
4076 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt
);
4078 return pattern_stmt
;
4081 /* Now check for binary boolean operations requiring conversion for
4083 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs
)))
4086 if (rhs_code
!= BIT_IOR_EXPR
4087 && rhs_code
!= BIT_XOR_EXPR
4088 && rhs_code
!= BIT_AND_EXPR
4089 && TREE_CODE_CLASS (rhs_code
) != tcc_comparison
)
4092 rhs2
= gimple_assign_rhs2 (last_stmt
);
4094 rhs1_type
= search_type_for_mask (rhs1
, vinfo
);
4095 rhs2_type
= search_type_for_mask (rhs2
, vinfo
);
4097 if (!rhs1_type
|| !rhs2_type
4098 || TYPE_PRECISION (rhs1_type
) == TYPE_PRECISION (rhs2_type
))
4101 if (TYPE_PRECISION (rhs1_type
) < TYPE_PRECISION (rhs2_type
))
4103 vectype1
= get_mask_type_for_scalar_type (rhs1_type
);
4106 rhs2
= build_mask_conversion (rhs2
, vectype1
, stmt_vinfo
);
4110 vectype1
= get_mask_type_for_scalar_type (rhs2_type
);
4113 rhs1
= build_mask_conversion (rhs1
, vectype1
, stmt_vinfo
);
4116 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
4117 pattern_stmt
= gimple_build_assign (lhs
, rhs_code
, rhs1
, rhs2
);
4119 *type_out
= vectype1
;
4120 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt
);
4122 return pattern_stmt
;
4125 /* STMT_INFO is a load or store. If the load or store is conditional, return
4126 the boolean condition under which it occurs, otherwise return null. */
4129 vect_get_load_store_mask (stmt_vec_info stmt_info
)
4131 if (gassign
*def_assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
4133 gcc_assert (gimple_assign_single_p (def_assign
));
4137 if (gcall
*def_call
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
4139 internal_fn ifn
= gimple_call_internal_fn (def_call
);
4140 int mask_index
= internal_fn_mask_index (ifn
);
4141 return gimple_call_arg (def_call
, mask_index
);
4147 /* Return the scalar offset type that an internal gather/scatter function
4148 should use. GS_INFO describes the gather/scatter operation. */
4151 vect_get_gather_scatter_offset_type (gather_scatter_info
*gs_info
)
4153 tree offset_type
= TREE_TYPE (gs_info
->offset
);
4154 unsigned int element_bits
= tree_to_uhwi (TYPE_SIZE (gs_info
->element_type
));
4156 /* Enforced by vect_check_gather_scatter. */
4157 unsigned int offset_bits
= TYPE_PRECISION (offset_type
);
4158 gcc_assert (element_bits
>= offset_bits
);
4160 /* If the offset is narrower than the elements, extend it according
4162 if (element_bits
> offset_bits
)
4163 return build_nonstandard_integer_type (element_bits
,
4164 TYPE_UNSIGNED (offset_type
));
4169 /* Return MASK if MASK is suitable for masking an operation on vectors
4170 of type VECTYPE, otherwise convert it into such a form and return
4171 the result. Associate any conversion statements with STMT_INFO's
4175 vect_convert_mask_for_vectype (tree mask
, tree vectype
,
4176 stmt_vec_info stmt_info
, vec_info
*vinfo
)
4178 tree mask_type
= search_type_for_mask (mask
, vinfo
);
4181 tree mask_vectype
= get_mask_type_for_scalar_type (mask_type
);
4183 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype
),
4184 TYPE_VECTOR_SUBPARTS (mask_vectype
)))
4185 mask
= build_mask_conversion (mask
, vectype
, stmt_info
);
4190 /* Return the equivalent of:
4192 fold_convert (TYPE, VALUE)
4194 with the expectation that the operation will be vectorized.
4195 If new statements are needed, add them as pattern statements
4199 vect_add_conversion_to_pattern (tree type
, tree value
, stmt_vec_info stmt_info
)
4201 if (useless_type_conversion_p (type
, TREE_TYPE (value
)))
4204 tree new_value
= vect_recog_temp_ssa_var (type
, NULL
);
4205 gassign
*conversion
= gimple_build_assign (new_value
, CONVERT_EXPR
, value
);
4206 append_pattern_def_seq (stmt_info
, conversion
,
4207 get_vectype_for_scalar_type (type
));
4211 /* Try to convert STMT_INFO into a call to a gather load or scatter store
4212 internal function. Return the final statement on success and set
4213 *TYPE_OUT to the vector type being loaded or stored.
4215 This function only handles gathers and scatters that were recognized
4216 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
4219 vect_recog_gather_scatter_pattern (stmt_vec_info stmt_info
, tree
*type_out
)
4221 /* Currently we only support this for loop vectorization. */
4222 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (stmt_info
->vinfo
);
4226 /* Make sure that we're looking at a gather load or scatter store. */
4227 data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
4228 if (!dr
|| !STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
4231 /* Get the boolean that controls whether the load or store happens.
4232 This is null if the operation is unconditional. */
4233 tree mask
= vect_get_load_store_mask (stmt_info
);
4235 /* Make sure that the target supports an appropriate internal
4236 function for the gather/scatter operation. */
4237 gather_scatter_info gs_info
;
4238 if (!vect_check_gather_scatter (stmt_info
, loop_vinfo
, &gs_info
)
4242 /* Convert the mask to the right form. */
4243 tree gs_vectype
= get_vectype_for_scalar_type (gs_info
.element_type
);
4245 mask
= vect_convert_mask_for_vectype (mask
, gs_vectype
, stmt_info
,
4248 /* Get the invariant base and non-invariant offset, converting the
4249 latter to the same width as the vector elements. */
4250 tree base
= gs_info
.base
;
4251 tree offset_type
= vect_get_gather_scatter_offset_type (&gs_info
);
4252 tree offset
= vect_add_conversion_to_pattern (offset_type
, gs_info
.offset
,
4255 /* Build the new pattern statement. */
4256 tree scale
= size_int (gs_info
.scale
);
4257 gcall
*pattern_stmt
;
4258 if (DR_IS_READ (dr
))
4261 pattern_stmt
= gimple_build_call_internal (gs_info
.ifn
, 4, base
,
4262 offset
, scale
, mask
);
4264 pattern_stmt
= gimple_build_call_internal (gs_info
.ifn
, 3, base
,
4266 tree load_lhs
= vect_recog_temp_ssa_var (gs_info
.element_type
, NULL
);
4267 gimple_call_set_lhs (pattern_stmt
, load_lhs
);
4271 tree rhs
= vect_get_store_rhs (stmt_info
);
4273 pattern_stmt
= gimple_build_call_internal (IFN_MASK_SCATTER_STORE
, 5,
4274 base
, offset
, scale
, rhs
,
4277 pattern_stmt
= gimple_build_call_internal (IFN_SCATTER_STORE
, 4,
4278 base
, offset
, scale
, rhs
);
4280 gimple_call_set_nothrow (pattern_stmt
, true);
4282 /* Copy across relevant vectorization info and associate DR with the
4283 new pattern statement instead of the original statement. */
4284 stmt_vec_info pattern_stmt_info
= loop_vinfo
->add_stmt (pattern_stmt
);
4285 STMT_VINFO_DATA_REF (pattern_stmt_info
) = dr
;
4286 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info
)
4287 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info
);
4288 STMT_VINFO_GATHER_SCATTER_P (pattern_stmt_info
)
4289 = STMT_VINFO_GATHER_SCATTER_P (stmt_info
);
4291 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4292 *type_out
= vectype
;
4293 vect_pattern_detected ("gather/scatter pattern", stmt_info
->stmt
);
4295 return pattern_stmt
;
4298 /* Return true if TYPE is a non-boolean integer type. These are the types
4299 that we want to consider for narrowing. */
4302 vect_narrowable_type_p (tree type
)
4304 return INTEGRAL_TYPE_P (type
) && !VECT_SCALAR_BOOLEAN_TYPE_P (type
);
4307 /* Return true if the operation given by CODE can be truncated to N bits
4308 when only N bits of the output are needed. This is only true if bit N+1
4309 of the inputs has no effect on the low N bits of the result. */
4312 vect_truncatable_operation_p (tree_code code
)
4330 /* Record that STMT_INFO could be changed from operating on TYPE to
4331 operating on a type with the precision and sign given by PRECISION
4332 and SIGN respectively. PRECISION is an arbitrary bit precision;
4333 it might not be a whole number of bytes. */
4336 vect_set_operation_type (stmt_vec_info stmt_info
, tree type
,
4337 unsigned int precision
, signop sign
)
4339 /* Round the precision up to a whole number of bytes. */
4340 precision
= vect_element_precision (precision
);
4341 if (precision
< TYPE_PRECISION (type
)
4342 && (!stmt_info
->operation_precision
4343 || stmt_info
->operation_precision
> precision
))
4345 stmt_info
->operation_precision
= precision
;
4346 stmt_info
->operation_sign
= sign
;
4350 /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
4351 non-boolean inputs, all of which have type TYPE. MIN_INPUT_PRECISION
4352 is an arbitrary bit precision; it might not be a whole number of bytes. */
4355 vect_set_min_input_precision (stmt_vec_info stmt_info
, tree type
,
4356 unsigned int min_input_precision
)
4358 /* This operation in isolation only requires the inputs to have
4359 MIN_INPUT_PRECISION of precision, However, that doesn't mean
4360 that MIN_INPUT_PRECISION is a natural precision for the chain
4361 as a whole. E.g. consider something like:
4363 unsigned short *x, *y;
4364 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4366 The right shift can be done on unsigned chars, and only requires the
4367 result of "*x & 0xf0" to be done on unsigned chars. But taking that
4368 approach would mean turning a natural chain of single-vector unsigned
4369 short operations into one that truncates "*x" and then extends
4370 "(*x & 0xf0) >> 4", with two vectors for each unsigned short
4371 operation and one vector for each unsigned char operation.
4372 This would be a significant pessimization.
4374 Instead only propagate the maximum of this precision and the precision
4375 required by the users of the result. This means that we don't pessimize
4376 the case above but continue to optimize things like:
4380 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4382 Here we would truncate two vectors of *x to a single vector of
4383 unsigned chars and use single-vector unsigned char operations for
4384 everything else, rather than doing two unsigned short copies of
4385 "(*x & 0xf0) >> 4" and then truncating the result. */
4386 min_input_precision
= MAX (min_input_precision
,
4387 stmt_info
->min_output_precision
);
4389 if (min_input_precision
< TYPE_PRECISION (type
)
4390 && (!stmt_info
->min_input_precision
4391 || stmt_info
->min_input_precision
> min_input_precision
))
4392 stmt_info
->min_input_precision
= min_input_precision
;
4395 /* Subroutine of vect_determine_min_output_precision. Return true if
4396 we can calculate a reduced number of output bits for STMT_INFO,
4397 whose result is LHS. */
4400 vect_determine_min_output_precision_1 (stmt_vec_info stmt_info
, tree lhs
)
4402 /* Take the maximum precision required by users of the result. */
4403 vec_info
*vinfo
= stmt_info
->vinfo
;
4404 unsigned int precision
= 0;
4405 imm_use_iterator iter
;
4407 FOR_EACH_IMM_USE_FAST (use
, iter
, lhs
)
4409 gimple
*use_stmt
= USE_STMT (use
);
4410 if (is_gimple_debug (use_stmt
))
4412 stmt_vec_info use_stmt_info
= vinfo
->lookup_stmt (use_stmt
);
4413 if (!use_stmt_info
|| !use_stmt_info
->min_input_precision
)
4415 precision
= MAX (precision
, use_stmt_info
->min_input_precision
);
4418 if (dump_enabled_p ())
4420 dump_printf_loc (MSG_NOTE
, vect_location
, "only the low %d bits of ",
4422 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, lhs
);
4423 dump_printf (MSG_NOTE
, " are significant\n");
4425 stmt_info
->min_output_precision
= precision
;
4429 /* Calculate min_output_precision for STMT_INFO. */
4432 vect_determine_min_output_precision (stmt_vec_info stmt_info
)
4434 /* We're only interested in statements with a narrowable result. */
4435 tree lhs
= gimple_get_lhs (stmt_info
->stmt
);
4437 || TREE_CODE (lhs
) != SSA_NAME
4438 || !vect_narrowable_type_p (TREE_TYPE (lhs
)))
4441 if (!vect_determine_min_output_precision_1 (stmt_info
, lhs
))
4442 stmt_info
->min_output_precision
= TYPE_PRECISION (TREE_TYPE (lhs
));
4445 /* Use range information to decide whether STMT (described by STMT_INFO)
4446 could be done in a narrower type. This is effectively a forward
4447 propagation, since it uses context-independent information that applies
4448 to all users of an SSA name. */
4451 vect_determine_precisions_from_range (stmt_vec_info stmt_info
, gassign
*stmt
)
4453 tree lhs
= gimple_assign_lhs (stmt
);
4454 if (!lhs
|| TREE_CODE (lhs
) != SSA_NAME
)
4457 tree type
= TREE_TYPE (lhs
);
4458 if (!vect_narrowable_type_p (type
))
4461 /* First see whether we have any useful range information for the result. */
4462 unsigned int precision
= TYPE_PRECISION (type
);
4463 signop sign
= TYPE_SIGN (type
);
4464 wide_int min_value
, max_value
;
4465 if (!vect_get_range_info (lhs
, &min_value
, &max_value
))
4468 tree_code code
= gimple_assign_rhs_code (stmt
);
4469 unsigned int nops
= gimple_num_ops (stmt
);
4471 if (!vect_truncatable_operation_p (code
))
4472 /* Check that all relevant input operands are compatible, and update
4473 [MIN_VALUE, MAX_VALUE] to include their ranges. */
4474 for (unsigned int i
= 1; i
< nops
; ++i
)
4476 tree op
= gimple_op (stmt
, i
);
4477 if (TREE_CODE (op
) == INTEGER_CST
)
4479 /* Don't require the integer to have RHS_TYPE (which it might
4480 not for things like shift amounts, etc.), but do require it
4482 if (!int_fits_type_p (op
, type
))
4485 min_value
= wi::min (min_value
, wi::to_wide (op
, precision
), sign
);
4486 max_value
= wi::max (max_value
, wi::to_wide (op
, precision
), sign
);
4488 else if (TREE_CODE (op
) == SSA_NAME
)
4490 /* Ignore codes that don't take uniform arguments. */
4491 if (!types_compatible_p (TREE_TYPE (op
), type
))
4494 wide_int op_min_value
, op_max_value
;
4495 if (!vect_get_range_info (op
, &op_min_value
, &op_max_value
))
4498 min_value
= wi::min (min_value
, op_min_value
, sign
);
4499 max_value
= wi::max (max_value
, op_max_value
, sign
);
4505 /* Try to switch signed types for unsigned types if we can.
4506 This is better for two reasons. First, unsigned ops tend
4507 to be cheaper than signed ops. Second, it means that we can
4511 int res = (int) c & 0xff00; // range [0x0000, 0xff00]
4516 unsigned short res_1 = (unsigned short) c & 0xff00;
4517 int res = (int) res_1;
4519 where the intermediate result res_1 has unsigned rather than
4521 if (sign
== SIGNED
&& !wi::neg_p (min_value
))
4524 /* See what precision is required for MIN_VALUE and MAX_VALUE. */
4525 unsigned int precision1
= wi::min_precision (min_value
, sign
);
4526 unsigned int precision2
= wi::min_precision (max_value
, sign
);
4527 unsigned int value_precision
= MAX (precision1
, precision2
);
4528 if (value_precision
>= precision
)
4531 if (dump_enabled_p ())
4533 dump_printf_loc (MSG_NOTE
, vect_location
, "can narrow to %s:%d"
4534 " without loss of precision: ",
4535 sign
== SIGNED
? "signed" : "unsigned",
4537 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
4540 vect_set_operation_type (stmt_info
, type
, value_precision
, sign
);
4541 vect_set_min_input_precision (stmt_info
, type
, value_precision
);
4544 /* Use information about the users of STMT's result to decide whether
4545 STMT (described by STMT_INFO) could be done in a narrower type.
4546 This is effectively a backward propagation. */
4549 vect_determine_precisions_from_users (stmt_vec_info stmt_info
, gassign
*stmt
)
4551 tree_code code
= gimple_assign_rhs_code (stmt
);
4552 unsigned int opno
= (code
== COND_EXPR
? 2 : 1);
4553 tree type
= TREE_TYPE (gimple_op (stmt
, opno
));
4554 if (!vect_narrowable_type_p (type
))
4557 unsigned int precision
= TYPE_PRECISION (type
);
4558 unsigned int operation_precision
, min_input_precision
;
4562 /* Only the bits that contribute to the output matter. Don't change
4563 the precision of the operation itself. */
4564 operation_precision
= precision
;
4565 min_input_precision
= stmt_info
->min_output_precision
;
4571 tree shift
= gimple_assign_rhs2 (stmt
);
4572 if (TREE_CODE (shift
) != INTEGER_CST
4573 || !wi::ltu_p (wi::to_widest (shift
), precision
))
4575 unsigned int const_shift
= TREE_INT_CST_LOW (shift
);
4576 if (code
== LSHIFT_EXPR
)
4578 /* We need CONST_SHIFT fewer bits of the input. */
4579 operation_precision
= stmt_info
->min_output_precision
;
4580 min_input_precision
= (MAX (operation_precision
, const_shift
)
4585 /* We need CONST_SHIFT extra bits to do the operation. */
4586 operation_precision
= (stmt_info
->min_output_precision
4588 min_input_precision
= operation_precision
;
4594 if (vect_truncatable_operation_p (code
))
4596 /* Input bit N has no effect on output bits N-1 and lower. */
4597 operation_precision
= stmt_info
->min_output_precision
;
4598 min_input_precision
= operation_precision
;
4604 if (operation_precision
< precision
)
4606 if (dump_enabled_p ())
4608 dump_printf_loc (MSG_NOTE
, vect_location
, "can narrow to %s:%d"
4609 " without affecting users: ",
4610 TYPE_UNSIGNED (type
) ? "unsigned" : "signed",
4611 operation_precision
);
4612 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
4614 vect_set_operation_type (stmt_info
, type
, operation_precision
,
4617 vect_set_min_input_precision (stmt_info
, type
, min_input_precision
);
4620 /* Handle vect_determine_precisions for STMT_INFO, given that we
4621 have already done so for the users of its result. */
4624 vect_determine_stmt_precisions (stmt_vec_info stmt_info
)
4626 vect_determine_min_output_precision (stmt_info
);
4627 if (gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
4629 vect_determine_precisions_from_range (stmt_info
, stmt
);
4630 vect_determine_precisions_from_users (stmt_info
, stmt
);
4634 /* Walk backwards through the vectorizable region to determine the
4635 values of these fields:
4637 - min_output_precision
4638 - min_input_precision
4639 - operation_precision
4640 - operation_sign. */
4643 vect_determine_precisions (vec_info
*vinfo
)
4645 DUMP_VECT_SCOPE ("vect_determine_precisions");
4647 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
4649 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
4650 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
4651 unsigned int nbbs
= loop
->num_nodes
;
4653 for (unsigned int i
= 0; i
< nbbs
; i
++)
4655 basic_block bb
= bbs
[nbbs
- i
- 1];
4656 for (gimple_stmt_iterator si
= gsi_last_bb (bb
);
4657 !gsi_end_p (si
); gsi_prev (&si
))
4658 vect_determine_stmt_precisions
4659 (vinfo
->lookup_stmt (gsi_stmt (si
)));
4664 bb_vec_info bb_vinfo
= as_a
<bb_vec_info
> (vinfo
);
4665 gimple_stmt_iterator si
= bb_vinfo
->region_end
;
4670 si
= gsi_last_bb (bb_vinfo
->bb
);
4673 stmt
= gsi_stmt (si
);
4674 stmt_vec_info stmt_info
= vinfo
->lookup_stmt (stmt
);
4675 if (stmt_info
&& STMT_VINFO_VECTORIZABLE (stmt_info
))
4676 vect_determine_stmt_precisions (stmt_info
);
4678 while (stmt
!= gsi_stmt (bb_vinfo
->region_begin
));
4682 typedef gimple
*(*vect_recog_func_ptr
) (stmt_vec_info
, tree
*);
4684 struct vect_recog_func
4686 vect_recog_func_ptr fn
;
4690 /* Note that ordering matters - the first pattern matching on a stmt is
4691 taken which means usually the more complex one needs to preceed the
4692 less comples onex (widen_sum only after dot_prod or sad for example). */
4693 static vect_recog_func vect_vect_recog_func_ptrs
[] = {
4694 { vect_recog_over_widening_pattern
, "over_widening" },
4695 /* Must come after over_widening, which narrows the shift as much as
4696 possible beforehand. */
4697 { vect_recog_average_pattern
, "average" },
4698 { vect_recog_cast_forwprop_pattern
, "cast_forwprop" },
4699 { vect_recog_widen_mult_pattern
, "widen_mult" },
4700 { vect_recog_dot_prod_pattern
, "dot_prod" },
4701 { vect_recog_sad_pattern
, "sad" },
4702 { vect_recog_widen_sum_pattern
, "widen_sum" },
4703 { vect_recog_pow_pattern
, "pow" },
4704 { vect_recog_widen_shift_pattern
, "widen_shift" },
4705 { vect_recog_rotate_pattern
, "rotate" },
4706 { vect_recog_vector_vector_shift_pattern
, "vector_vector_shift" },
4707 { vect_recog_divmod_pattern
, "divmod" },
4708 { vect_recog_mult_pattern
, "mult" },
4709 { vect_recog_mixed_size_cond_pattern
, "mixed_size_cond" },
4710 { vect_recog_bool_pattern
, "bool" },
4711 /* This must come before mask conversion, and includes the parts
4712 of mask conversion that are needed for gather and scatter
4713 internal functions. */
4714 { vect_recog_gather_scatter_pattern
, "gather_scatter" },
4715 { vect_recog_mask_conversion_pattern
, "mask_conversion" }
4718 const unsigned int NUM_PATTERNS
= ARRAY_SIZE (vect_vect_recog_func_ptrs
);
4720 /* Mark statements that are involved in a pattern. */
4723 vect_mark_pattern_stmts (stmt_vec_info orig_stmt_info
, gimple
*pattern_stmt
,
4724 tree pattern_vectype
)
4726 gimple
*def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info
);
4728 gimple
*orig_pattern_stmt
= NULL
;
4729 if (is_pattern_stmt_p (orig_stmt_info
))
4731 /* We're replacing a statement in an existing pattern definition
4733 orig_pattern_stmt
= orig_stmt_info
->stmt
;
4734 if (dump_enabled_p ())
4736 dump_printf_loc (MSG_NOTE
, vect_location
,
4737 "replacing earlier pattern ");
4738 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, orig_pattern_stmt
, 0);
4741 /* To keep the book-keeping simple, just swap the lhs of the
4742 old and new statements, so that the old one has a valid but
4744 tree old_lhs
= gimple_get_lhs (orig_pattern_stmt
);
4745 gimple_set_lhs (orig_pattern_stmt
, gimple_get_lhs (pattern_stmt
));
4746 gimple_set_lhs (pattern_stmt
, old_lhs
);
4748 if (dump_enabled_p ())
4750 dump_printf_loc (MSG_NOTE
, vect_location
, "with ");
4751 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
4754 /* Switch to the statement that ORIG replaces. */
4755 orig_stmt_info
= STMT_VINFO_RELATED_STMT (orig_stmt_info
);
4757 /* We shouldn't be replacing the main pattern statement. */
4758 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info
)->stmt
4759 != orig_pattern_stmt
);
4763 for (gimple_stmt_iterator si
= gsi_start (def_seq
);
4764 !gsi_end_p (si
); gsi_next (&si
))
4765 vect_init_pattern_stmt (gsi_stmt (si
), orig_stmt_info
, pattern_vectype
);
4767 if (orig_pattern_stmt
)
4769 vect_init_pattern_stmt (pattern_stmt
, orig_stmt_info
, pattern_vectype
);
4771 /* Insert all the new pattern statements before the original one. */
4772 gimple_seq
*orig_def_seq
= &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info
);
4773 gimple_stmt_iterator gsi
= gsi_for_stmt (orig_pattern_stmt
,
4775 gsi_insert_seq_before_without_update (&gsi
, def_seq
, GSI_SAME_STMT
);
4776 gsi_insert_before_without_update (&gsi
, pattern_stmt
, GSI_SAME_STMT
);
4778 /* Remove the pattern statement that this new pattern replaces. */
4779 gsi_remove (&gsi
, false);
4782 vect_set_pattern_stmt (pattern_stmt
, orig_stmt_info
, pattern_vectype
);
4785 /* Function vect_pattern_recog_1
4788 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
4789 computation pattern.
4790 STMT_INFO: A stmt from which the pattern search should start.
4792 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
4793 a sequence of statements that has the same functionality and can be
4794 used to replace STMT_INFO. It returns the last statement in the sequence
4795 and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
4796 PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
4797 statement, having first checked that the target supports the new operation
4800 This function also does some bookkeeping, as explained in the documentation
4801 for vect_recog_pattern. */
4804 vect_pattern_recog_1 (vect_recog_func
*recog_func
, stmt_vec_info stmt_info
)
4806 vec_info
*vinfo
= stmt_info
->vinfo
;
4807 gimple
*pattern_stmt
;
4808 loop_vec_info loop_vinfo
;
4809 tree pattern_vectype
;
4811 /* If this statement has already been replaced with pattern statements,
4812 leave the original statement alone, since the first match wins.
4813 Instead try to match against the definition statements that feed
4814 the main pattern statement. */
4815 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
4817 gimple_stmt_iterator gsi
;
4818 for (gsi
= gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
));
4819 !gsi_end_p (gsi
); gsi_next (&gsi
))
4820 vect_pattern_recog_1 (recog_func
, vinfo
->lookup_stmt (gsi_stmt (gsi
)));
4824 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
));
4825 pattern_stmt
= recog_func
->fn (stmt_info
, &pattern_vectype
);
4828 /* Clear any half-formed pattern definition sequence. */
4829 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
) = NULL
;
4833 loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4834 gcc_assert (pattern_vectype
);
4836 /* Found a vectorizable pattern. */
4837 if (dump_enabled_p ())
4839 dump_printf_loc (MSG_NOTE
, vect_location
,
4840 "%s pattern recognized: ", recog_func
->name
);
4841 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
4844 /* Mark the stmts that are involved in the pattern. */
4845 vect_mark_pattern_stmts (stmt_info
, pattern_stmt
, pattern_vectype
);
4847 /* Patterns cannot be vectorized using SLP, because they change the order of
4852 stmt_vec_info
*elem_ptr
;
4853 VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo
), ix
, ix2
,
4854 elem_ptr
, *elem_ptr
== stmt_info
);
4859 /* Function vect_pattern_recog
4862 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
4865 Output - for each computation idiom that is detected we create a new stmt
4866 that provides the same functionality and that can be vectorized. We
4867 also record some information in the struct_stmt_info of the relevant
4868 stmts, as explained below:
4870 At the entry to this function we have the following stmts, with the
4871 following initial value in the STMT_VINFO fields:
4873 stmt in_pattern_p related_stmt vec_stmt
4874 S1: a_i = .... - - -
4875 S2: a_2 = ..use(a_i).. - - -
4876 S3: a_1 = ..use(a_2).. - - -
4877 S4: a_0 = ..use(a_1).. - - -
4878 S5: ... = ..use(a_0).. - - -
4880 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
4881 represented by a single stmt. We then:
4882 - create a new stmt S6 equivalent to the pattern (the stmt is not
4883 inserted into the code)
4884 - fill in the STMT_VINFO fields as follows:
4886 in_pattern_p related_stmt vec_stmt
4887 S1: a_i = .... - - -
4888 S2: a_2 = ..use(a_i).. - - -
4889 S3: a_1 = ..use(a_2).. - - -
4890 S4: a_0 = ..use(a_1).. true S6 -
4891 '---> S6: a_new = .... - S4 -
4892 S5: ... = ..use(a_0).. - - -
4894 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
4895 to each other through the RELATED_STMT field).
4897 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
4898 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
4899 remain irrelevant unless used by stmts other than S4.
4901 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
4902 (because they are marked as irrelevant). It will vectorize S6, and record
4903 a pointer to the new vector stmt VS6 from S6 (as usual).
4904 S4 will be skipped, and S5 will be vectorized as usual:
4906 in_pattern_p related_stmt vec_stmt
4907 S1: a_i = .... - - -
4908 S2: a_2 = ..use(a_i).. - - -
4909 S3: a_1 = ..use(a_2).. - - -
4910 > VS6: va_new = .... - - -
4911 S4: a_0 = ..use(a_1).. true S6 VS6
4912 '---> S6: a_new = .... - S4 VS6
4913 > VS5: ... = ..vuse(va_new).. - - -
4914 S5: ... = ..use(a_0).. - - -
4916 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
4917 elsewhere), and we'll end up with:
4920 VS5: ... = ..vuse(va_new)..
4922 In case of more than one pattern statements, e.g., widen-mult with
4926 S2 a_T = (TYPE) a_t;
4927 '--> S3: a_it = (interm_type) a_t;
4928 S4 prod_T = a_T * CONST;
4929 '--> S5: prod_T' = a_it w* CONST;
4931 there may be other users of a_T outside the pattern. In that case S2 will
4932 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
4933 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
4934 be recorded in S3. */
4937 vect_pattern_recog (vec_info
*vinfo
)
4942 gimple_stmt_iterator si
;
4945 vect_determine_precisions (vinfo
);
4947 DUMP_VECT_SCOPE ("vect_pattern_recog");
4949 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
4951 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
4952 bbs
= LOOP_VINFO_BBS (loop_vinfo
);
4953 nbbs
= loop
->num_nodes
;
4955 /* Scan through the loop stmts, applying the pattern recognition
4956 functions starting at each stmt visited: */
4957 for (i
= 0; i
< nbbs
; i
++)
4959 basic_block bb
= bbs
[i
];
4960 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
4962 stmt_vec_info stmt_info
= vinfo
->lookup_stmt (gsi_stmt (si
));
4963 /* Scan over all generic vect_recog_xxx_pattern functions. */
4964 for (j
= 0; j
< NUM_PATTERNS
; j
++)
4965 vect_pattern_recog_1 (&vect_vect_recog_func_ptrs
[j
],
4972 bb_vec_info bb_vinfo
= as_a
<bb_vec_info
> (vinfo
);
4973 for (si
= bb_vinfo
->region_begin
;
4974 gsi_stmt (si
) != gsi_stmt (bb_vinfo
->region_end
); gsi_next (&si
))
4976 gimple
*stmt
= gsi_stmt (si
);
4977 stmt_vec_info stmt_info
= bb_vinfo
->lookup_stmt (stmt
);
4978 if (stmt_info
&& !STMT_VINFO_VECTORIZABLE (stmt_info
))
4981 /* Scan over all generic vect_recog_xxx_pattern functions. */
4982 for (j
= 0; j
< NUM_PATTERNS
; j
++)
4983 vect_pattern_recog_1 (&vect_vect_recog_func_ptrs
[j
], stmt_info
);