/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006-2018 Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "ssa.h"
#include "expmed.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "dumpfile.h"
#include "builtins.h"
#include "internal-fn.h"
#include "case-cfn-macros.h"
#include "fold-const-call.h"
#include "attribs.h"
#include "cgraph.h"
#include "omp-simd-clone.h"
50 /* Return true if we have a useful VR_RANGE range for VAR, storing it
51 in *MIN_VALUE and *MAX_VALUE if so. Note the range in the dump files. */
54 vect_get_range_info (tree var
, wide_int
*min_value
, wide_int
*max_value
)
56 value_range_type vr_type
= get_range_info (var
, min_value
, max_value
);
57 wide_int nonzero
= get_nonzero_bits (var
);
58 signop sgn
= TYPE_SIGN (TREE_TYPE (var
));
59 if (intersect_range_with_nonzero_bits (vr_type
, min_value
, max_value
,
60 nonzero
, sgn
) == VR_RANGE
)
62 if (dump_enabled_p ())
64 dump_generic_expr_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, var
);
65 dump_printf (MSG_NOTE
, " has range [");
66 dump_hex (MSG_NOTE
, *min_value
);
67 dump_printf (MSG_NOTE
, ", ");
68 dump_hex (MSG_NOTE
, *max_value
);
69 dump_printf (MSG_NOTE
, "]\n");
75 if (dump_enabled_p ())
77 dump_generic_expr_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, var
);
78 dump_printf (MSG_NOTE
, " has no range info\n");
84 /* Report that we've found an instance of pattern PATTERN in
88 vect_pattern_detected (const char *name
, gimple
*stmt
)
90 if (dump_enabled_p ())
92 dump_printf_loc (MSG_NOTE
, vect_location
, "%s: detected: ", name
);
93 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
97 /* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO.
98 Set its vector type to VECTYPE if it doesn't have one already. */
101 vect_init_pattern_stmt (gimple
*pattern_stmt
, stmt_vec_info orig_stmt_info
,
104 stmt_vec_info pattern_stmt_info
= vinfo_for_stmt (pattern_stmt
);
105 if (pattern_stmt_info
== NULL
)
107 pattern_stmt_info
= new_stmt_vec_info (pattern_stmt
,
108 orig_stmt_info
->vinfo
);
109 set_vinfo_for_stmt (pattern_stmt
, pattern_stmt_info
);
111 gimple_set_bb (pattern_stmt
, gimple_bb (orig_stmt_info
->stmt
));
113 STMT_VINFO_RELATED_STMT (pattern_stmt_info
) = orig_stmt_info
->stmt
;
114 STMT_VINFO_DEF_TYPE (pattern_stmt_info
)
115 = STMT_VINFO_DEF_TYPE (orig_stmt_info
);
116 if (!STMT_VINFO_VECTYPE (pattern_stmt_info
))
117 STMT_VINFO_VECTYPE (pattern_stmt_info
) = vectype
;
120 /* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
121 Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
125 vect_set_pattern_stmt (gimple
*pattern_stmt
, stmt_vec_info orig_stmt_info
,
128 STMT_VINFO_IN_PATTERN_P (orig_stmt_info
) = true;
129 STMT_VINFO_RELATED_STMT (orig_stmt_info
) = pattern_stmt
;
130 vect_init_pattern_stmt (pattern_stmt
, orig_stmt_info
, vectype
);
133 /* Add NEW_STMT to STMT_INFO's pattern definition statements. If VECTYPE
134 is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
135 be different from the vector type of the final pattern statement. */
138 append_pattern_def_seq (stmt_vec_info stmt_info
, gimple
*new_stmt
,
139 tree vectype
= NULL_TREE
)
141 vec_info
*vinfo
= stmt_info
->vinfo
;
144 gcc_assert (!vinfo_for_stmt (new_stmt
));
145 stmt_vec_info new_stmt_info
= new_stmt_vec_info (new_stmt
, vinfo
);
146 set_vinfo_for_stmt (new_stmt
, new_stmt_info
);
147 STMT_VINFO_VECTYPE (new_stmt_info
) = vectype
;
149 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
),
154 new_pattern_def_seq (stmt_vec_info stmt_info
, gimple
*stmt
)
156 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
) = NULL
;
157 append_pattern_def_seq (stmt_info
, stmt
);
160 /* The caller wants to perform new operations on vect_external variable
161 VAR, so that the result of the operations would also be vect_external.
162 Return the edge on which the operations can be performed, if one exists.
163 Return null if the operations should instead be treated as part of
164 the pattern that needs them. */
167 vect_get_external_def_edge (vec_info
*vinfo
, tree var
)
170 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
172 e
= loop_preheader_edge (loop_vinfo
->loop
);
173 if (!SSA_NAME_IS_DEFAULT_DEF (var
))
175 basic_block bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
177 || !dominated_by_p (CDI_DOMINATORS
, e
->dest
, bb
))
184 /* Return true if the target supports a vector version of CODE,
185 where CODE is known to map to a direct optab. ITYPE specifies
186 the type of (some of) the scalar inputs and OTYPE specifies the
187 type of the scalar result.
189 If CODE allows the inputs and outputs to have different type
190 (such as for WIDEN_SUM_EXPR), it is the input mode rather
191 than the output mode that determines the appropriate target pattern.
192 Operand 0 of the target pattern then specifies the mode that the output
195 When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
196 Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
200 vect_supportable_direct_optab_p (tree otype
, tree_code code
,
201 tree itype
, tree
*vecotype_out
,
202 tree
*vecitype_out
= NULL
)
204 tree vecitype
= get_vectype_for_scalar_type (itype
);
208 tree vecotype
= get_vectype_for_scalar_type (otype
);
212 optab optab
= optab_for_tree_code (code
, vecitype
, optab_default
);
216 insn_code icode
= optab_handler (optab
, TYPE_MODE (vecitype
));
217 if (icode
== CODE_FOR_nothing
218 || insn_data
[icode
].operand
[0].mode
!= TYPE_MODE (vecotype
))
221 *vecotype_out
= vecotype
;
223 *vecitype_out
= vecitype
;
227 /* Round bit precision PRECISION up to a full element. */
230 vect_element_precision (unsigned int precision
)
232 precision
= 1 << ceil_log2 (precision
);
233 return MAX (precision
, BITS_PER_UNIT
);
236 /* If OP is defined by a statement that's being considered for vectorization,
237 return information about that statement, otherwise return NULL. */
240 vect_get_internal_def (vec_info
*vinfo
, tree op
)
244 if (TREE_CODE (op
) != SSA_NAME
245 || !vect_is_simple_use (op
, vinfo
, &dt
, &def_stmt
)
246 || dt
!= vect_internal_def
)
249 return vinfo_for_stmt (def_stmt
);
252 /* Check whether NAME, an ssa-name used in USE_STMT,
253 is a result of a type promotion, such that:
254 DEF_STMT: NAME = NOP (name0)
255 If CHECK_SIGN is TRUE, check that either both types are signed or both are
259 type_conversion_p (tree name
, gimple
*use_stmt
, bool check_sign
,
260 tree
*orig_type
, gimple
**def_stmt
, bool *promotion
)
262 stmt_vec_info stmt_vinfo
;
263 tree type
= TREE_TYPE (name
);
265 enum vect_def_type dt
;
267 stmt_vinfo
= vinfo_for_stmt (use_stmt
);
268 if (!vect_is_simple_use (name
, stmt_vinfo
->vinfo
, &dt
, def_stmt
))
271 if (dt
!= vect_internal_def
272 && dt
!= vect_external_def
&& dt
!= vect_constant_def
)
278 if (!is_gimple_assign (*def_stmt
))
281 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt
)))
284 oprnd0
= gimple_assign_rhs1 (*def_stmt
);
286 *orig_type
= TREE_TYPE (oprnd0
);
287 if (!INTEGRAL_TYPE_P (type
) || !INTEGRAL_TYPE_P (*orig_type
)
288 || ((TYPE_UNSIGNED (type
) != TYPE_UNSIGNED (*orig_type
)) && check_sign
))
291 if (TYPE_PRECISION (type
) >= (TYPE_PRECISION (*orig_type
) * 2))
296 if (!vect_is_simple_use (oprnd0
, stmt_vinfo
->vinfo
, &dt
))
302 /* Holds information about an input operand after some sign changes
303 and type promotions have been peeled away. */
304 struct vect_unpromoted_value
{
305 vect_unpromoted_value ();
307 void set_op (tree
, vect_def_type
, stmt_vec_info
= NULL
);
309 /* The value obtained after peeling away zero or more casts. */
312 /* The type of OP. */
315 /* The definition type of OP. */
318 /* If OP is the result of peeling at least one cast, and if the cast
319 of OP itself is a vectorizable statement, CASTER identifies that
320 statement, otherwise it is null. */
321 stmt_vec_info caster
;
324 inline vect_unpromoted_value::vect_unpromoted_value ()
327 dt (vect_uninitialized_def
),
332 /* Set the operand to OP_IN, its definition type to DT_IN, and the
333 statement that casts it to CASTER_IN. */
336 vect_unpromoted_value::set_op (tree op_in
, vect_def_type dt_in
,
337 stmt_vec_info caster_in
)
340 type
= TREE_TYPE (op
);
345 /* If OP is a vectorizable SSA name, strip a sequence of integer conversions
346 to reach some vectorizable inner operand OP', continuing as long as it
347 is possible to convert OP' back to OP using a possible sign change
348 followed by a possible promotion P. Return this OP', or null if OP is
349 not a vectorizable SSA name. If there is a promotion P, describe its
350 input in UNPROM, otherwise describe OP' in UNPROM. If SINGLE_USE_P
351 is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
352 have more than one user.
354 A successful return means that it is possible to go from OP' to OP
355 via UNPROM. The cast from OP' to UNPROM is at most a sign change,
356 whereas the cast from UNPROM to OP might be a promotion, a sign
361 signed short *ptr = ...;
362 signed short C = *ptr;
363 unsigned short B = (unsigned short) C; // sign change
364 signed int A = (signed int) B; // unsigned promotion
365 ...possible other uses of A...
366 unsigned int OP = (unsigned int) A; // sign change
368 In this case it's possible to go directly from C to OP using:
370 OP = (unsigned int) (unsigned short) C;
371 +------------+ +--------------+
372 promotion sign change
374 so OP' would be C. The input to the promotion is B, so UNPROM
378 vect_look_through_possible_promotion (vec_info
*vinfo
, tree op
,
379 vect_unpromoted_value
*unprom
,
380 bool *single_use_p
= NULL
)
382 tree res
= NULL_TREE
;
383 tree op_type
= TREE_TYPE (op
);
384 unsigned int orig_precision
= TYPE_PRECISION (op_type
);
385 stmt_vec_info caster
= NULL
;
386 while (TREE_CODE (op
) == SSA_NAME
&& INTEGRAL_TYPE_P (op_type
))
388 /* See whether OP is simple enough to vectorize. */
391 if (!vect_is_simple_use (op
, vinfo
, &dt
, &def_stmt
))
394 /* If OP is the input of a demotion, skip over it to see whether
395 OP is itself the result of a promotion. If so, the combined
396 effect of the promotion and the demotion might fit the required
397 pattern, otherwise neither operation fits.
399 This copes with cases such as the result of an arithmetic
400 operation being truncated before being stored, and where that
401 arithmetic operation has been recognized as an over-widened one. */
402 if (TYPE_PRECISION (op_type
) <= orig_precision
)
404 /* Use OP as the UNPROM described above if we haven't yet
405 found a promotion, or if using the new input preserves the
406 sign of the previous promotion. */
408 || TYPE_PRECISION (unprom
->type
) == orig_precision
409 || TYPE_SIGN (unprom
->type
) == TYPE_SIGN (op_type
))
410 unprom
->set_op (op
, dt
, caster
);
411 /* Stop if we've already seen a promotion and if this
412 conversion does more than change the sign. */
413 else if (TYPE_PRECISION (op_type
)
414 != TYPE_PRECISION (unprom
->type
))
417 /* The sequence now extends to OP. */
421 /* See whether OP is defined by a cast. Record it as CASTER if
422 the cast is potentially vectorizable. */
425 if (dt
== vect_internal_def
)
427 caster
= vinfo_for_stmt (def_stmt
);
428 /* Ignore pattern statements, since we don't link uses for them. */
430 && !STMT_VINFO_RELATED_STMT (caster
)
431 && !has_single_use (res
))
432 *single_use_p
= false;
436 gassign
*assign
= dyn_cast
<gassign
*> (def_stmt
);
437 if (!assign
|| !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt
)))
440 /* Continue with the input to the cast. */
441 op
= gimple_assign_rhs1 (def_stmt
);
442 op_type
= TREE_TYPE (op
);
447 /* OP is an integer operand to an operation that returns TYPE, and we
448 want to treat the operation as a widening one. So far we can treat
449 it as widening from *COMMON_TYPE.
451 Return true if OP is suitable for such a widening operation,
452 either widening from *COMMON_TYPE or from some supertype of it.
453 Update *COMMON_TYPE to the supertype in the latter case.
455 SHIFT_P is true if OP is a shift amount. */
458 vect_joust_widened_integer (tree type
, bool shift_p
, tree op
,
461 /* Calculate the minimum precision required by OP, without changing
462 the sign of either operand. */
463 unsigned int precision
;
466 if (!wi::leu_p (wi::to_widest (op
), TYPE_PRECISION (type
) / 2))
468 precision
= TREE_INT_CST_LOW (op
);
472 precision
= wi::min_precision (wi::to_widest (op
),
473 TYPE_SIGN (*common_type
));
474 if (precision
* 2 > TYPE_PRECISION (type
))
478 /* If OP requires a wider type, switch to that type. The checks
479 above ensure that this is still narrower than the result. */
480 precision
= vect_element_precision (precision
);
481 if (TYPE_PRECISION (*common_type
) < precision
)
482 *common_type
= build_nonstandard_integer_type
483 (precision
, TYPE_UNSIGNED (*common_type
));
487 /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
488 is narrower than type, storing the supertype in *COMMON_TYPE if so. */
491 vect_joust_widened_type (tree type
, tree new_type
, tree
*common_type
)
493 if (types_compatible_p (*common_type
, new_type
))
496 /* See if *COMMON_TYPE can hold all values of NEW_TYPE. */
497 if ((TYPE_PRECISION (new_type
) < TYPE_PRECISION (*common_type
))
498 && (TYPE_UNSIGNED (new_type
) || !TYPE_UNSIGNED (*common_type
)))
501 /* See if NEW_TYPE can hold all values of *COMMON_TYPE. */
502 if (TYPE_PRECISION (*common_type
) < TYPE_PRECISION (new_type
)
503 && (TYPE_UNSIGNED (*common_type
) || !TYPE_UNSIGNED (new_type
)))
505 *common_type
= new_type
;
509 /* We have mismatched signs, with the signed type being
510 no wider than the unsigned type. In this case we need
511 a wider signed type. */
512 unsigned int precision
= MAX (TYPE_PRECISION (*common_type
),
513 TYPE_PRECISION (new_type
));
515 if (precision
* 2 > TYPE_PRECISION (type
))
518 *common_type
= build_nonstandard_integer_type (precision
, false);
522 /* Check whether STMT_INFO can be viewed as a tree of integer operations
523 in which each node either performs CODE or WIDENED_CODE, and where
524 each leaf operand is narrower than the result of STMT_INFO. MAX_NOPS
525 specifies the maximum number of leaf operands. SHIFT_P says whether
526 CODE and WIDENED_CODE are some sort of shift.
528 If STMT_INFO is such a tree, return the number of leaf operands
529 and describe them in UNPROM[0] onwards. Also set *COMMON_TYPE
530 to a type that (a) is narrower than the result of STMT_INFO and
531 (b) can hold all leaf operand values.
533 Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
537 vect_widened_op_tree (stmt_vec_info stmt_info
, tree_code code
,
538 tree_code widened_code
, bool shift_p
,
539 unsigned int max_nops
,
540 vect_unpromoted_value
*unprom
, tree
*common_type
)
542 /* Check for an integer operation with the right code. */
543 gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
547 tree_code rhs_code
= gimple_assign_rhs_code (assign
);
548 if (rhs_code
!= code
&& rhs_code
!= widened_code
)
551 tree type
= gimple_expr_type (assign
);
552 if (!INTEGRAL_TYPE_P (type
))
555 /* Assume that both operands will be leaf operands. */
558 /* Check the operands. */
559 unsigned int next_op
= 0;
560 for (unsigned int i
= 0; i
< 2; ++i
)
562 vect_unpromoted_value
*this_unprom
= &unprom
[next_op
];
563 unsigned int nops
= 1;
564 tree op
= gimple_op (assign
, i
+ 1);
565 if (i
== 1 && TREE_CODE (op
) == INTEGER_CST
)
567 /* We already have a common type from earlier operands.
568 Update it to account for OP. */
569 this_unprom
->set_op (op
, vect_constant_def
);
570 if (!vect_joust_widened_integer (type
, shift_p
, op
, common_type
))
575 /* Only allow shifts by constants. */
576 if (shift_p
&& i
== 1)
579 if (!vect_look_through_possible_promotion (stmt_info
->vinfo
, op
,
583 if (TYPE_PRECISION (this_unprom
->type
) == TYPE_PRECISION (type
))
585 /* The operand isn't widened. If STMT_INFO has the code
586 for an unwidened operation, recursively check whether
587 this operand is a node of the tree. */
590 || this_unprom
->dt
!= vect_internal_def
)
593 /* Give back the leaf slot allocated above now that we're
594 not treating this as a leaf operand. */
597 /* Recursively process the definition of the operand. */
598 stmt_vec_info def_stmt_info
599 = vinfo_for_stmt (SSA_NAME_DEF_STMT (this_unprom
->op
));
600 nops
= vect_widened_op_tree (def_stmt_info
, code
, widened_code
,
601 shift_p
, max_nops
, this_unprom
,
610 /* Make sure that the operand is narrower than the result. */
611 if (TYPE_PRECISION (this_unprom
->type
) * 2
612 > TYPE_PRECISION (type
))
615 /* Update COMMON_TYPE for the new operand. */
617 *common_type
= this_unprom
->type
;
618 else if (!vect_joust_widened_type (type
, this_unprom
->type
,
628 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
629 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
632 vect_recog_temp_ssa_var (tree type
, gimple
*stmt
)
634 return make_temp_ssa_name (type
, stmt
, "patt");
637 /* STMT2_INFO describes a type conversion that could be split into STMT1
638 followed by a version of STMT2_INFO that takes NEW_RHS as its first
639 input. Try to do this using pattern statements, returning true on
643 vect_split_statement (stmt_vec_info stmt2_info
, tree new_rhs
,
644 gimple
*stmt1
, tree vectype
)
646 if (is_pattern_stmt_p (stmt2_info
))
648 /* STMT2_INFO is part of a pattern. Get the statement to which
649 the pattern is attached. */
650 stmt_vec_info orig_stmt2_info
651 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt2_info
));
652 vect_init_pattern_stmt (stmt1
, orig_stmt2_info
, vectype
);
654 if (dump_enabled_p ())
656 dump_printf_loc (MSG_NOTE
, vect_location
,
657 "Splitting pattern statement: ");
658 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt2_info
->stmt
, 0);
661 /* Since STMT2_INFO is a pattern statement, we can change it
662 in-situ without worrying about changing the code for the
664 gimple_assign_set_rhs1 (stmt2_info
->stmt
, new_rhs
);
666 if (dump_enabled_p ())
668 dump_printf_loc (MSG_NOTE
, vect_location
, "into: ");
669 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt1
, 0);
670 dump_printf_loc (MSG_NOTE
, vect_location
, "and: ");
671 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt2_info
->stmt
, 0);
674 gimple_seq
*def_seq
= &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info
);
675 if (STMT_VINFO_RELATED_STMT (orig_stmt2_info
) == stmt2_info
->stmt
)
676 /* STMT2_INFO is the actual pattern statement. Add STMT1
677 to the end of the definition sequence. */
678 gimple_seq_add_stmt_without_update (def_seq
, stmt1
);
681 /* STMT2_INFO belongs to the definition sequence. Insert STMT1
683 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt2_info
->stmt
, def_seq
);
684 gsi_insert_before_without_update (&gsi
, stmt1
, GSI_SAME_STMT
);
690 /* STMT2_INFO doesn't yet have a pattern. Try to create a
691 two-statement pattern now. */
692 gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info
));
693 tree lhs_type
= TREE_TYPE (gimple_get_lhs (stmt2_info
->stmt
));
694 tree lhs_vectype
= get_vectype_for_scalar_type (lhs_type
);
698 if (dump_enabled_p ())
700 dump_printf_loc (MSG_NOTE
, vect_location
,
701 "Splitting statement: ");
702 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt2_info
->stmt
, 0);
705 /* Add STMT1 as a singleton pattern definition sequence. */
706 gimple_seq
*def_seq
= &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info
);
707 vect_init_pattern_stmt (stmt1
, stmt2_info
, vectype
);
708 gimple_seq_add_stmt_without_update (def_seq
, stmt1
);
710 /* Build the second of the two pattern statements. */
711 tree new_lhs
= vect_recog_temp_ssa_var (lhs_type
, NULL
);
712 gassign
*new_stmt2
= gimple_build_assign (new_lhs
, NOP_EXPR
, new_rhs
);
713 vect_set_pattern_stmt (new_stmt2
, stmt2_info
, lhs_vectype
);
715 if (dump_enabled_p ())
717 dump_printf_loc (MSG_NOTE
, vect_location
,
718 "into pattern statements: ");
719 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt1
, 0);
720 dump_printf_loc (MSG_NOTE
, vect_location
, "and: ");
721 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, new_stmt2
, 0);
728 /* Convert UNPROM to TYPE and return the result, adding new statements
729 to STMT_INFO's pattern definition statements if no better way is
730 available. VECTYPE is the vector form of TYPE. */
733 vect_convert_input (stmt_vec_info stmt_info
, tree type
,
734 vect_unpromoted_value
*unprom
, tree vectype
)
736 /* Check for a no-op conversion. */
737 if (types_compatible_p (type
, TREE_TYPE (unprom
->op
)))
740 /* Allow the caller to create constant vect_unpromoted_values. */
741 if (TREE_CODE (unprom
->op
) == INTEGER_CST
)
742 return wide_int_to_tree (type
, wi::to_widest (unprom
->op
));
744 /* See if we can reuse an existing result. */
747 tree lhs
= gimple_get_lhs (unprom
->caster
->stmt
);
748 if (types_compatible_p (TREE_TYPE (lhs
), type
))
752 /* We need a new conversion statement. */
753 tree new_op
= vect_recog_temp_ssa_var (type
, NULL
);
754 gassign
*new_stmt
= gimple_build_assign (new_op
, NOP_EXPR
, unprom
->op
);
756 /* If the operation is the input to a vectorizable cast, try splitting
757 that cast into two, taking the required result as a mid-way point. */
760 tree lhs
= gimple_get_lhs (unprom
->caster
->stmt
);
761 if (TYPE_PRECISION (TREE_TYPE (lhs
)) > TYPE_PRECISION (type
)
762 && TYPE_PRECISION (type
) > TYPE_PRECISION (unprom
->type
)
763 && (TYPE_UNSIGNED (unprom
->type
) || !TYPE_UNSIGNED (type
))
764 && vect_split_statement (unprom
->caster
, new_op
, new_stmt
, vectype
))
768 /* If OP is an external value, see if we can insert the new statement
769 on an incoming edge. */
770 if (unprom
->dt
== vect_external_def
)
771 if (edge e
= vect_get_external_def_edge (stmt_info
->vinfo
, unprom
->op
))
773 basic_block new_bb
= gsi_insert_on_edge_immediate (e
, new_stmt
);
774 gcc_assert (!new_bb
);
778 /* As a (common) last resort, add the statement to the pattern itself. */
779 append_pattern_def_seq (stmt_info
, new_stmt
, vectype
);
783 /* Invoke vect_convert_input for N elements of UNPROM and store the
784 result in the corresponding elements of RESULT. */
787 vect_convert_inputs (stmt_vec_info stmt_info
, unsigned int n
,
788 tree
*result
, tree type
, vect_unpromoted_value
*unprom
,
791 for (unsigned int i
= 0; i
< n
; ++i
)
794 for (j
= 0; j
< i
; ++j
)
795 if (unprom
[j
].op
== unprom
[i
].op
)
798 result
[i
] = result
[j
];
800 result
[i
] = vect_convert_input (stmt_info
, type
, &unprom
[i
], vectype
);
804 /* The caller has created a (possibly empty) sequence of pattern definition
805 statements followed by a single statement PATTERN_STMT. Cast the result
806 of this final statement to TYPE. If a new statement is needed, add
807 PATTERN_STMT to the end of STMT_INFO's pattern definition statements
808 and return the new statement, otherwise return PATTERN_STMT as-is.
809 VECITYPE is the vector form of PATTERN_STMT's result type. */
812 vect_convert_output (stmt_vec_info stmt_info
, tree type
, gimple
*pattern_stmt
,
815 tree lhs
= gimple_get_lhs (pattern_stmt
);
816 if (!types_compatible_p (type
, TREE_TYPE (lhs
)))
818 append_pattern_def_seq (stmt_info
, pattern_stmt
, vecitype
);
819 tree cast_var
= vect_recog_temp_ssa_var (type
, NULL
);
820 pattern_stmt
= gimple_build_assign (cast_var
, NOP_EXPR
, lhs
);
825 /* Return true if STMT_VINFO describes a reduction for which reassociation
826 is allowed. If STMT_INFO is part of a group, assume that it's part of
827 a reduction chain and optimistically assume that all statements
828 except the last allow reassociation. */
831 vect_reassociating_reduction_p (stmt_vec_info stmt_vinfo
)
833 return (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
834 ? STMT_VINFO_REDUC_TYPE (stmt_vinfo
) != FOLD_LEFT_REDUCTION
835 : REDUC_GROUP_FIRST_ELEMENT (stmt_vinfo
) != NULL
);
838 /* As above, but also require it to have code CODE and to be a reduction
839 in the outermost loop. When returning true, store the operands in
840 *OP0_OUT and *OP1_OUT. */
843 vect_reassociating_reduction_p (stmt_vec_info stmt_info
, tree_code code
,
844 tree
*op0_out
, tree
*op1_out
)
846 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_info
);
850 gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
851 if (!assign
|| gimple_assign_rhs_code (assign
) != code
)
854 /* We don't allow changing the order of the computation in the inner-loop
855 when doing outer-loop vectorization. */
856 struct loop
*loop
= LOOP_VINFO_LOOP (loop_info
);
857 if (loop
&& nested_in_vect_loop_p (loop
, assign
))
860 if (!vect_reassociating_reduction_p (stmt_info
))
863 *op0_out
= gimple_assign_rhs1 (assign
);
864 *op1_out
= gimple_assign_rhs2 (assign
);
868 /* Function vect_recog_dot_prod_pattern
870 Try to find the following pattern:
876 sum_0 = phi <init, sum_1>
879 S3 x_T = (TYPE1) x_t;
880 S4 y_T = (TYPE1) y_t;
882 [S6 prod = (TYPE2) prod; #optional]
883 S7 sum_1 = prod + sum_0;
885 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
886 same size of 'TYPE1' or bigger. This is a special case of a reduction
891 * STMTS: Contains a stmt from which the pattern search begins. In the
892 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
897 * TYPE_OUT: The type of the output of this pattern.
899 * Return value: A new stmt that will be used to replace the sequence of
900 stmts that constitute the pattern. In this case it will be:
901 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
903 Note: The dot-prod idiom is a widening reduction pattern that is
904 vectorized without preserving all the intermediate results. It
905 produces only N/2 (widened) results (by summing up pairs of
906 intermediate results) rather than all N results. Therefore, we
907 cannot allow this pattern when we want to get all the results and in
908 the correct order (as is the case when this computation is in an
909 inner-loop nested in an outer-loop that us being vectorized). */
912 vect_recog_dot_prod_pattern (vec
<gimple
*> *stmts
, tree
*type_out
)
914 gimple
*last_stmt
= (*stmts
)[0];
916 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
917 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
918 tree type
, half_type
;
919 gimple
*pattern_stmt
;
922 /* Look for the following pattern
926 DDPROD = (TYPE2) DPROD;
927 sum_1 = DDPROD + sum_0;
929 - DX is double the size of X
930 - DY is double the size of Y
931 - DX, DY, DPROD all have the same type
932 - sum is the same size of DPROD or bigger
933 - sum has been recognized as a reduction variable.
935 This is equivalent to:
936 DPROD = X w* Y; #widen mult
937 sum_1 = DPROD w+ sum_0; #widen summation
939 DPROD = X w* Y; #widen mult
940 sum_1 = DPROD + sum_0; #summation
943 /* Starting from LAST_STMT, follow the defs of its uses in search
944 of the above pattern. */
946 if (!vect_reassociating_reduction_p (stmt_vinfo
, PLUS_EXPR
,
950 type
= gimple_expr_type (last_stmt
);
952 vect_unpromoted_value unprom_mult
;
953 oprnd0
= vect_look_through_possible_promotion (vinfo
, oprnd0
, &unprom_mult
);
955 /* So far so good. Since last_stmt was detected as a (summation) reduction,
956 we know that oprnd1 is the reduction variable (defined by a loop-header
957 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
958 Left to check that oprnd0 is defined by a (widen_)mult_expr */
962 stmt_vec_info mult_vinfo
= vect_get_internal_def (vinfo
, oprnd0
);
966 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
967 inside the loop (in case we are analyzing an outer-loop). */
968 vect_unpromoted_value unprom0
[2];
969 if (!vect_widened_op_tree (mult_vinfo
, MULT_EXPR
, WIDEN_MULT_EXPR
,
970 false, 2, unprom0
, &half_type
))
973 /* If there are two widening operations, make sure they agree on
974 the sign of the extension. */
975 if (TYPE_PRECISION (unprom_mult
.type
) != TYPE_PRECISION (type
)
976 && TYPE_SIGN (unprom_mult
.type
) != TYPE_SIGN (half_type
))
979 vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt
);
982 if (!vect_supportable_direct_optab_p (type
, DOT_PROD_EXPR
, half_type
,
983 type_out
, &half_vectype
))
986 /* Get the inputs in the appropriate types. */
987 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
) = NULL
;
989 vect_convert_inputs (stmt_vinfo
, 2, mult_oprnd
, half_type
,
990 unprom0
, half_vectype
);
992 var
= vect_recog_temp_ssa_var (type
, NULL
);
993 pattern_stmt
= gimple_build_assign (var
, DOT_PROD_EXPR
,
994 mult_oprnd
[0], mult_oprnd
[1], oprnd1
);
1000 /* Function vect_recog_sad_pattern
1002 Try to find the following Sum of Absolute Difference (SAD) pattern:
1005 signed TYPE1 diff, abs_diff;
1008 sum_0 = phi <init, sum_1>
1011 S3 x_T = (TYPE1) x_t;
1012 S4 y_T = (TYPE1) y_t;
1013 S5 diff = x_T - y_T;
1014 S6 abs_diff = ABS_EXPR <diff>;
1015 [S7 abs_diff = (TYPE2) abs_diff; #optional]
1016 S8 sum_1 = abs_diff + sum_0;
1018 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
1019 same size of 'TYPE1' or bigger. This is a special case of a reduction
1024 * STMTS: Contains a stmt from which the pattern search begins. In the
1025 example, when this function is called with S8, the pattern
1026 {S3,S4,S5,S6,S7,S8} will be detected.
1030 * TYPE_OUT: The type of the output of this pattern.
1032 * Return value: A new stmt that will be used to replace the sequence of
1033 stmts that constitute the pattern. In this case it will be:
1034 SAD_EXPR <x_t, y_t, sum_0>
/* NOTE(review): this region is an extraction-garbled copy of a GCC
   vectorizer pattern recognizer -- each logical line is split across
   several physical lines and some statements (braces, early
   "return NULL;" lines, parts of conditions) appear to have been
   dropped by the extraction.  Code kept byte-identical below; confirm
   against upstream tree-vect-patterns.c before editing.  */
/* Recognize a sum-of-absolute-differences (SAD) reduction rooted at
   the summation statement (*stmts)[0] and, on success, build a
   SAD_EXPR pattern statement whose result replaces the reduction
   update.  TYPE_OUT receives the output vector type (via
   vect_supportable_direct_optab_p).  */
1038 vect_recog_sad_pattern (vec
<gimple
*> *stmts
, tree
*type_out
)
1040 gimple
*last_stmt
= (*stmts
)[0];
1041 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
1042 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
1045 /* Look for the following pattern
1049 DAD = ABS_EXPR <DDIFF>;
1050 DDPROD = (TYPE2) DPROD;
1051 sum_1 = DAD + sum_0;
1053 - DX is at least double the size of X
1054 - DY is at least double the size of Y
1055 - DX, DY, DDIFF, DAD all have the same type
1056 - sum is the same size of DAD or bigger
1057 - sum has been recognized as a reduction variable.
1059 This is equivalent to:
1060 DDIFF = X w- Y; #widen sub
1061 DAD = ABS_EXPR <DDIFF>;
1062 sum_1 = DAD w+ sum_0; #widen summation
1064 DDIFF = X w- Y; #widen sub
1065 DAD = ABS_EXPR <DDIFF>;
1066 sum_1 = DAD + sum_0; #summation
1069 /* Starting from LAST_STMT, follow the defs of its uses in search
1070 of the above pattern. */
/* Require a reassociating PLUS reduction; its two operands become the
   ABS chain (plus_oprnd0) and the reduction accumulator (plus_oprnd1).  */
1072 tree plus_oprnd0
, plus_oprnd1
;
1073 if (!vect_reassociating_reduction_p (stmt_vinfo
, PLUS_EXPR
,
1074 &plus_oprnd0
, &plus_oprnd1
))
1077 tree sum_type
= gimple_expr_type (last_stmt
);
1079 /* Any non-truncating sequence of conversions is OK here, since
1080 with a successful match, the result of the ABS(U) is known to fit
1081 within the nonnegative range of the result type. (It cannot be the
1082 negative of the minimum signed value due to the range of the widening
1084 vect_unpromoted_value unprom_abs
;
1085 plus_oprnd0
= vect_look_through_possible_promotion (vinfo
, plus_oprnd0
,
1088 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1089 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
1090 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
1091 Then check that plus_oprnd0 is defined by an abs_expr. */
1096 stmt_vec_info abs_stmt_vinfo
= vect_get_internal_def (vinfo
, plus_oprnd0
);
1097 if (!abs_stmt_vinfo
)
1100 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
1101 inside the loop (in case we are analyzing an outer-loop). */
/* The defining statement must be an assignment whose rhs code is
   ABS_EXPR or ABSU_EXPR (NOTE(review): the guarding "if (!abs_stmt"
   part of the condition seems to have been lost in extraction).  */
1102 gassign
*abs_stmt
= dyn_cast
<gassign
*> (abs_stmt_vinfo
->stmt
);
1104 || (gimple_assign_rhs_code (abs_stmt
) != ABS_EXPR
1105 && gimple_assign_rhs_code (abs_stmt
) != ABSU_EXPR
))
1108 tree abs_oprnd
= gimple_assign_rhs1 (abs_stmt
);
1109 tree abs_type
= TREE_TYPE (abs_oprnd
);
1110 if (TYPE_UNSIGNED (abs_type
))
1113 /* Peel off conversions from the ABS input. This can involve sign
1114 changes (e.g. from an unsigned subtraction to a signed ABS input)
1115 or signed promotion, but it can't include unsigned promotion.
1116 (Note that ABS of an unsigned promotion should have been folded
1117 away before now anyway.) */
1118 vect_unpromoted_value unprom_diff
;
1119 abs_oprnd
= vect_look_through_possible_promotion (vinfo
, abs_oprnd
,
1123 if (TYPE_PRECISION (unprom_diff
.type
) != TYPE_PRECISION (abs_type
)
1124 && TYPE_UNSIGNED (unprom_diff
.type
))
1127 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
1128 stmt_vec_info diff_stmt_vinfo
= vect_get_internal_def (vinfo
, abs_oprnd
);
1129 if (!diff_stmt_vinfo
)
1132 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
1133 inside the loop (in case we are analyzing an outer-loop). */
/* The subtraction must be a widening MINUS_EXPR tree over two
   unpromoted inputs of a common HALF_TYPE.  */
1134 vect_unpromoted_value unprom
[2];
1135 if (!vect_widened_op_tree (diff_stmt_vinfo
, MINUS_EXPR
, MINUS_EXPR
,
1136 false, 2, unprom
, &half_type
))
1139 vect_pattern_detected ("vect_recog_sad_pattern", last_stmt
);
/* Check the target has a direct SAD_EXPR optab for these types.  */
1142 if (!vect_supportable_direct_optab_p (sum_type
, SAD_EXPR
, half_type
,
1143 type_out
, &half_vectype
))
1146 /* Get the inputs to the SAD_EXPR in the appropriate types. */
1147 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
) = NULL
;
1149 vect_convert_inputs (stmt_vinfo
, 2, sad_oprnd
, half_type
,
1150 unprom
, half_vectype
);
/* Build SAD_EXPR <sad_oprnd[0], sad_oprnd[1], plus_oprnd1>.  */
1152 tree var
= vect_recog_temp_ssa_var (sum_type
, NULL
);
1153 gimple
*pattern_stmt
= gimple_build_assign (var
, SAD_EXPR
, sad_oprnd
[0],
1154 sad_oprnd
[1], plus_oprnd1
);
1156 return pattern_stmt
;
/* NOTE(review): extraction-garbled block; logical lines are split and
   some statements (braces, early returns, part of the target-support
   condition) appear missing.  Code kept byte-identical; confirm
   against upstream tree-vect-patterns.c.  */
1159 /* Recognize an operation that performs ORIG_CODE on widened inputs,
1160 so that it can be treated as though it had the form:
1164 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1165 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1166 | RES_TYPE a_extend = (RES_TYPE) a_cast; // promotion from HALF_TYPE
1167 | RES_TYPE b_extend = (RES_TYPE) b_cast; // promotion from HALF_TYPE
1168 | RES_TYPE res = a_extend ORIG_CODE b_extend;
1170 Try to replace the pattern with:
1174 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1175 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1176 | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
1177 | RES_TYPE res = (EXT_TYPE) ext; // possible no-op
1179 where EXT_TYPE is wider than HALF_TYPE but has the same signedness.
1181 SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts. NAME is the
1182 name of the pattern being matched, for dump purposes. */
1185 vect_recog_widen_op_pattern (vec
<gimple
*> *stmts
, tree
*type_out
,
1186 tree_code orig_code
, tree_code wide_code
,
1187 bool shift_p
, const char *name
)
1189 gimple
*last_stmt
= stmts
->pop ();
1190 stmt_vec_info last_stmt_info
= vinfo_for_stmt (last_stmt
);
/* Match ORIG_CODE applied to two inputs widened from a common
   HALF_TYPE.  */
1192 vect_unpromoted_value unprom
[2];
1194 if (!vect_widened_op_tree (last_stmt_info
, orig_code
, orig_code
,
1195 shift_p
, 2, unprom
, &half_type
))
1198 /* Pattern detected. */
1199 vect_pattern_detected (name
, last_stmt
);
1201 tree type
= gimple_expr_type (last_stmt
);
/* If the result type is not exactly twice HALF_TYPE's precision with
   matching signedness, operate in a double-width integer type
   instead.  */
1203 if (TYPE_PRECISION (type
) != TYPE_PRECISION (half_type
) * 2
1204 || TYPE_UNSIGNED (type
) != TYPE_UNSIGNED (half_type
))
1205 itype
= build_nonstandard_integer_type (TYPE_PRECISION (half_type
) * 2,
1206 TYPE_UNSIGNED (half_type
));
1208 /* Check target support */
1209 tree vectype
= get_vectype_for_scalar_type (half_type
);
1210 tree vecitype
= get_vectype_for_scalar_type (itype
);
1211 enum tree_code dummy_code
;
1213 auto_vec
<tree
> dummy_vec
;
1216 || !supportable_widening_operation (wide_code
, last_stmt
,
1218 &dummy_code
, &dummy_code
,
1219 &dummy_int
, &dummy_vec
))
1222 *type_out
= get_vectype_for_scalar_type (type
);
1226 STMT_VINFO_PATTERN_DEF_SEQ (last_stmt_info
) = NULL
;
1228 vect_convert_inputs (last_stmt_info
, 2, oprnd
, half_type
, unprom
, vectype
);
/* Emit WIDE_CODE on the half-type operands, then convert the
   double-width result back to the original result type.  */
1230 tree var
= vect_recog_temp_ssa_var (itype
, NULL
);
1231 gimple
*pattern_stmt
= gimple_build_assign (var
, wide_code
,
1232 oprnd
[0], oprnd
[1]);
1234 stmts
->safe_push (last_stmt
);
1235 return vect_convert_output (last_stmt_info
, type
, pattern_stmt
, vecitype
);
/* NOTE(review): garbled extraction of a thin wrapper; the function's
   braces and return-type line appear to be missing.  Kept
   byte-identical.  */
1238 /* Try to detect multiplication on widened inputs, converting MULT_EXPR
1239 to WIDEN_MULT_EXPR. See vect_recog_widen_op_pattern for details. */
1242 vect_recog_widen_mult_pattern (vec
<gimple
*> *stmts
, tree
*type_out
)
/* Delegate to the generic widening-operation recognizer with
   MULT_EXPR -> WIDEN_MULT_EXPR and shift_p == false.  */
1244 return vect_recog_widen_op_pattern (stmts
, type_out
, MULT_EXPR
,
1245 WIDEN_MULT_EXPR
, false,
1246 "vect_recog_widen_mult_pattern");
/* NOTE(review): extraction-garbled block; case labels, braces and
   early returns of the switches appear to be missing.  Kept
   byte-identical; confirm against upstream tree-vect-patterns.c.  */
1249 /* Function vect_recog_pow_pattern
1251 Try to find the following pattern:
1255 with POW being one of pow, powf, powi, powif and N being
1260 * LAST_STMT: A stmt from which the pattern search begins.
1264 * TYPE_OUT: The type of the output of this pattern.
1266 * Return value: A new stmt that will be used to replace the sequence of
1267 stmts that constitute the pattern. In this case it will be:
1274 vect_recog_pow_pattern (vec
<gimple
*> *stmts
, tree
*type_out
)
1276 gimple
*last_stmt
= (*stmts
)[0];
/* Only calls with an lhs can start the pattern.  */
1281 if (!is_gimple_call (last_stmt
) || gimple_call_lhs (last_stmt
) == NULL
)
1284 switch (gimple_call_combined_fn (last_stmt
))
1294 base
= gimple_call_arg (last_stmt
, 0);
1295 exp
= gimple_call_arg (last_stmt
, 1);
1296 if (TREE_CODE (exp
) != REAL_CST
1297 && TREE_CODE (exp
) != INTEGER_CST
)
/* With -funsafe-math-optimizations and a constant base, try
   rewriting pow (C, x) as exp (log (C) * x) using a SIMD clone of
   the exp builtin.  */
1299 if (flag_unsafe_math_optimizations
1300 && TREE_CODE (base
) == REAL_CST
1301 && !gimple_call_internal_p (last_stmt
))
1303 combined_fn log_cfn
;
1304 built_in_function exp_bfn
;
1305 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt
)))
1308 log_cfn
= CFN_BUILT_IN_LOG
;
1309 exp_bfn
= BUILT_IN_EXP
;
1312 log_cfn
= CFN_BUILT_IN_LOGF
;
1313 exp_bfn
= BUILT_IN_EXPF
;
1316 log_cfn
= CFN_BUILT_IN_LOGL
;
1317 exp_bfn
= BUILT_IN_EXPL
;
1322 tree logc
= fold_const_call (log_cfn
, TREE_TYPE (base
), base
);
1323 tree exp_decl
= builtin_decl_implicit (exp_bfn
);
1324 /* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd
1325 does that, but if C is a power of 2, we want to use
1326 exp2 (log2 (C) * x) in the non-vectorized version, but for
1327 vectorization we don't have vectorized exp2. */
1329 && TREE_CODE (logc
) == REAL_CST
1331 && lookup_attribute ("omp declare simd",
1332 DECL_ATTRIBUTES (exp_decl
)))
/* Ensure SIMD clones of the exp builtin exist before committing.  */
1334 cgraph_node
*node
= cgraph_node::get_create (exp_decl
);
1335 if (node
->simd_clones
== NULL
)
1337 if (targetm
.simd_clone
.compute_vecsize_and_simdlen
== NULL
1338 || node
->definition
)
1340 expand_simd_clones (node
);
1341 if (node
->simd_clones
== NULL
)
1344 *type_out
= get_vectype_for_scalar_type (TREE_TYPE (base
));
/* Emit def = exp * logc into the pattern def sequence, then call
   exp_decl on it.  */
1347 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
1348 tree def
= vect_recog_temp_ssa_var (TREE_TYPE (base
), NULL
);
1349 gimple
*g
= gimple_build_assign (def
, MULT_EXPR
, exp
, logc
);
1350 new_pattern_def_seq (stmt_vinfo
, g
);
1351 tree res
= vect_recog_temp_ssa_var (TREE_TYPE (base
), NULL
);
1352 g
= gimple_build_call (exp_decl
, 1, def
);
1353 gimple_call_set_lhs (g
, res
);
1361 /* We now have a pow or powi builtin function call with a constant
1364 /* Catch squaring. */
1365 if ((tree_fits_shwi_p (exp
)
1366 && tree_to_shwi (exp
) == 2)
1367 || (TREE_CODE (exp
) == REAL_CST
1368 && real_equal (&TREE_REAL_CST (exp
), &dconst2
)))
1370 if (!vect_supportable_direct_optab_p (TREE_TYPE (base
), MULT_EXPR
,
1371 TREE_TYPE (base
), type_out
))
/* pow (x, 2) -> x * x.  */
1374 var
= vect_recog_temp_ssa_var (TREE_TYPE (base
), NULL
);
1375 stmt
= gimple_build_assign (var
, MULT_EXPR
, base
, base
);
1379 /* Catch square root. */
1380 if (TREE_CODE (exp
) == REAL_CST
1381 && real_equal (&TREE_REAL_CST (exp
), &dconsthalf
))
1383 *type_out
= get_vectype_for_scalar_type (TREE_TYPE (base
));
1385 && direct_internal_fn_supported_p (IFN_SQRT
, *type_out
,
1386 OPTIMIZE_FOR_SPEED
))
/* pow (x, 0.5) -> IFN_SQRT (x) when the target supports it.  */
1388 gcall
*stmt
= gimple_build_call_internal (IFN_SQRT
, 1, base
);
1389 var
= vect_recog_temp_ssa_var (TREE_TYPE (base
), stmt
);
1390 gimple_call_set_lhs (stmt
, var
);
1391 gimple_call_set_nothrow (stmt
, true);
/* NOTE(review): extraction-garbled block; braces and early returns
   appear to be missing.  Kept byte-identical; confirm against
   upstream tree-vect-patterns.c.  */
1400 /* Function vect_recog_widen_sum_pattern
1402 Try to find the following pattern:
1405 TYPE x_T, sum = init;
1407 sum_0 = phi <init, sum_1>
1409 S2 x_T = (TYPE) x_t;
1410 S3 sum_1 = x_T + sum_0;
1412 where type 'TYPE' is at least double the size of type 'type', i.e - we're
1413 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1414 a special case of a reduction computation.
1418 * LAST_STMT: A stmt from which the pattern search begins. In the example,
1419 when this function is called with S3, the pattern {S2,S3} will be detected.
1423 * TYPE_OUT: The type of the output of this pattern.
1425 * Return value: A new stmt that will be used to replace the sequence of
1426 stmts that constitute the pattern. In this case it will be:
1427 WIDEN_SUM <x_t, sum_0>
1429 Note: The widening-sum idiom is a widening reduction pattern that is
1430 vectorized without preserving all the intermediate results. It
1431 produces only N/2 (widened) results (by summing up pairs of
1432 intermediate results) rather than all N results. Therefore, we
1433 cannot allow this pattern when we want to get all the results and in
1434 the correct order (as is the case when this computation is in an
1435 inner-loop nested in an outer-loop that us being vectorized). */
1438 vect_recog_widen_sum_pattern (vec
<gimple
*> *stmts
, tree
*type_out
)
1440 gimple
*last_stmt
= (*stmts
)[0];
1441 tree oprnd0
, oprnd1
;
1442 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
1443 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
1445 gimple
*pattern_stmt
;
1448 /* Look for the following pattern
1451 In which DX is at least double the size of X, and sum_1 has been
1452 recognized as a reduction variable.
1455 /* Starting from LAST_STMT, follow the defs of its uses in search
1456 of the above pattern. */
/* Require a reassociating PLUS reduction rooted at LAST_STMT.  */
1458 if (!vect_reassociating_reduction_p (stmt_vinfo
, PLUS_EXPR
,
1462 type
= gimple_expr_type (last_stmt
);
1464 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1465 we know that oprnd1 is the reduction variable (defined by a loop-header
1466 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1467 Left to check that oprnd0 is defined by a cast from type 'type' to type
1470 vect_unpromoted_value unprom0
;
1471 if (!vect_look_through_possible_promotion (vinfo
, oprnd0
, &unprom0
)
1472 || TYPE_PRECISION (unprom0
.type
) * 2 > TYPE_PRECISION (type
))
1475 vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt
);
/* Check for a direct WIDEN_SUM_EXPR optab on the involved types.  */
1477 if (!vect_supportable_direct_optab_p (type
, WIDEN_SUM_EXPR
, unprom0
.type
,
/* Build WIDEN_SUM <unprom0.op, oprnd1>.  */
1481 var
= vect_recog_temp_ssa_var (type
, NULL
);
1482 pattern_stmt
= gimple_build_assign (var
, WIDEN_SUM_EXPR
, unprom0
.op
, oprnd1
);
1484 return pattern_stmt
;
/* NOTE(review): extraction-garbled block; braces, early returns and
   some declarations (e.g. the "ops" array) appear to be missing.
   Kept byte-identical; confirm against upstream
   tree-vect-patterns.c.  */
1487 /* Recognize cases in which an operation is performed in one type WTYPE
1488 but could be done more efficiently in a narrower type NTYPE. For example,
1491 ATYPE a; // narrower than NTYPE
1492 BTYPE b; // narrower than NTYPE
1493 WTYPE aw = (WTYPE) a;
1494 WTYPE bw = (WTYPE) b;
1495 WTYPE res = aw + bw; // only uses of aw and bw
1497 then it would be more efficient to do:
1499 NTYPE an = (NTYPE) a;
1500 NTYPE bn = (NTYPE) b;
1501 NTYPE resn = an + bn;
1502 WTYPE res = (WTYPE) resn;
1504 Other situations include things like:
1506 ATYPE a; // NTYPE or narrower
1507 WTYPE aw = (WTYPE) a;
1510 when only "(NTYPE) res" is significant. In that case it's more efficient
1511 to truncate "b" and do the operation on NTYPE instead:
1513 NTYPE an = (NTYPE) a;
1514 NTYPE bn = (NTYPE) b; // truncation
1515 NTYPE resn = an + bn;
1516 WTYPE res = (WTYPE) resn;
1518 All users of "res" should then use "resn" instead, making the final
1519 statement dead (not marked as relevant). The final statement is still
1520 needed to maintain the type correctness of the IR.
1522 vect_determine_precisions has already determined the minimum
1523 precison of the operation and the minimum precision required
1524 by users of the result. */
1527 vect_recog_over_widening_pattern (vec
<gimple
*> *stmts
, tree
*type_out
)
1529 gassign
*last_stmt
= dyn_cast
<gassign
*> (stmts
->pop ());
1533 /* See whether we have found that this operation can be done on a
1534 narrower type without changing its semantics. */
1535 stmt_vec_info last_stmt_info
= vinfo_for_stmt (last_stmt
);
1536 unsigned int new_precision
= last_stmt_info
->operation_precision
;
1540 vec_info
*vinfo
= last_stmt_info
->vinfo
;
1541 tree lhs
= gimple_assign_lhs (last_stmt
);
1542 tree type
= TREE_TYPE (lhs
);
1543 tree_code code
= gimple_assign_rhs_code (last_stmt
);
1545 /* Keep the first operand of a COND_EXPR as-is: only the other two
1546 operands are interesting. */
1547 unsigned int first_op
= (code
== COND_EXPR
? 2 : 1);
1549 /* Check the operands. */
1550 unsigned int nops
= gimple_num_ops (last_stmt
) - first_op
;
1551 auto_vec
<vect_unpromoted_value
, 3> unprom (nops
);
1552 unprom
.quick_grow (nops
);
1553 unsigned int min_precision
= 0;
1554 bool single_use_p
= false;
/* Classify each operand: constants are recorded directly, SSA names
   are looked through possible promotions.  */
1555 for (unsigned int i
= 0; i
< nops
; ++i
)
1557 tree op
= gimple_op (last_stmt
, first_op
+ i
);
1558 if (TREE_CODE (op
) == INTEGER_CST
)
1559 unprom
[i
].set_op (op
, vect_constant_def
);
1560 else if (TREE_CODE (op
) == SSA_NAME
)
1562 bool op_single_use_p
= true;
1563 if (!vect_look_through_possible_promotion (vinfo
, op
, &unprom
[i
],
1568 (1) N bits of the result are needed;
1569 (2) all inputs are widened from M<N bits; and
1570 (3) one operand OP is a single-use SSA name
1572 we can shift the M->N widening from OP to the output
1573 without changing the number or type of extensions involved.
1574 This then reduces the number of copies of STMT_INFO.
1576 If instead of (3) more than one operand is a single-use SSA name,
1577 shifting the extension to the output is even more of a win.
1581 (1) N bits of the result are needed;
1582 (2) one operand OP2 is widened from M2<N bits;
1583 (3) another operand OP1 is widened from M1<M2 bits; and
1584 (4) both OP1 and OP2 are single-use
1586 the choice is between:
1588 (a) truncating OP2 to M1, doing the operation on M1,
1589 and then widening the result to N
1591 (b) widening OP1 to M2, doing the operation on M2, and then
1592 widening the result to N
1594 Both shift the M2->N widening of the inputs to the output.
1595 (a) additionally shifts the M1->M2 widening to the output;
1596 it requires fewer copies of STMT_INFO but requires an extra
1599 Which is better will depend on the complexity and cost of
1600 STMT_INFO, which is hard to predict at this stage. However,
1601 a clear tie-breaker in favor of (b) is the fact that the
1602 truncation in (a) increases the length of the operation chain.
1604 If instead of (4) only one of OP1 or OP2 is single-use,
1605 (b) is still a win over doing the operation in N bits:
1606 it still shifts the M2->N widening on the single-use operand
1607 to the output and reduces the number of STMT_INFO copies.
1609 If neither operand is single-use then operating on fewer than
1610 N bits might lead to more extensions overall. Whether it does
1611 or not depends on global information about the vectorization
1612 region, and whether that's a good trade-off would again
1613 depend on the complexity and cost of the statements involved,
1614 as well as things like register pressure that are not normally
1615 modelled at this stage. We therefore ignore these cases
1616 and just optimize the clear single-use wins above.
1618 Thus we take the maximum precision of the unpromoted operands
1619 and record whether any operand is single-use. */
1620 if (unprom
[i
].dt
== vect_internal_def
)
1622 min_precision
= MAX (min_precision
,
1623 TYPE_PRECISION (unprom
[i
].type
));
1624 single_use_p
|= op_single_use_p
;
1629 /* Although the operation could be done in operation_precision, we have
1630 to balance that against introducing extra truncations or extensions.
1631 Calculate the minimum precision that can be handled efficiently.
1633 The loop above determined that the operation could be handled
1634 efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
1635 extension from the inputs to the output without introducing more
1636 instructions, and would reduce the number of instructions required
1637 for STMT_INFO itself.
1639 vect_determine_precisions has also determined that the result only
1640 needs min_output_precision bits. Truncating by a factor of N times
1641 requires a tree of N - 1 instructions, so if TYPE is N times wider
1642 than min_output_precision, doing the operation in TYPE and truncating
1643 the result requires N + (N - 1) = 2N - 1 instructions per output vector.
1646 - truncating the input to a unary operation and doing the operation
1647 in the new type requires at most N - 1 + 1 = N instructions per
1650 - doing the same for a binary operation requires at most
1651 (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
1653 Both unary and binary operations require fewer instructions than
1654 this if the operands were extended from a suitable truncated form.
1655 Thus there is usually nothing to lose by doing operations in
1656 min_output_precision bits, but there can be something to gain. */
1658 min_precision
= last_stmt_info
->min_output_precision
;
1660 min_precision
= MIN (min_precision
, last_stmt_info
->min_output_precision
);
1662 /* Apply the minimum efficient precision we just calculated. */
1663 if (new_precision
< min_precision
)
1664 new_precision
= min_precision
;
1665 if (new_precision
>= TYPE_PRECISION (type
))
1668 vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt
);
1670 *type_out
= get_vectype_for_scalar_type (type
);
1674 /* We've found a viable pattern. Get the new type of the operation. */
1675 bool unsigned_p
= (last_stmt_info
->operation_sign
== UNSIGNED
);
1676 tree new_type
= build_nonstandard_integer_type (new_precision
, unsigned_p
);
1678 /* We specifically don't check here whether the target supports the
1679 new operation, since it might be something that a later pattern
1680 wants to rewrite anyway. If targets have a minimum element size
1681 for some optabs, we should pattern-match smaller ops to larger ops
1682 where beneficial. */
1683 tree new_vectype
= get_vectype_for_scalar_type (new_type
);
1687 if (dump_enabled_p ())
1689 dump_printf_loc (MSG_NOTE
, vect_location
, "demoting ");
1690 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, type
);
1691 dump_printf (MSG_NOTE
, " to ");
1692 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, new_type
);
1693 dump_printf (MSG_NOTE
, "\n");
1696 /* Calculate the rhs operands for an operation on NEW_TYPE. */
1697 STMT_VINFO_PATTERN_DEF_SEQ (last_stmt_info
) = NULL
;
1699 for (unsigned int i
= 1; i
< first_op
; ++i
)
1700 ops
[i
- 1] = gimple_op (last_stmt
, i
);
1701 vect_convert_inputs (last_stmt_info
, nops
, &ops
[first_op
- 1],
1702 new_type
, &unprom
[0], new_vectype
);
1704 /* Use the operation to produce a result of type NEW_TYPE. */
1705 tree new_var
= vect_recog_temp_ssa_var (new_type
, NULL
);
1706 gimple
*pattern_stmt
= gimple_build_assign (new_var
, code
,
1707 ops
[0], ops
[1], ops
[2]);
1708 gimple_set_location (pattern_stmt
, gimple_location (last_stmt
));
1710 if (dump_enabled_p ())
1712 dump_printf_loc (MSG_NOTE
, vect_location
,
1713 "created pattern stmt: ");
1714 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
/* Convert the narrow result back to the original type.  */
1717 pattern_stmt
= vect_convert_output (last_stmt_info
, type
,
1718 pattern_stmt
, new_vectype
);
1720 stmts
->safe_push (last_stmt
);
1721 return pattern_stmt
;
/* NOTE(review): extraction-garbled block; braces and early returns
   appear to be missing.  Kept byte-identical; confirm against
   upstream tree-vect-patterns.c.  */
1724 /* Recognize cases in which the input to a cast is wider than its
1725 output, and the input is fed by a widening operation. Fold this
1726 by removing the unnecessary intermediate widening. E.g.:
1729 unsigned int b = (unsigned int) a;
1730 unsigned short c = (unsigned short) b;
1734 unsigned short c = (unsigned short) a;
1736 Although this is rare in input IR, it is an expected side-effect
1737 of the over-widening pattern above.
1739 This is beneficial also for integer-to-float conversions, if the
1740 widened integer has more bits than the float, and if the unwidened
1744 vect_recog_cast_forwprop_pattern (vec
<gimple
*> *stmts
, tree
*type_out
)
1746 /* Check for a cast, including an integer-to-float conversion. */
1747 gassign
*last_stmt
= dyn_cast
<gassign
*> (stmts
->pop ());
1750 tree_code code
= gimple_assign_rhs_code (last_stmt
);
1751 if (!CONVERT_EXPR_CODE_P (code
) && code
!= FLOAT_EXPR
)
1754 /* Make sure that the rhs is a scalar with a natural bitsize. */
1755 tree lhs
= gimple_assign_lhs (last_stmt
);
1758 tree lhs_type
= TREE_TYPE (lhs
);
1759 scalar_mode lhs_mode
;
1760 if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type
)
1761 || !is_a
<scalar_mode
> (TYPE_MODE (lhs_type
), &lhs_mode
))
1764 /* Check for a narrowing operation (from a vector point of view). */
1765 tree rhs
= gimple_assign_rhs1 (last_stmt
);
1766 tree rhs_type
= TREE_TYPE (rhs
);
1767 if (!INTEGRAL_TYPE_P (rhs_type
)
1768 || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type
)
1769 || TYPE_PRECISION (rhs_type
) <= GET_MODE_BITSIZE (lhs_mode
))
1772 /* Try to find an unpromoted input. */
1773 stmt_vec_info last_stmt_info
= vinfo_for_stmt (last_stmt
);
1774 vec_info
*vinfo
= last_stmt_info
->vinfo
;
1775 vect_unpromoted_value unprom
;
1776 if (!vect_look_through_possible_promotion (vinfo
, rhs
, &unprom
)
1777 || TYPE_PRECISION (unprom
.type
) >= TYPE_PRECISION (rhs_type
))
1780 /* If the bits above RHS_TYPE matter, make sure that they're the
1781 same when extending from UNPROM as they are when extending from RHS. */
1782 if (!INTEGRAL_TYPE_P (lhs_type
)
1783 && TYPE_SIGN (rhs_type
) != TYPE_SIGN (unprom
.type
))
1786 /* We can get the same result by casting UNPROM directly, to avoid
1787 the unnecessary widening and narrowing. */
1788 vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt
);
1790 *type_out
= get_vectype_for_scalar_type (lhs_type
);
/* Build the direct cast from the unpromoted value to LHS_TYPE.  */
1794 tree new_var
= vect_recog_temp_ssa_var (lhs_type
, NULL
);
1795 gimple
*pattern_stmt
= gimple_build_assign (new_var
, code
, unprom
.op
);
1796 gimple_set_location (pattern_stmt
, gimple_location (last_stmt
));
1798 stmts
->safe_push (last_stmt
);
1799 return pattern_stmt
;
/* NOTE(review): garbled extraction of a thin wrapper; the function's
   braces and return-type line appear to be missing.  Kept
   byte-identical.  */
1802 /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
1803 to WIDEN_LSHIFT_EXPR. See vect_recog_widen_op_pattern for details. */
1806 vect_recog_widen_shift_pattern (vec
<gimple
*> *stmts
, tree
*type_out
)
/* Delegate to the generic widening-operation recognizer with
   LSHIFT_EXPR -> WIDEN_LSHIFT_EXPR and shift_p == true.  */
1808 return vect_recog_widen_op_pattern (stmts
, type_out
, LSHIFT_EXPR
,
1809 WIDEN_LSHIFT_EXPR
, true,
1810 "vect_recog_widen_shift_pattern");
/* NOTE(review): extraction-garbled block; braces, early returns, the
   rotate-code check and the shift operand lists appear to be
   missing.  Kept byte-identical; confirm against upstream
   tree-vect-patterns.c.  */
1813 /* Detect a rotate pattern wouldn't be otherwise vectorized:
1817 S0 a_t = b_t r<< c_t;
1821 * STMTS: Contains a stmt from which the pattern search begins,
1822 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1826 S2 e_t = d_t & (B - 1);
1827 S3 f_t = b_t << c_t;
1828 S4 g_t = b_t >> e_t;
1831 where B is element bitsize of type.
1835 * TYPE_OUT: The type of the output of this pattern.
1837 * Return value: A new stmt that will be used to replace the rotate
1841 vect_recog_rotate_pattern (vec
<gimple
*> *stmts
, tree
*type_out
)
1843 gimple
*last_stmt
= stmts
->pop ();
1844 tree oprnd0
, oprnd1
, lhs
, var
, var1
, var2
, vectype
, type
, stype
, def
, def2
;
1845 gimple
*pattern_stmt
, *def_stmt
;
1846 enum tree_code rhs_code
;
1847 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
1848 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
1849 enum vect_def_type dt
;
1850 optab optab1
, optab2
;
1851 edge ext_def
= NULL
;
1853 if (!is_gimple_assign (last_stmt
))
1856 rhs_code
= gimple_assign_rhs_code (last_stmt
);
/* Only unsigned integral rotates with matching lhs/operand precision
   are handled.  */
1866 lhs
= gimple_assign_lhs (last_stmt
);
1867 oprnd0
= gimple_assign_rhs1 (last_stmt
);
1868 type
= TREE_TYPE (oprnd0
);
1869 oprnd1
= gimple_assign_rhs2 (last_stmt
);
1870 if (TREE_CODE (oprnd0
) != SSA_NAME
1871 || TYPE_PRECISION (TREE_TYPE (lhs
)) != TYPE_PRECISION (type
)
1872 || !INTEGRAL_TYPE_P (type
)
1873 || !TYPE_UNSIGNED (type
))
1876 if (!vect_is_simple_use (oprnd1
, vinfo
, &dt
, &def_stmt
))
1879 if (dt
!= vect_internal_def
1880 && dt
!= vect_constant_def
1881 && dt
!= vect_external_def
)
1884 vectype
= get_vectype_for_scalar_type (type
);
1885 if (vectype
== NULL_TREE
)
1888 /* If vector/vector or vector/scalar rotate is supported by the target,
1889 don't do anything here. */
1890 optab1
= optab_for_tree_code (rhs_code
, vectype
, optab_vector
);
1892 && optab_handler (optab1
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
1895 if (is_a
<bb_vec_info
> (vinfo
) || dt
!= vect_internal_def
)
1897 optab2
= optab_for_tree_code (rhs_code
, vectype
, optab_scalar
);
1899 && optab_handler (optab2
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
1903 /* If vector/vector or vector/scalar shifts aren't supported by the target,
1904 don't do anything here either. */
1905 optab1
= optab_for_tree_code (LSHIFT_EXPR
, vectype
, optab_vector
);
1906 optab2
= optab_for_tree_code (RSHIFT_EXPR
, vectype
, optab_vector
);
1908 || optab_handler (optab1
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
1910 || optab_handler (optab2
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
1912 if (! is_a
<bb_vec_info
> (vinfo
) && dt
== vect_internal_def
)
1914 optab1
= optab_for_tree_code (LSHIFT_EXPR
, vectype
, optab_scalar
);
1915 optab2
= optab_for_tree_code (RSHIFT_EXPR
, vectype
, optab_scalar
);
1917 || optab_handler (optab1
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
1919 || optab_handler (optab2
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
1923 *type_out
= vectype
;
1925 if (dt
== vect_external_def
1926 && TREE_CODE (oprnd1
) == SSA_NAME
)
1927 ext_def
= vect_get_external_def_edge (vinfo
, oprnd1
);
/* Normalize the shift amount to TYPE's mode, casting it when its
   defining statement is a same-precision conversion.  */
1930 scalar_int_mode mode
= SCALAR_INT_TYPE_MODE (type
);
1931 if (TREE_CODE (oprnd1
) == INTEGER_CST
1932 || TYPE_MODE (TREE_TYPE (oprnd1
)) == mode
)
1934 else if (def_stmt
&& gimple_assign_cast_p (def_stmt
))
1936 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
1937 if (TYPE_MODE (TREE_TYPE (rhs1
)) == mode
1938 && TYPE_PRECISION (TREE_TYPE (rhs1
))
1939 == TYPE_PRECISION (type
))
1943 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
) = NULL
;
1944 if (def
== NULL_TREE
)
1946 def
= vect_recog_temp_ssa_var (type
, NULL
);
1947 def_stmt
= gimple_build_assign (def
, NOP_EXPR
, oprnd1
);
/* For external defs, the helper statements are inserted on the
   preheader edge instead of the pattern def sequence.  */
1951 = gsi_insert_on_edge_immediate (ext_def
, def_stmt
);
1952 gcc_assert (!new_bb
);
1955 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
1957 stype
= TREE_TYPE (def
);
1958 scalar_int_mode smode
= SCALAR_INT_TYPE_MODE (stype
);
/* Compute the complementary shift amount: a constant B - c, or at
   runtime (-c) & (B - 1).  */
1960 if (TREE_CODE (def
) == INTEGER_CST
)
1962 if (!tree_fits_uhwi_p (def
)
1963 || tree_to_uhwi (def
) >= GET_MODE_PRECISION (mode
)
1964 || integer_zerop (def
))
1966 def2
= build_int_cst (stype
,
1967 GET_MODE_PRECISION (mode
) - tree_to_uhwi (def
));
1971 tree vecstype
= get_vectype_for_scalar_type (stype
);
1972 stmt_vec_info def_stmt_vinfo
;
1974 if (vecstype
== NULL_TREE
)
1976 def2
= vect_recog_temp_ssa_var (stype
, NULL
);
1977 def_stmt
= gimple_build_assign (def2
, NEGATE_EXPR
, def
);
1981 = gsi_insert_on_edge_immediate (ext_def
, def_stmt
);
1982 gcc_assert (!new_bb
);
1986 def_stmt_vinfo
= new_stmt_vec_info (def_stmt
, vinfo
);
1987 set_vinfo_for_stmt (def_stmt
, def_stmt_vinfo
);
1988 STMT_VINFO_VECTYPE (def_stmt_vinfo
) = vecstype
;
1989 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
1992 def2
= vect_recog_temp_ssa_var (stype
, NULL
);
1993 tree mask
= build_int_cst (stype
, GET_MODE_PRECISION (smode
) - 1);
1994 def_stmt
= gimple_build_assign (def2
, BIT_AND_EXPR
,
1995 gimple_assign_lhs (def_stmt
), mask
);
1999 = gsi_insert_on_edge_immediate (ext_def
, def_stmt
);
2000 gcc_assert (!new_bb
);
2004 def_stmt_vinfo
= new_stmt_vec_info (def_stmt
, vinfo
);
2005 set_vinfo_for_stmt (def_stmt
, def_stmt_vinfo
);
2006 STMT_VINFO_VECTYPE (def_stmt_vinfo
) = vecstype
;
2007 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
/* Emit the two shifts whose IOR realizes the rotate.  */
2011 var1
= vect_recog_temp_ssa_var (type
, NULL
);
2012 def_stmt
= gimple_build_assign (var1
, rhs_code
== LROTATE_EXPR
2013 ? LSHIFT_EXPR
: RSHIFT_EXPR
,
2015 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2017 var2
= vect_recog_temp_ssa_var (type
, NULL
);
2018 def_stmt
= gimple_build_assign (var2
, rhs_code
== LROTATE_EXPR
2019 ? RSHIFT_EXPR
: LSHIFT_EXPR
,
2021 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2023 /* Pattern detected. */
2024 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt
);
2026 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2027 var
= vect_recog_temp_ssa_var (type
, NULL
);
2028 pattern_stmt
= gimple_build_assign (var
, BIT_IOR_EXPR
, var1
, var2
);
2030 stmts
->safe_push (last_stmt
);
2031 return pattern_stmt
;
/* NOTE(review): extraction-garbled block; braces, early returns and
   the shift-code check appear to be missing.  Kept byte-identical;
   confirm against upstream tree-vect-patterns.c.  */
2034 /* Detect a vector by vector shift pattern that wouldn't be otherwise
2042 S3 res_T = b_T op a_t;
2044 where type 'TYPE' is a type with different size than 'type',
2045 and op is <<, >> or rotate.
2050 TYPE b_T, c_T, res_T;
2053 S1 a_t = (type) c_T;
2055 S3 res_T = b_T op a_t;
2059 * STMTS: Contains a stmt from which the pattern search begins,
2060 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2061 with a shift/rotate which has same type on both operands, in the
2062 second case just b_T op c_T, in the first case with added cast
2063 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2067 * TYPE_OUT: The type of the output of this pattern.
2069 * Return value: A new stmt that will be used to replace the shift/rotate
2073 vect_recog_vector_vector_shift_pattern (vec
<gimple
*> *stmts
, tree
*type_out
)
2075 gimple
*last_stmt
= stmts
->pop ();
2076 tree oprnd0
, oprnd1
, lhs
, var
;
2077 gimple
*pattern_stmt
;
2078 enum tree_code rhs_code
;
2079 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
2080 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
2082 if (!is_gimple_assign (last_stmt
))
2085 rhs_code
= gimple_assign_rhs_code (last_stmt
);
/* Both operands must be SSA names whose modes differ, with the shift
   amount having full mode precision.  */
2097 lhs
= gimple_assign_lhs (last_stmt
);
2098 oprnd0
= gimple_assign_rhs1 (last_stmt
);
2099 oprnd1
= gimple_assign_rhs2 (last_stmt
);
2100 if (TREE_CODE (oprnd0
) != SSA_NAME
2101 || TREE_CODE (oprnd1
) != SSA_NAME
2102 || TYPE_MODE (TREE_TYPE (oprnd0
)) == TYPE_MODE (TREE_TYPE (oprnd1
))
2103 || !type_has_mode_precision_p (TREE_TYPE (oprnd1
))
2104 || TYPE_PRECISION (TREE_TYPE (lhs
))
2105 != TYPE_PRECISION (TREE_TYPE (oprnd0
)))
2108 stmt_vec_info def_vinfo
= vect_get_internal_def (vinfo
, oprnd1
);
2112 *type_out
= get_vectype_for_scalar_type (TREE_TYPE (oprnd0
));
2113 if (*type_out
== NULL_TREE
)
/* If the shift amount is itself a same-precision cast, reuse its
   input, masking off excess bits where necessary.  */
2116 tree def
= NULL_TREE
;
2117 gassign
*def_stmt
= dyn_cast
<gassign
*> (def_vinfo
->stmt
);
2118 if (def_stmt
&& gimple_assign_cast_p (def_stmt
))
2120 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
2121 if (TYPE_MODE (TREE_TYPE (rhs1
)) == TYPE_MODE (TREE_TYPE (oprnd0
))
2122 && TYPE_PRECISION (TREE_TYPE (rhs1
))
2123 == TYPE_PRECISION (TREE_TYPE (oprnd0
)))
2125 if (TYPE_PRECISION (TREE_TYPE (oprnd1
))
2126 >= TYPE_PRECISION (TREE_TYPE (rhs1
)))
2131 = build_low_bits_mask (TREE_TYPE (rhs1
),
2132 TYPE_PRECISION (TREE_TYPE (oprnd1
)));
2133 def
= vect_recog_temp_ssa_var (TREE_TYPE (rhs1
), NULL
);
2134 def_stmt
= gimple_build_assign (def
, BIT_AND_EXPR
, rhs1
, mask
);
2135 stmt_vec_info new_stmt_info
2136 = new_stmt_vec_info (def_stmt
, vinfo
);
2137 set_vinfo_for_stmt (def_stmt
, new_stmt_info
);
2138 STMT_VINFO_VECTYPE (new_stmt_info
)
2139 = get_vectype_for_scalar_type (TREE_TYPE (rhs1
));
2140 new_pattern_def_seq (stmt_vinfo
, def_stmt
);
/* Otherwise cast the shift amount to OPRND0's type.  */
2145 if (def
== NULL_TREE
)
2147 def
= vect_recog_temp_ssa_var (TREE_TYPE (oprnd0
), NULL
);
2148 def_stmt
= gimple_build_assign (def
, NOP_EXPR
, oprnd1
);
2149 new_pattern_def_seq (stmt_vinfo
, def_stmt
);
2152 /* Pattern detected. */
2153 vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt
);
2155 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2156 var
= vect_recog_temp_ssa_var (TREE_TYPE (oprnd0
), NULL
);
2157 pattern_stmt
= gimple_build_assign (var
, rhs_code
, oprnd0
, def
);
2159 stmts
->safe_push (last_stmt
);
2160 return pattern_stmt
;
2163 /* Return true iff the target has a vector optab implementing the operation
2164 CODE on type VECTYPE. */
2167 target_has_vecop_for_code (tree_code code
, tree vectype
)
2169 optab voptab
= optab_for_tree_code (code
, vectype
, optab_vector
);
2171 && optab_handler (voptab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
;
2174 /* Verify that the target has optabs of VECTYPE to perform all the steps
2175 needed by the multiplication-by-immediate synthesis algorithm described by
2176 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
2177 present. Return true iff the target supports all the steps. */
2180 target_supports_mult_synth_alg (struct algorithm
*alg
, mult_variant var
,
2181 tree vectype
, bool synth_shift_p
)
2183 if (alg
->op
[0] != alg_zero
&& alg
->op
[0] != alg_m
)
2186 bool supports_vminus
= target_has_vecop_for_code (MINUS_EXPR
, vectype
);
2187 bool supports_vplus
= target_has_vecop_for_code (PLUS_EXPR
, vectype
);
2189 if (var
== negate_variant
2190 && !target_has_vecop_for_code (NEGATE_EXPR
, vectype
))
2193 /* If we must synthesize shifts with additions make sure that vector
2194 addition is available. */
2195 if ((var
== add_variant
|| synth_shift_p
) && !supports_vplus
)
2198 for (int i
= 1; i
< alg
->ops
; i
++)
2206 case alg_add_factor
:
2207 if (!supports_vplus
)
2212 case alg_sub_factor
:
2213 if (!supports_vminus
)
2219 case alg_impossible
:
2229 /* Synthesize a left shift of OP by AMNT bits using a series of additions and
2230 putting the final result in DEST. Append all statements but the last into
2231 VINFO. Return the last statement. */
2234 synth_lshift_by_additions (tree dest
, tree op
, HOST_WIDE_INT amnt
,
2235 stmt_vec_info vinfo
)
2238 tree itype
= TREE_TYPE (op
);
2240 gcc_assert (amnt
>= 0);
2241 for (i
= 0; i
< amnt
; i
++)
2243 tree tmp_var
= (i
< amnt
- 1) ? vect_recog_temp_ssa_var (itype
, NULL
)
2246 = gimple_build_assign (tmp_var
, PLUS_EXPR
, prev_res
, prev_res
);
2249 append_pattern_def_seq (vinfo
, stmt
);
2257 /* Helper for vect_synth_mult_by_constant. Apply a binary operation
2258 CODE to operands OP1 and OP2, creating a new temporary SSA var in
2259 the process if necessary. Append the resulting assignment statements
2260 to the sequence in STMT_VINFO. Return the SSA variable that holds the
2261 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
2262 left shifts using additions. */
2265 apply_binop_and_append_stmt (tree_code code
, tree op1
, tree op2
,
2266 stmt_vec_info stmt_vinfo
, bool synth_shift_p
)
2268 if (integer_zerop (op2
)
2269 && (code
== LSHIFT_EXPR
2270 || code
== PLUS_EXPR
))
2272 gcc_assert (TREE_CODE (op1
) == SSA_NAME
);
2277 tree itype
= TREE_TYPE (op1
);
2278 tree tmp_var
= vect_recog_temp_ssa_var (itype
, NULL
);
2280 if (code
== LSHIFT_EXPR
2283 stmt
= synth_lshift_by_additions (tmp_var
, op1
, TREE_INT_CST_LOW (op2
),
2285 append_pattern_def_seq (stmt_vinfo
, stmt
);
2289 stmt
= gimple_build_assign (tmp_var
, code
, op1
, op2
);
2290 append_pattern_def_seq (stmt_vinfo
, stmt
);
2294 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
2295 and simple arithmetic operations to be vectorized. Record the statements
2296 produced in STMT_VINFO and return the last statement in the sequence or
2297 NULL if it's not possible to synthesize such a multiplication.
2298 This function mirrors the behavior of expand_mult_const in expmed.c but
2299 works on tree-ssa form. */
2302 vect_synth_mult_by_constant (tree op
, tree val
,
2303 stmt_vec_info stmt_vinfo
)
2305 tree itype
= TREE_TYPE (op
);
2306 machine_mode mode
= TYPE_MODE (itype
);
2307 struct algorithm alg
;
2308 mult_variant variant
;
2309 if (!tree_fits_shwi_p (val
))
2312 /* Multiplication synthesis by shifts, adds and subs can introduce
2313 signed overflow where the original operation didn't. Perform the
2314 operations on an unsigned type and cast back to avoid this.
2315 In the future we may want to relax this for synthesis algorithms
2316 that we can prove do not cause unexpected overflow. */
2317 bool cast_to_unsigned_p
= !TYPE_OVERFLOW_WRAPS (itype
);
2319 tree multtype
= cast_to_unsigned_p
? unsigned_type_for (itype
) : itype
;
2321 /* Targets that don't support vector shifts but support vector additions
2322 can synthesize shifts that way. */
2323 bool synth_shift_p
= !vect_supportable_shift (LSHIFT_EXPR
, multtype
);
2325 HOST_WIDE_INT hwval
= tree_to_shwi (val
);
2326 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
2327 The vectorizer's benefit analysis will decide whether it's beneficial
2329 bool possible
= choose_mult_variant (mode
, hwval
, &alg
,
2330 &variant
, MAX_COST
);
2334 tree vectype
= get_vectype_for_scalar_type (multtype
);
2337 || !target_supports_mult_synth_alg (&alg
, variant
,
2338 vectype
, synth_shift_p
))
2343 /* Clear out the sequence of statements so we can populate it below. */
2344 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
) = NULL
;
2345 gimple
*stmt
= NULL
;
2347 if (cast_to_unsigned_p
)
2349 tree tmp_op
= vect_recog_temp_ssa_var (multtype
, NULL
);
2350 stmt
= gimple_build_assign (tmp_op
, CONVERT_EXPR
, op
);
2351 append_pattern_def_seq (stmt_vinfo
, stmt
);
2355 if (alg
.op
[0] == alg_zero
)
2356 accumulator
= build_int_cst (multtype
, 0);
2360 bool needs_fixup
= (variant
== negate_variant
)
2361 || (variant
== add_variant
);
2363 for (int i
= 1; i
< alg
.ops
; i
++)
2365 tree shft_log
= build_int_cst (multtype
, alg
.log
[i
]);
2366 tree accum_tmp
= vect_recog_temp_ssa_var (multtype
, NULL
);
2367 tree tmp_var
= NULL_TREE
;
2374 = synth_lshift_by_additions (accum_tmp
, accumulator
, alg
.log
[i
],
2377 stmt
= gimple_build_assign (accum_tmp
, LSHIFT_EXPR
, accumulator
,
2382 = apply_binop_and_append_stmt (LSHIFT_EXPR
, op
, shft_log
,
2383 stmt_vinfo
, synth_shift_p
);
2384 stmt
= gimple_build_assign (accum_tmp
, PLUS_EXPR
, accumulator
,
2388 tmp_var
= apply_binop_and_append_stmt (LSHIFT_EXPR
, op
,
2389 shft_log
, stmt_vinfo
,
2391 /* In some algorithms the first step involves zeroing the
2392 accumulator. If subtracting from such an accumulator
2393 just emit the negation directly. */
2394 if (integer_zerop (accumulator
))
2395 stmt
= gimple_build_assign (accum_tmp
, NEGATE_EXPR
, tmp_var
);
2397 stmt
= gimple_build_assign (accum_tmp
, MINUS_EXPR
, accumulator
,
2402 = apply_binop_and_append_stmt (LSHIFT_EXPR
, accumulator
, shft_log
,
2403 stmt_vinfo
, synth_shift_p
);
2404 stmt
= gimple_build_assign (accum_tmp
, PLUS_EXPR
, tmp_var
, op
);
2408 = apply_binop_and_append_stmt (LSHIFT_EXPR
, accumulator
, shft_log
,
2409 stmt_vinfo
, synth_shift_p
);
2410 stmt
= gimple_build_assign (accum_tmp
, MINUS_EXPR
, tmp_var
, op
);
2412 case alg_add_factor
:
2414 = apply_binop_and_append_stmt (LSHIFT_EXPR
, accumulator
, shft_log
,
2415 stmt_vinfo
, synth_shift_p
);
2416 stmt
= gimple_build_assign (accum_tmp
, PLUS_EXPR
, accumulator
,
2419 case alg_sub_factor
:
2421 = apply_binop_and_append_stmt (LSHIFT_EXPR
, accumulator
, shft_log
,
2422 stmt_vinfo
, synth_shift_p
);
2423 stmt
= gimple_build_assign (accum_tmp
, MINUS_EXPR
, tmp_var
,
2429 /* We don't want to append the last stmt in the sequence to stmt_vinfo
2430 but rather return it directly. */
2432 if ((i
< alg
.ops
- 1) || needs_fixup
|| cast_to_unsigned_p
)
2433 append_pattern_def_seq (stmt_vinfo
, stmt
);
2434 accumulator
= accum_tmp
;
2436 if (variant
== negate_variant
)
2438 tree accum_tmp
= vect_recog_temp_ssa_var (multtype
, NULL
);
2439 stmt
= gimple_build_assign (accum_tmp
, NEGATE_EXPR
, accumulator
);
2440 accumulator
= accum_tmp
;
2441 if (cast_to_unsigned_p
)
2442 append_pattern_def_seq (stmt_vinfo
, stmt
);
2444 else if (variant
== add_variant
)
2446 tree accum_tmp
= vect_recog_temp_ssa_var (multtype
, NULL
);
2447 stmt
= gimple_build_assign (accum_tmp
, PLUS_EXPR
, accumulator
, op
);
2448 accumulator
= accum_tmp
;
2449 if (cast_to_unsigned_p
)
2450 append_pattern_def_seq (stmt_vinfo
, stmt
);
2452 /* Move back to a signed if needed. */
2453 if (cast_to_unsigned_p
)
2455 tree accum_tmp
= vect_recog_temp_ssa_var (itype
, NULL
);
2456 stmt
= gimple_build_assign (accum_tmp
, CONVERT_EXPR
, accumulator
);
2462 /* Detect multiplication by constant and convert it into a sequence of
2463 shifts and additions, subtractions, negations. We reuse the
2464 choose_mult_variant algorithms from expmed.c
2468 STMTS: Contains a stmt from which the pattern search begins,
2473 * TYPE_OUT: The type of the output of this pattern.
2475 * Return value: A new stmt that will be used to replace
2476 the multiplication. */
2479 vect_recog_mult_pattern (vec
<gimple
*> *stmts
, tree
*type_out
)
2481 gimple
*last_stmt
= stmts
->pop ();
2482 tree oprnd0
, oprnd1
, vectype
, itype
;
2483 gimple
*pattern_stmt
;
2484 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
2486 if (!is_gimple_assign (last_stmt
))
2489 if (gimple_assign_rhs_code (last_stmt
) != MULT_EXPR
)
2492 oprnd0
= gimple_assign_rhs1 (last_stmt
);
2493 oprnd1
= gimple_assign_rhs2 (last_stmt
);
2494 itype
= TREE_TYPE (oprnd0
);
2496 if (TREE_CODE (oprnd0
) != SSA_NAME
2497 || TREE_CODE (oprnd1
) != INTEGER_CST
2498 || !INTEGRAL_TYPE_P (itype
)
2499 || !type_has_mode_precision_p (itype
))
2502 vectype
= get_vectype_for_scalar_type (itype
);
2503 if (vectype
== NULL_TREE
)
2506 /* If the target can handle vectorized multiplication natively,
2507 don't attempt to optimize this. */
2508 optab mul_optab
= optab_for_tree_code (MULT_EXPR
, vectype
, optab_default
);
2509 if (mul_optab
!= unknown_optab
)
2511 machine_mode vec_mode
= TYPE_MODE (vectype
);
2512 int icode
= (int) optab_handler (mul_optab
, vec_mode
);
2513 if (icode
!= CODE_FOR_nothing
)
2517 pattern_stmt
= vect_synth_mult_by_constant (oprnd0
, oprnd1
, stmt_vinfo
);
2521 /* Pattern detected. */
2522 vect_pattern_detected ("vect_recog_mult_pattern", last_stmt
);
2524 stmts
->safe_push (last_stmt
);
2525 *type_out
= vectype
;
2527 return pattern_stmt
;
2530 /* Detect a signed division by a constant that wouldn't be
2531 otherwise vectorized:
2537 where type 'type' is an integral type and N is a constant.
2539 Similarly handle modulo by a constant:
2545 * STMTS: Contains a stmt from which the pattern search begins,
2546 i.e. the division stmt. S1 is replaced by if N is a power
2547 of two constant and type is signed:
2548 S3 y_t = b_t < 0 ? N - 1 : 0;
2550 S1' a_t = x_t >> log2 (N);
2552 S4 is replaced if N is a power of two constant and
2553 type is signed by (where *_T temporaries have unsigned type):
2554 S9 y_T = b_t < 0 ? -1U : 0U;
2555 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2556 S7 z_t = (type) z_T;
2558 S5 x_t = w_t & (N - 1);
2559 S4' a_t = x_t - z_t;
2563 * TYPE_OUT: The type of the output of this pattern.
2565 * Return value: A new stmt that will be used to replace the division
2566 S1 or modulo S4 stmt. */
2569 vect_recog_divmod_pattern (vec
<gimple
*> *stmts
, tree
*type_out
)
2571 gimple
*last_stmt
= stmts
->pop ();
2572 tree oprnd0
, oprnd1
, vectype
, itype
, cond
;
2573 gimple
*pattern_stmt
, *def_stmt
;
2574 enum tree_code rhs_code
;
2575 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
2576 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
2579 int dummy_int
, prec
;
2580 stmt_vec_info def_stmt_vinfo
;
2582 if (!is_gimple_assign (last_stmt
))
2585 rhs_code
= gimple_assign_rhs_code (last_stmt
);
2588 case TRUNC_DIV_EXPR
:
2589 case TRUNC_MOD_EXPR
:
2595 oprnd0
= gimple_assign_rhs1 (last_stmt
);
2596 oprnd1
= gimple_assign_rhs2 (last_stmt
);
2597 itype
= TREE_TYPE (oprnd0
);
2598 if (TREE_CODE (oprnd0
) != SSA_NAME
2599 || TREE_CODE (oprnd1
) != INTEGER_CST
2600 || TREE_CODE (itype
) != INTEGER_TYPE
2601 || !type_has_mode_precision_p (itype
))
2604 scalar_int_mode itype_mode
= SCALAR_INT_TYPE_MODE (itype
);
2605 vectype
= get_vectype_for_scalar_type (itype
);
2606 if (vectype
== NULL_TREE
)
2609 if (optimize_bb_for_size_p (gimple_bb (last_stmt
)))
2611 /* If the target can handle vectorized division or modulo natively,
2612 don't attempt to optimize this, since native division is likely
2613 to give smaller code. */
2614 optab
= optab_for_tree_code (rhs_code
, vectype
, optab_default
);
2615 if (optab
!= unknown_optab
)
2617 machine_mode vec_mode
= TYPE_MODE (vectype
);
2618 int icode
= (int) optab_handler (optab
, vec_mode
);
2619 if (icode
!= CODE_FOR_nothing
)
2624 prec
= TYPE_PRECISION (itype
);
2625 if (integer_pow2p (oprnd1
))
2627 if (TYPE_UNSIGNED (itype
) || tree_int_cst_sgn (oprnd1
) != 1)
2630 /* Pattern detected. */
2631 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt
);
2633 cond
= build2 (LT_EXPR
, boolean_type_node
, oprnd0
,
2634 build_int_cst (itype
, 0));
2635 if (rhs_code
== TRUNC_DIV_EXPR
)
2637 tree var
= vect_recog_temp_ssa_var (itype
, NULL
);
2640 = gimple_build_assign (var
, COND_EXPR
, cond
,
2641 fold_build2 (MINUS_EXPR
, itype
, oprnd1
,
2642 build_int_cst (itype
, 1)),
2643 build_int_cst (itype
, 0));
2644 new_pattern_def_seq (stmt_vinfo
, def_stmt
);
2645 var
= vect_recog_temp_ssa_var (itype
, NULL
);
2647 = gimple_build_assign (var
, PLUS_EXPR
, oprnd0
,
2648 gimple_assign_lhs (def_stmt
));
2649 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2651 shift
= build_int_cst (itype
, tree_log2 (oprnd1
));
2653 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2654 RSHIFT_EXPR
, var
, shift
);
2659 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
) = NULL
;
2660 if (compare_tree_int (oprnd1
, 2) == 0)
2662 signmask
= vect_recog_temp_ssa_var (itype
, NULL
);
2663 def_stmt
= gimple_build_assign (signmask
, COND_EXPR
, cond
,
2664 build_int_cst (itype
, 1),
2665 build_int_cst (itype
, 0));
2666 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2671 = build_nonstandard_integer_type (prec
, 1);
2672 tree vecutype
= get_vectype_for_scalar_type (utype
);
2674 = build_int_cst (utype
, GET_MODE_BITSIZE (itype_mode
)
2675 - tree_log2 (oprnd1
));
2676 tree var
= vect_recog_temp_ssa_var (utype
, NULL
);
2678 def_stmt
= gimple_build_assign (var
, COND_EXPR
, cond
,
2679 build_int_cst (utype
, -1),
2680 build_int_cst (utype
, 0));
2681 def_stmt_vinfo
= new_stmt_vec_info (def_stmt
, vinfo
);
2682 set_vinfo_for_stmt (def_stmt
, def_stmt_vinfo
);
2683 STMT_VINFO_VECTYPE (def_stmt_vinfo
) = vecutype
;
2684 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2685 var
= vect_recog_temp_ssa_var (utype
, NULL
);
2686 def_stmt
= gimple_build_assign (var
, RSHIFT_EXPR
,
2687 gimple_assign_lhs (def_stmt
),
2689 def_stmt_vinfo
= new_stmt_vec_info (def_stmt
, vinfo
);
2690 set_vinfo_for_stmt (def_stmt
, def_stmt_vinfo
);
2691 STMT_VINFO_VECTYPE (def_stmt_vinfo
) = vecutype
;
2692 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2693 signmask
= vect_recog_temp_ssa_var (itype
, NULL
);
2695 = gimple_build_assign (signmask
, NOP_EXPR
, var
);
2696 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2699 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2700 PLUS_EXPR
, oprnd0
, signmask
);
2701 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2703 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2704 BIT_AND_EXPR
, gimple_assign_lhs (def_stmt
),
2705 fold_build2 (MINUS_EXPR
, itype
, oprnd1
,
2706 build_int_cst (itype
, 1)));
2707 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2710 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2711 MINUS_EXPR
, gimple_assign_lhs (def_stmt
),
2715 stmts
->safe_push (last_stmt
);
2717 *type_out
= vectype
;
2718 return pattern_stmt
;
2721 if (prec
> HOST_BITS_PER_WIDE_INT
2722 || integer_zerop (oprnd1
))
2725 if (!can_mult_highpart_p (TYPE_MODE (vectype
), TYPE_UNSIGNED (itype
)))
2728 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
) = NULL
;
2730 if (TYPE_UNSIGNED (itype
))
2732 unsigned HOST_WIDE_INT mh
, ml
;
2733 int pre_shift
, post_shift
;
2734 unsigned HOST_WIDE_INT d
= (TREE_INT_CST_LOW (oprnd1
)
2735 & GET_MODE_MASK (itype_mode
));
2736 tree t1
, t2
, t3
, t4
;
2738 if (d
>= (HOST_WIDE_INT_1U
<< (prec
- 1)))
2739 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2742 /* Find a suitable multiplier and right shift count
2743 instead of multiplying with D. */
2744 mh
= choose_multiplier (d
, prec
, prec
, &ml
, &post_shift
, &dummy_int
);
2746 /* If the suggested multiplier is more than SIZE bits, we can do better
2747 for even divisors, using an initial right shift. */
2748 if (mh
!= 0 && (d
& 1) == 0)
2750 pre_shift
= ctz_or_zero (d
);
2751 mh
= choose_multiplier (d
>> pre_shift
, prec
, prec
- pre_shift
,
2752 &ml
, &post_shift
, &dummy_int
);
2760 if (post_shift
- 1 >= prec
)
2763 /* t1 = oprnd0 h* ml;
2767 q = t4 >> (post_shift - 1); */
2768 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
2769 def_stmt
= gimple_build_assign (t1
, MULT_HIGHPART_EXPR
, oprnd0
,
2770 build_int_cst (itype
, ml
));
2771 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2773 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
2775 = gimple_build_assign (t2
, MINUS_EXPR
, oprnd0
, t1
);
2776 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2778 t3
= vect_recog_temp_ssa_var (itype
, NULL
);
2780 = gimple_build_assign (t3
, RSHIFT_EXPR
, t2
, integer_one_node
);
2781 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2783 t4
= vect_recog_temp_ssa_var (itype
, NULL
);
2785 = gimple_build_assign (t4
, PLUS_EXPR
, t1
, t3
);
2787 if (post_shift
!= 1)
2789 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2791 q
= vect_recog_temp_ssa_var (itype
, NULL
);
2793 = gimple_build_assign (q
, RSHIFT_EXPR
, t4
,
2794 build_int_cst (itype
, post_shift
- 1));
2799 pattern_stmt
= def_stmt
;
2804 if (pre_shift
>= prec
|| post_shift
>= prec
)
2807 /* t1 = oprnd0 >> pre_shift;
2809 q = t2 >> post_shift; */
2812 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
2814 = gimple_build_assign (t1
, RSHIFT_EXPR
, oprnd0
,
2815 build_int_cst (NULL
, pre_shift
));
2816 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2821 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
2822 def_stmt
= gimple_build_assign (t2
, MULT_HIGHPART_EXPR
, t1
,
2823 build_int_cst (itype
, ml
));
2827 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2829 q
= vect_recog_temp_ssa_var (itype
, NULL
);
2831 = gimple_build_assign (q
, RSHIFT_EXPR
, t2
,
2832 build_int_cst (itype
, post_shift
));
2837 pattern_stmt
= def_stmt
;
2842 unsigned HOST_WIDE_INT ml
;
2844 HOST_WIDE_INT d
= TREE_INT_CST_LOW (oprnd1
);
2845 unsigned HOST_WIDE_INT abs_d
;
2847 tree t1
, t2
, t3
, t4
;
2849 /* Give up for -1. */
2853 /* Since d might be INT_MIN, we have to cast to
2854 unsigned HOST_WIDE_INT before negating to avoid
2855 undefined signed overflow. */
2857 ? (unsigned HOST_WIDE_INT
) d
2858 : - (unsigned HOST_WIDE_INT
) d
);
2860 /* n rem d = n rem -d */
2861 if (rhs_code
== TRUNC_MOD_EXPR
&& d
< 0)
2864 oprnd1
= build_int_cst (itype
, abs_d
);
2866 else if (HOST_BITS_PER_WIDE_INT
>= prec
2867 && abs_d
== HOST_WIDE_INT_1U
<< (prec
- 1))
2868 /* This case is not handled correctly below. */
2871 choose_multiplier (abs_d
, prec
, prec
- 1, &ml
, &post_shift
, &dummy_int
);
2872 if (ml
>= HOST_WIDE_INT_1U
<< (prec
- 1))
2875 ml
|= HOST_WIDE_INT_M1U
<< (prec
- 1);
2877 if (post_shift
>= prec
)
2880 /* t1 = oprnd0 h* ml; */
2881 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
2882 def_stmt
= gimple_build_assign (t1
, MULT_HIGHPART_EXPR
, oprnd0
,
2883 build_int_cst (itype
, ml
));
2887 /* t2 = t1 + oprnd0; */
2888 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2889 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
2890 def_stmt
= gimple_build_assign (t2
, PLUS_EXPR
, t1
, oprnd0
);
2897 /* t3 = t2 >> post_shift; */
2898 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2899 t3
= vect_recog_temp_ssa_var (itype
, NULL
);
2900 def_stmt
= gimple_build_assign (t3
, RSHIFT_EXPR
, t2
,
2901 build_int_cst (itype
, post_shift
));
2906 wide_int oprnd0_min
, oprnd0_max
;
2908 if (get_range_info (oprnd0
, &oprnd0_min
, &oprnd0_max
) == VR_RANGE
)
2910 if (!wi::neg_p (oprnd0_min
, TYPE_SIGN (itype
)))
2912 else if (wi::neg_p (oprnd0_max
, TYPE_SIGN (itype
)))
2916 if (msb
== 0 && d
>= 0)
2920 pattern_stmt
= def_stmt
;
2924 /* t4 = oprnd0 >> (prec - 1);
2925 or if we know from VRP that oprnd0 >= 0
2927 or if we know from VRP that oprnd0 < 0
2929 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2930 t4
= vect_recog_temp_ssa_var (itype
, NULL
);
2932 def_stmt
= gimple_build_assign (t4
, INTEGER_CST
,
2933 build_int_cst (itype
, msb
));
2935 def_stmt
= gimple_build_assign (t4
, RSHIFT_EXPR
, oprnd0
,
2936 build_int_cst (itype
, prec
- 1));
2937 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2939 /* q = t3 - t4; or q = t4 - t3; */
2940 q
= vect_recog_temp_ssa_var (itype
, NULL
);
2941 pattern_stmt
= gimple_build_assign (q
, MINUS_EXPR
, d
< 0 ? t4
: t3
,
2946 if (rhs_code
== TRUNC_MOD_EXPR
)
2950 /* We divided. Now finish by:
2953 append_pattern_def_seq (stmt_vinfo
, pattern_stmt
);
2955 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
2956 def_stmt
= gimple_build_assign (t1
, MULT_EXPR
, q
, oprnd1
);
2957 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2959 r
= vect_recog_temp_ssa_var (itype
, NULL
);
2960 pattern_stmt
= gimple_build_assign (r
, MINUS_EXPR
, oprnd0
, t1
);
2963 /* Pattern detected. */
2964 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt
);
2966 stmts
->safe_push (last_stmt
);
2968 *type_out
= vectype
;
2969 return pattern_stmt
;
2972 /* Function vect_recog_mixed_size_cond_pattern
2974 Try to find the following pattern:
2979 S1 a_T = x_t CMP y_t ? b_T : c_T;
2981 where type 'TYPE' is an integral type which has different size
2982 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
2983 than 'type', the constants need to fit into an integer type
2984 with the same width as 'type') or results of conversion from 'type'.
2988 * LAST_STMT: A stmt from which the pattern search begins.
2992 * TYPE_OUT: The type of the output of this pattern.
2994 * Return value: A new stmt that will be used to replace the pattern.
2995 Additionally a def_stmt is added.
2997 a_it = x_t CMP y_t ? b_it : c_it;
2998 a_T = (TYPE) a_it; */
3001 vect_recog_mixed_size_cond_pattern (vec
<gimple
*> *stmts
, tree
*type_out
)
3003 gimple
*last_stmt
= (*stmts
)[0];
3004 tree cond_expr
, then_clause
, else_clause
;
3005 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
), def_stmt_info
;
3006 tree type
, vectype
, comp_vectype
, itype
= NULL_TREE
, vecitype
;
3007 gimple
*pattern_stmt
, *def_stmt
;
3008 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
3009 tree orig_type0
= NULL_TREE
, orig_type1
= NULL_TREE
;
3010 gimple
*def_stmt0
= NULL
, *def_stmt1
= NULL
;
3012 tree comp_scalar_type
;
3014 if (!is_gimple_assign (last_stmt
)
3015 || gimple_assign_rhs_code (last_stmt
) != COND_EXPR
3016 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_internal_def
)
3019 cond_expr
= gimple_assign_rhs1 (last_stmt
);
3020 then_clause
= gimple_assign_rhs2 (last_stmt
);
3021 else_clause
= gimple_assign_rhs3 (last_stmt
);
3023 if (!COMPARISON_CLASS_P (cond_expr
))
3026 comp_scalar_type
= TREE_TYPE (TREE_OPERAND (cond_expr
, 0));
3027 comp_vectype
= get_vectype_for_scalar_type (comp_scalar_type
);
3028 if (comp_vectype
== NULL_TREE
)
3031 type
= gimple_expr_type (last_stmt
);
3032 if (types_compatible_p (type
, comp_scalar_type
)
3033 || ((TREE_CODE (then_clause
) != INTEGER_CST
3034 || TREE_CODE (else_clause
) != INTEGER_CST
)
3035 && !INTEGRAL_TYPE_P (comp_scalar_type
))
3036 || !INTEGRAL_TYPE_P (type
))
3039 if ((TREE_CODE (then_clause
) != INTEGER_CST
3040 && !type_conversion_p (then_clause
, last_stmt
, false, &orig_type0
,
3041 &def_stmt0
, &promotion
))
3042 || (TREE_CODE (else_clause
) != INTEGER_CST
3043 && !type_conversion_p (else_clause
, last_stmt
, false, &orig_type1
,
3044 &def_stmt1
, &promotion
)))
3047 if (orig_type0
&& orig_type1
3048 && !types_compatible_p (orig_type0
, orig_type1
))
3053 if (!types_compatible_p (orig_type0
, comp_scalar_type
))
3055 then_clause
= gimple_assign_rhs1 (def_stmt0
);
3061 if (!types_compatible_p (orig_type1
, comp_scalar_type
))
3063 else_clause
= gimple_assign_rhs1 (def_stmt1
);
3068 HOST_WIDE_INT cmp_mode_size
3069 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype
));
3071 scalar_int_mode type_mode
= SCALAR_INT_TYPE_MODE (type
);
3072 if (GET_MODE_BITSIZE (type_mode
) == cmp_mode_size
)
3075 vectype
= get_vectype_for_scalar_type (type
);
3076 if (vectype
== NULL_TREE
)
3079 if (expand_vec_cond_expr_p (vectype
, comp_vectype
, TREE_CODE (cond_expr
)))
3082 if (itype
== NULL_TREE
)
3083 itype
= build_nonstandard_integer_type (cmp_mode_size
,
3084 TYPE_UNSIGNED (type
));
3086 if (itype
== NULL_TREE
3087 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype
)) != cmp_mode_size
)
3090 vecitype
= get_vectype_for_scalar_type (itype
);
3091 if (vecitype
== NULL_TREE
)
3094 if (!expand_vec_cond_expr_p (vecitype
, comp_vectype
, TREE_CODE (cond_expr
)))
3097 if (GET_MODE_BITSIZE (type_mode
) > cmp_mode_size
)
3099 if ((TREE_CODE (then_clause
) == INTEGER_CST
3100 && !int_fits_type_p (then_clause
, itype
))
3101 || (TREE_CODE (else_clause
) == INTEGER_CST
3102 && !int_fits_type_p (else_clause
, itype
)))
3106 def_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
3107 COND_EXPR
, unshare_expr (cond_expr
),
3108 fold_convert (itype
, then_clause
),
3109 fold_convert (itype
, else_clause
));
3110 pattern_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (type
, NULL
),
3111 NOP_EXPR
, gimple_assign_lhs (def_stmt
));
3113 new_pattern_def_seq (stmt_vinfo
, def_stmt
);
3114 def_stmt_info
= new_stmt_vec_info (def_stmt
, vinfo
);
3115 set_vinfo_for_stmt (def_stmt
, def_stmt_info
);
3116 STMT_VINFO_VECTYPE (def_stmt_info
) = vecitype
;
3117 *type_out
= vectype
;
3119 vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt
);
3121 return pattern_stmt
;
3125 /* Helper function of vect_recog_bool_pattern. Called recursively, return
3126 true if bool VAR can and should be optimized that way. Assume it shouldn't
3127 in case it's a result of a comparison which can be directly vectorized into
3128 a vector comparison. Fills in STMTS with all stmts visited during the
3132 check_bool_pattern (tree var
, vec_info
*vinfo
, hash_set
<gimple
*> &stmts
)
3135 enum tree_code rhs_code
;
3137 stmt_vec_info def_stmt_info
= vect_get_internal_def (vinfo
, var
);
3141 gassign
*def_stmt
= dyn_cast
<gassign
*> (def_stmt_info
->stmt
);
3145 if (stmts
.contains (def_stmt
))
3148 rhs1
= gimple_assign_rhs1 (def_stmt
);
3149 rhs_code
= gimple_assign_rhs_code (def_stmt
);
3153 if (! check_bool_pattern (rhs1
, vinfo
, stmts
))
3158 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1
)))
3160 if (! check_bool_pattern (rhs1
, vinfo
, stmts
))
3165 if (! check_bool_pattern (rhs1
, vinfo
, stmts
))
3172 if (! check_bool_pattern (rhs1
, vinfo
, stmts
)
3173 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt
), vinfo
, stmts
))
3178 if (TREE_CODE_CLASS (rhs_code
) == tcc_comparison
)
3180 tree vecitype
, comp_vectype
;
3182 /* If the comparison can throw, then is_gimple_condexpr will be
3183 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
3184 if (stmt_could_throw_p (def_stmt
))
3187 comp_vectype
= get_vectype_for_scalar_type (TREE_TYPE (rhs1
));
3188 if (comp_vectype
== NULL_TREE
)
3191 tree mask_type
= get_mask_type_for_scalar_type (TREE_TYPE (rhs1
));
3193 && expand_vec_cmp_expr_p (comp_vectype
, mask_type
, rhs_code
))
3196 if (TREE_CODE (TREE_TYPE (rhs1
)) != INTEGER_TYPE
)
3198 scalar_mode mode
= SCALAR_TYPE_MODE (TREE_TYPE (rhs1
));
3200 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode
), 1);
3201 vecitype
= get_vectype_for_scalar_type (itype
);
3202 if (vecitype
== NULL_TREE
)
3206 vecitype
= comp_vectype
;
3207 if (! expand_vec_cond_expr_p (vecitype
, comp_vectype
, rhs_code
))
3215 bool res
= stmts
.add (def_stmt
);
3216 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
3223 /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
3224 stmt (SSA_NAME_DEF_STMT of VAR) adding a cast to STMT_INFOs
3225 pattern sequence. */
3228 adjust_bool_pattern_cast (tree type
, tree var
, stmt_vec_info stmt_info
)
3230 gimple
*cast_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (type
, NULL
),
3232 stmt_vec_info patt_vinfo
= new_stmt_vec_info (cast_stmt
, stmt_info
->vinfo
);
3233 set_vinfo_for_stmt (cast_stmt
, patt_vinfo
);
3234 STMT_VINFO_VECTYPE (patt_vinfo
) = get_vectype_for_scalar_type (type
);
3235 append_pattern_def_seq (stmt_info
, cast_stmt
);
3236 return gimple_assign_lhs (cast_stmt
);
3239 /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
3240 VAR is an SSA_NAME that should be transformed from bool to a wider integer
3241 type, OUT_TYPE is the desired final integer type of the whole pattern.
3242 STMT_INFO is the info of the pattern root and is where pattern stmts should
3243 be associated with. DEFS is a map of pattern defs. */
3246 adjust_bool_pattern (tree var
, tree out_type
,
3247 stmt_vec_info stmt_info
, hash_map
<tree
, tree
> &defs
)
3249 gimple
*stmt
= SSA_NAME_DEF_STMT (var
);
3250 enum tree_code rhs_code
, def_rhs_code
;
3251 tree itype
, cond_expr
, rhs1
, rhs2
, irhs1
, irhs2
;
3253 gimple
*pattern_stmt
, *def_stmt
;
3254 tree trueval
= NULL_TREE
;
3256 rhs1
= gimple_assign_rhs1 (stmt
);
3257 rhs2
= gimple_assign_rhs2 (stmt
);
3258 rhs_code
= gimple_assign_rhs_code (stmt
);
3259 loc
= gimple_location (stmt
);
3264 irhs1
= *defs
.get (rhs1
);
3265 itype
= TREE_TYPE (irhs1
);
3267 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
3272 irhs1
= *defs
.get (rhs1
);
3273 itype
= TREE_TYPE (irhs1
);
3275 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
3276 BIT_XOR_EXPR
, irhs1
, build_int_cst (itype
, 1));
3280 /* Try to optimize x = y & (a < b ? 1 : 0); into
3281 x = (a < b ? y : 0);
3287 S1 a_b = x1 CMP1 y1;
3288 S2 b_b = x2 CMP2 y2;
3290 S4 d_T = (TYPE) c_b;
3292 we would normally emit:
3294 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3295 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3296 S3' c_T = a_T & b_T;
3299 but we can save one stmt by using the
3300 result of one of the COND_EXPRs in the other COND_EXPR and leave
3301 BIT_AND_EXPR stmt out:
3303 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3304 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3307 At least when VEC_COND_EXPR is implemented using masks
3308 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
3309 computes the comparison masks and ands it, in one case with
3310 all ones vector, in the other case with a vector register.
3311 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
3312 often more expensive. */
3313 def_stmt
= SSA_NAME_DEF_STMT (rhs2
);
3314 def_rhs_code
= gimple_assign_rhs_code (def_stmt
);
3315 if (TREE_CODE_CLASS (def_rhs_code
) == tcc_comparison
)
3317 irhs1
= *defs
.get (rhs1
);
3318 tree def_rhs1
= gimple_assign_rhs1 (def_stmt
);
3319 if (TYPE_PRECISION (TREE_TYPE (irhs1
))
3320 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1
))))
3322 rhs_code
= def_rhs_code
;
3324 rhs2
= gimple_assign_rhs2 (def_stmt
);
3329 irhs2
= *defs
.get (rhs2
);
3332 def_stmt
= SSA_NAME_DEF_STMT (rhs1
);
3333 def_rhs_code
= gimple_assign_rhs_code (def_stmt
);
3334 if (TREE_CODE_CLASS (def_rhs_code
) == tcc_comparison
)
3336 irhs2
= *defs
.get (rhs2
);
3337 tree def_rhs1
= gimple_assign_rhs1 (def_stmt
);
3338 if (TYPE_PRECISION (TREE_TYPE (irhs2
))
3339 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1
))))
3341 rhs_code
= def_rhs_code
;
3343 rhs2
= gimple_assign_rhs2 (def_stmt
);
3348 irhs1
= *defs
.get (rhs1
);
3354 irhs1
= *defs
.get (rhs1
);
3355 irhs2
= *defs
.get (rhs2
);
3357 if (TYPE_PRECISION (TREE_TYPE (irhs1
))
3358 != TYPE_PRECISION (TREE_TYPE (irhs2
)))
3360 int prec1
= TYPE_PRECISION (TREE_TYPE (irhs1
));
3361 int prec2
= TYPE_PRECISION (TREE_TYPE (irhs2
));
3362 int out_prec
= TYPE_PRECISION (out_type
);
3363 if (absu_hwi (out_prec
- prec1
) < absu_hwi (out_prec
- prec2
))
3364 irhs2
= adjust_bool_pattern_cast (TREE_TYPE (irhs1
), irhs2
,
3366 else if (absu_hwi (out_prec
- prec1
) > absu_hwi (out_prec
- prec2
))
3367 irhs1
= adjust_bool_pattern_cast (TREE_TYPE (irhs2
), irhs1
,
3371 irhs1
= adjust_bool_pattern_cast (out_type
, irhs1
, stmt_info
);
3372 irhs2
= adjust_bool_pattern_cast (out_type
, irhs2
, stmt_info
);
3375 itype
= TREE_TYPE (irhs1
);
3377 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
3378 rhs_code
, irhs1
, irhs2
);
3383 gcc_assert (TREE_CODE_CLASS (rhs_code
) == tcc_comparison
);
3384 if (TREE_CODE (TREE_TYPE (rhs1
)) != INTEGER_TYPE
3385 || !TYPE_UNSIGNED (TREE_TYPE (rhs1
))
3386 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1
)),
3387 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1
)))))
3389 scalar_mode mode
= SCALAR_TYPE_MODE (TREE_TYPE (rhs1
));
3391 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode
), 1);
3394 itype
= TREE_TYPE (rhs1
);
3395 cond_expr
= build2_loc (loc
, rhs_code
, itype
, rhs1
, rhs2
);
3396 if (trueval
== NULL_TREE
)
3397 trueval
= build_int_cst (itype
, 1);
3399 gcc_checking_assert (useless_type_conversion_p (itype
,
3400 TREE_TYPE (trueval
)));
3402 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
3403 COND_EXPR
, cond_expr
, trueval
,
3404 build_int_cst (itype
, 0));
3408 gimple_set_location (pattern_stmt
, loc
);
3409 /* ??? Why does vect_mark_pattern_stmts set the vector type on all
3410 pattern def seq stmts instead of just letting auto-detection do
3412 stmt_vec_info patt_vinfo
= new_stmt_vec_info (pattern_stmt
, stmt_info
->vinfo
);
3413 set_vinfo_for_stmt (pattern_stmt
, patt_vinfo
);
3414 STMT_VINFO_VECTYPE (patt_vinfo
) = get_vectype_for_scalar_type (itype
);
3415 append_pattern_def_seq (stmt_info
, pattern_stmt
);
3416 defs
.put (var
, gimple_assign_lhs (pattern_stmt
));
3419 /* Comparison function to qsort a vector of gimple stmts after UID. */
3422 sort_after_uid (const void *p1
, const void *p2
)
3424 const gimple
*stmt1
= *(const gimple
* const *)p1
;
3425 const gimple
*stmt2
= *(const gimple
* const *)p2
;
3426 return gimple_uid (stmt1
) - gimple_uid (stmt2
);
3429 /* Create pattern stmts for all stmts participating in the bool pattern
3430 specified by BOOL_STMT_SET and its root STMT with the desired type
3431 OUT_TYPE. Return the def of the pattern root. */
3434 adjust_bool_stmts (hash_set
<gimple
*> &bool_stmt_set
,
3435 tree out_type
, gimple
*stmt
)
3437 /* Gather original stmts in the bool pattern in their order of appearance
3439 auto_vec
<gimple
*> bool_stmts (bool_stmt_set
.elements ());
3440 for (hash_set
<gimple
*>::iterator i
= bool_stmt_set
.begin ();
3441 i
!= bool_stmt_set
.end (); ++i
)
3442 bool_stmts
.quick_push (*i
);
3443 bool_stmts
.qsort (sort_after_uid
);
3445 /* Now process them in that order, producing pattern stmts. */
3446 hash_map
<tree
, tree
> defs
;
3447 for (unsigned i
= 0; i
< bool_stmts
.length (); ++i
)
3448 adjust_bool_pattern (gimple_assign_lhs (bool_stmts
[i
]),
3449 out_type
, vinfo_for_stmt (stmt
), defs
);
3451 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
3452 gimple
*pattern_stmt
3453 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (stmt
)));
3454 return gimple_assign_lhs (pattern_stmt
);
3457 /* Helper for search_type_for_mask. */
3460 search_type_for_mask_1 (tree var
, vec_info
*vinfo
,
3461 hash_map
<gimple
*, tree
> &cache
)
3464 enum tree_code rhs_code
;
3465 tree res
= NULL_TREE
, res2
;
3467 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var
)))
3470 stmt_vec_info def_stmt_info
= vect_get_internal_def (vinfo
, var
);
3474 gassign
*def_stmt
= dyn_cast
<gassign
*> (def_stmt_info
->stmt
);
3478 tree
*c
= cache
.get (def_stmt
);
3482 rhs_code
= gimple_assign_rhs_code (def_stmt
);
3483 rhs1
= gimple_assign_rhs1 (def_stmt
);
3490 res
= search_type_for_mask_1 (rhs1
, vinfo
, cache
);
3496 res
= search_type_for_mask_1 (rhs1
, vinfo
, cache
);
3497 res2
= search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt
), vinfo
,
3499 if (!res
|| (res2
&& TYPE_PRECISION (res
) > TYPE_PRECISION (res2
)))
3504 if (TREE_CODE_CLASS (rhs_code
) == tcc_comparison
)
3506 tree comp_vectype
, mask_type
;
3508 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1
)))
3510 res
= search_type_for_mask_1 (rhs1
, vinfo
, cache
);
3511 res2
= search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt
),
3513 if (!res
|| (res2
&& TYPE_PRECISION (res
) > TYPE_PRECISION (res2
)))
3518 comp_vectype
= get_vectype_for_scalar_type (TREE_TYPE (rhs1
));
3519 if (comp_vectype
== NULL_TREE
)
3525 mask_type
= get_mask_type_for_scalar_type (TREE_TYPE (rhs1
));
3527 || !expand_vec_cmp_expr_p (comp_vectype
, mask_type
, rhs_code
))
3533 if (TREE_CODE (TREE_TYPE (rhs1
)) != INTEGER_TYPE
3534 || !TYPE_UNSIGNED (TREE_TYPE (rhs1
)))
3536 scalar_mode mode
= SCALAR_TYPE_MODE (TREE_TYPE (rhs1
));
3537 res
= build_nonstandard_integer_type (GET_MODE_BITSIZE (mode
), 1);
3540 res
= TREE_TYPE (rhs1
);
3544 cache
.put (def_stmt
, res
);
3548 /* Return the proper type for converting bool VAR into
3549 an integer value or NULL_TREE if no such type exists.
3550 The type is chosen so that converted value has the
3551 same number of elements as VAR's vector type. */
3554 search_type_for_mask (tree var
, vec_info
*vinfo
)
3556 hash_map
<gimple
*, tree
> cache
;
3557 return search_type_for_mask_1 (var
, vinfo
, cache
);
3560 /* Function vect_recog_bool_pattern
3562 Try to find pattern like following:
3564 bool a_b, b_b, c_b, d_b, e_b;
3567 S1 a_b = x1 CMP1 y1;
3568 S2 b_b = x2 CMP2 y2;
3570 S4 d_b = x3 CMP3 y3;
3572 S6 f_T = (TYPE) e_b;
3574 where type 'TYPE' is an integral type. Or a similar pattern
3577 S6 f_Y = e_b ? r_Y : s_Y;
3579 as results from if-conversion of a complex condition.
3583 * LAST_STMT: A stmt at the end from which the pattern
3584 search begins, i.e. cast of a bool to
3589 * TYPE_OUT: The type of the output of this pattern.
3591 * Return value: A new stmt that will be used to replace the pattern.
3593 Assuming size of TYPE is the same as size of all comparisons
3594 (otherwise some casts would be added where needed), the above
3595 sequence we create related pattern stmts:
3596 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3597 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3598 S4' d_T = x3 CMP3 y3 ? 1 : 0;
3599 S5' e_T = c_T | d_T;
3602 Instead of the above S3' we could emit:
3603 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3604 S3' c_T = a_T | b_T;
3605 but the above is more efficient. */
3608 vect_recog_bool_pattern (vec
<gimple
*> *stmts
, tree
*type_out
)
3610 gimple
*last_stmt
= stmts
->pop ();
3611 enum tree_code rhs_code
;
3612 tree var
, lhs
, rhs
, vectype
;
3613 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
3614 stmt_vec_info new_stmt_info
;
3615 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
3616 gimple
*pattern_stmt
;
3618 if (!is_gimple_assign (last_stmt
))
3621 var
= gimple_assign_rhs1 (last_stmt
);
3622 lhs
= gimple_assign_lhs (last_stmt
);
3624 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var
)))
3627 hash_set
<gimple
*> bool_stmts
;
3629 rhs_code
= gimple_assign_rhs_code (last_stmt
);
3630 if (CONVERT_EXPR_CODE_P (rhs_code
))
3632 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
3633 || TYPE_PRECISION (TREE_TYPE (lhs
)) == 1)
3635 vectype
= get_vectype_for_scalar_type (TREE_TYPE (lhs
));
3636 if (vectype
== NULL_TREE
)
3639 if (check_bool_pattern (var
, vinfo
, bool_stmts
))
3641 rhs
= adjust_bool_stmts (bool_stmts
, TREE_TYPE (lhs
), last_stmt
);
3642 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
3643 if (useless_type_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (rhs
)))
3644 pattern_stmt
= gimple_build_assign (lhs
, SSA_NAME
, rhs
);
3647 = gimple_build_assign (lhs
, NOP_EXPR
, rhs
);
3651 tree type
= search_type_for_mask (var
, vinfo
);
3652 tree cst0
, cst1
, tmp
;
3657 /* We may directly use cond with narrowed type to avoid
3658 multiple cond exprs with following result packing and
3659 perform single cond with packed mask instead. In case
3660 of widening we better make cond first and then extract
3662 if (TYPE_MODE (type
) == TYPE_MODE (TREE_TYPE (lhs
)))
3663 type
= TREE_TYPE (lhs
);
3665 cst0
= build_int_cst (type
, 0);
3666 cst1
= build_int_cst (type
, 1);
3667 tmp
= vect_recog_temp_ssa_var (type
, NULL
);
3668 pattern_stmt
= gimple_build_assign (tmp
, COND_EXPR
, var
, cst1
, cst0
);
3670 if (!useless_type_conversion_p (type
, TREE_TYPE (lhs
)))
3672 tree new_vectype
= get_vectype_for_scalar_type (type
);
3673 new_stmt_info
= new_stmt_vec_info (pattern_stmt
, vinfo
);
3674 set_vinfo_for_stmt (pattern_stmt
, new_stmt_info
);
3675 STMT_VINFO_VECTYPE (new_stmt_info
) = new_vectype
;
3676 new_pattern_def_seq (stmt_vinfo
, pattern_stmt
);
3678 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
3679 pattern_stmt
= gimple_build_assign (lhs
, CONVERT_EXPR
, tmp
);
3683 *type_out
= vectype
;
3684 stmts
->safe_push (last_stmt
);
3685 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt
);
3687 return pattern_stmt
;
3689 else if (rhs_code
== COND_EXPR
3690 && TREE_CODE (var
) == SSA_NAME
)
3692 vectype
= get_vectype_for_scalar_type (TREE_TYPE (lhs
));
3693 if (vectype
== NULL_TREE
)
3696 /* Build a scalar type for the boolean result that when
3697 vectorized matches the vector type of the result in
3698 size and number of elements. */
3700 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype
)),
3701 TYPE_VECTOR_SUBPARTS (vectype
));
3704 = build_nonstandard_integer_type (prec
,
3705 TYPE_UNSIGNED (TREE_TYPE (var
)));
3706 if (get_vectype_for_scalar_type (type
) == NULL_TREE
)
3709 if (!check_bool_pattern (var
, vinfo
, bool_stmts
))
3712 rhs
= adjust_bool_stmts (bool_stmts
, type
, last_stmt
);
3714 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
3716 = gimple_build_assign (lhs
, COND_EXPR
,
3717 build2 (NE_EXPR
, boolean_type_node
,
3718 rhs
, build_int_cst (type
, 0)),
3719 gimple_assign_rhs2 (last_stmt
),
3720 gimple_assign_rhs3 (last_stmt
));
3721 *type_out
= vectype
;
3722 stmts
->safe_push (last_stmt
);
3723 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt
);
3725 return pattern_stmt
;
3727 else if (rhs_code
== SSA_NAME
3728 && STMT_VINFO_DATA_REF (stmt_vinfo
))
3730 stmt_vec_info pattern_stmt_info
;
3731 vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
3732 gcc_assert (vectype
!= NULL_TREE
);
3733 if (!VECTOR_MODE_P (TYPE_MODE (vectype
)))
3736 if (check_bool_pattern (var
, vinfo
, bool_stmts
))
3737 rhs
= adjust_bool_stmts (bool_stmts
, TREE_TYPE (vectype
), last_stmt
);
3740 tree type
= search_type_for_mask (var
, vinfo
);
3741 tree cst0
, cst1
, new_vectype
;
3746 if (TYPE_MODE (type
) == TYPE_MODE (TREE_TYPE (vectype
)))
3747 type
= TREE_TYPE (vectype
);
3749 cst0
= build_int_cst (type
, 0);
3750 cst1
= build_int_cst (type
, 1);
3751 new_vectype
= get_vectype_for_scalar_type (type
);
3753 rhs
= vect_recog_temp_ssa_var (type
, NULL
);
3754 pattern_stmt
= gimple_build_assign (rhs
, COND_EXPR
, var
, cst1
, cst0
);
3756 pattern_stmt_info
= new_stmt_vec_info (pattern_stmt
, vinfo
);
3757 set_vinfo_for_stmt (pattern_stmt
, pattern_stmt_info
);
3758 STMT_VINFO_VECTYPE (pattern_stmt_info
) = new_vectype
;
3759 append_pattern_def_seq (stmt_vinfo
, pattern_stmt
);
3762 lhs
= build1 (VIEW_CONVERT_EXPR
, TREE_TYPE (vectype
), lhs
);
3763 if (!useless_type_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (rhs
)))
3765 tree rhs2
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
3766 gimple
*cast_stmt
= gimple_build_assign (rhs2
, NOP_EXPR
, rhs
);
3767 append_pattern_def_seq (stmt_vinfo
, cast_stmt
);
3770 pattern_stmt
= gimple_build_assign (lhs
, SSA_NAME
, rhs
);
3771 pattern_stmt_info
= new_stmt_vec_info (pattern_stmt
, vinfo
);
3772 set_vinfo_for_stmt (pattern_stmt
, pattern_stmt_info
);
3773 STMT_VINFO_DATA_REF (pattern_stmt_info
)
3774 = STMT_VINFO_DATA_REF (stmt_vinfo
);
3775 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info
)
3776 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_vinfo
);
3777 *type_out
= vectype
;
3778 stmts
->safe_push (last_stmt
);
3779 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt
);
3781 return pattern_stmt
;
3788 /* A helper for vect_recog_mask_conversion_pattern. Build
3789 conversion of MASK to a type suitable for masking VECTYPE.
3790 Built statement gets required vectype and is appended to
3791 a pattern sequence of STMT_VINFO.
3793 Return converted mask. */
3796 build_mask_conversion (tree mask
, tree vectype
, stmt_vec_info stmt_vinfo
,
3801 stmt_vec_info new_stmt_info
;
3803 masktype
= build_same_sized_truth_vector_type (vectype
);
3804 tmp
= vect_recog_temp_ssa_var (TREE_TYPE (masktype
), NULL
);
3805 stmt
= gimple_build_assign (tmp
, CONVERT_EXPR
, mask
);
3806 new_stmt_info
= new_stmt_vec_info (stmt
, vinfo
);
3807 set_vinfo_for_stmt (stmt
, new_stmt_info
);
3808 STMT_VINFO_VECTYPE (new_stmt_info
) = masktype
;
3809 append_pattern_def_seq (stmt_vinfo
, stmt
);
3815 /* Function vect_recog_mask_conversion_pattern
3817 Try to find statements which require boolean type
3818 converison. Additional conversion statements are
3819 added to handle such cases. For example:
3829 S4 c_1 = m_3 ? c_2 : c_3;
3831 Will be transformed into:
3835 S3'' m_2' = (_Bool[bitsize=32])m_2
3836 S3' m_3' = m_1 & m_2';
3837 S4'' m_3'' = (_Bool[bitsize=8])m_3'
3838 S4' c_1' = m_3'' ? c_2 : c_3; */
3841 vect_recog_mask_conversion_pattern (vec
<gimple
*> *stmts
, tree
*type_out
)
3843 gimple
*last_stmt
= stmts
->pop ();
3844 enum tree_code rhs_code
;
3845 tree lhs
= NULL_TREE
, rhs1
, rhs2
, tmp
, rhs1_type
, rhs2_type
;
3846 tree vectype1
, vectype2
;
3847 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
3848 stmt_vec_info pattern_stmt_info
;
3849 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
3851 /* Check for MASK_LOAD ans MASK_STORE calls requiring mask conversion. */
3852 if (is_gimple_call (last_stmt
)
3853 && gimple_call_internal_p (last_stmt
)
3854 && (gimple_call_internal_fn (last_stmt
) == IFN_MASK_STORE
3855 || gimple_call_internal_fn (last_stmt
) == IFN_MASK_LOAD
))
3857 gcall
*pattern_stmt
;
3858 bool load
= (gimple_call_internal_fn (last_stmt
) == IFN_MASK_LOAD
);
3862 lhs
= gimple_call_lhs (last_stmt
);
3863 vectype1
= get_vectype_for_scalar_type (TREE_TYPE (lhs
));
3867 rhs2
= gimple_call_arg (last_stmt
, 3);
3868 vectype1
= get_vectype_for_scalar_type (TREE_TYPE (rhs2
));
3871 rhs1
= gimple_call_arg (last_stmt
, 2);
3872 rhs1_type
= search_type_for_mask (rhs1
, vinfo
);
3875 vectype2
= get_mask_type_for_scalar_type (rhs1_type
);
3877 if (!vectype1
|| !vectype2
3878 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1
),
3879 TYPE_VECTOR_SUBPARTS (vectype2
)))
3882 tmp
= build_mask_conversion (rhs1
, vectype1
, stmt_vinfo
, vinfo
);
3886 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
3888 = gimple_build_call_internal (IFN_MASK_LOAD
, 3,
3889 gimple_call_arg (last_stmt
, 0),
3890 gimple_call_arg (last_stmt
, 1),
3892 gimple_call_set_lhs (pattern_stmt
, lhs
);
3896 = gimple_build_call_internal (IFN_MASK_STORE
, 4,
3897 gimple_call_arg (last_stmt
, 0),
3898 gimple_call_arg (last_stmt
, 1),
3900 gimple_call_arg (last_stmt
, 3));
3902 gimple_call_set_nothrow (pattern_stmt
, true);
3904 pattern_stmt_info
= new_stmt_vec_info (pattern_stmt
, vinfo
);
3905 set_vinfo_for_stmt (pattern_stmt
, pattern_stmt_info
);
3906 STMT_VINFO_DATA_REF (pattern_stmt_info
)
3907 = STMT_VINFO_DATA_REF (stmt_vinfo
);
3908 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info
)
3909 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_vinfo
);
3910 STMT_VINFO_GATHER_SCATTER_P (pattern_stmt_info
)
3911 = STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo
);
3913 *type_out
= vectype1
;
3914 stmts
->safe_push (last_stmt
);
3915 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt
);
3917 return pattern_stmt
;
3920 if (!is_gimple_assign (last_stmt
))
3923 gimple
*pattern_stmt
;
3924 lhs
= gimple_assign_lhs (last_stmt
);
3925 rhs1
= gimple_assign_rhs1 (last_stmt
);
3926 rhs_code
= gimple_assign_rhs_code (last_stmt
);
3928 /* Check for cond expression requiring mask conversion. */
3929 if (rhs_code
== COND_EXPR
)
3931 vectype1
= get_vectype_for_scalar_type (TREE_TYPE (lhs
));
3933 if (TREE_CODE (rhs1
) == SSA_NAME
)
3935 rhs1_type
= search_type_for_mask (rhs1
, vinfo
);
3939 else if (COMPARISON_CLASS_P (rhs1
))
3941 /* Check whether we're comparing scalar booleans and (if so)
3942 whether a better mask type exists than the mask associated
3943 with boolean-sized elements. This avoids unnecessary packs
3944 and unpacks if the booleans are set from comparisons of
3945 wider types. E.g. in:
3947 int x1, x2, x3, x4, y1, y1;
3949 bool b1 = (x1 == x2);
3950 bool b2 = (x3 == x4);
3951 ... = b1 == b2 ? y1 : y2;
3953 it is better for b1 and b2 to use the mask type associated
3954 with int elements rather bool (byte) elements. */
3955 rhs1_type
= search_type_for_mask (TREE_OPERAND (rhs1
, 0), vinfo
);
3957 rhs1_type
= TREE_TYPE (TREE_OPERAND (rhs1
, 0));
3962 vectype2
= get_mask_type_for_scalar_type (rhs1_type
);
3964 if (!vectype1
|| !vectype2
)
3967 /* Continue if a conversion is needed. Also continue if we have
3968 a comparison whose vector type would normally be different from
3969 VECTYPE2 when considered in isolation. In that case we'll
3970 replace the comparison with an SSA name (so that we can record
3971 its vector type) and behave as though the comparison was an SSA
3972 name from the outset. */
3973 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1
),
3974 TYPE_VECTOR_SUBPARTS (vectype2
))
3975 && (TREE_CODE (rhs1
) == SSA_NAME
3976 || rhs1_type
== TREE_TYPE (TREE_OPERAND (rhs1
, 0))))
3979 /* If rhs1 is invariant and we can promote it leave the COND_EXPR
3980 in place, we can handle it in vectorizable_condition. This avoids
3981 unnecessary promotion stmts and increased vectorization factor. */
3982 if (COMPARISON_CLASS_P (rhs1
)
3983 && INTEGRAL_TYPE_P (rhs1_type
)
3984 && known_le (TYPE_VECTOR_SUBPARTS (vectype1
),
3985 TYPE_VECTOR_SUBPARTS (vectype2
)))
3987 enum vect_def_type dt
;
3988 if (vect_is_simple_use (TREE_OPERAND (rhs1
, 0), vinfo
, &dt
)
3989 && dt
== vect_external_def
3990 && vect_is_simple_use (TREE_OPERAND (rhs1
, 1), vinfo
, &dt
)
3991 && (dt
== vect_external_def
3992 || dt
== vect_constant_def
))
3994 tree wide_scalar_type
= build_nonstandard_integer_type
3995 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype1
))),
3996 TYPE_UNSIGNED (rhs1_type
));
3997 tree vectype3
= get_vectype_for_scalar_type (wide_scalar_type
);
3998 if (expand_vec_cond_expr_p (vectype1
, vectype3
, TREE_CODE (rhs1
)))
4003 /* If rhs1 is a comparison we need to move it into a
4004 separate statement. */
4005 if (TREE_CODE (rhs1
) != SSA_NAME
)
4007 tmp
= vect_recog_temp_ssa_var (TREE_TYPE (rhs1
), NULL
);
4008 pattern_stmt
= gimple_build_assign (tmp
, rhs1
);
4011 pattern_stmt_info
= new_stmt_vec_info (pattern_stmt
, vinfo
);
4012 set_vinfo_for_stmt (pattern_stmt
, pattern_stmt_info
);
4013 STMT_VINFO_VECTYPE (pattern_stmt_info
) = vectype2
;
4014 append_pattern_def_seq (stmt_vinfo
, pattern_stmt
);
4017 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
4018 TYPE_VECTOR_SUBPARTS (vectype2
)))
4019 tmp
= build_mask_conversion (rhs1
, vectype1
, stmt_vinfo
, vinfo
);
4023 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
4024 pattern_stmt
= gimple_build_assign (lhs
, COND_EXPR
, tmp
,
4025 gimple_assign_rhs2 (last_stmt
),
4026 gimple_assign_rhs3 (last_stmt
));
4028 *type_out
= vectype1
;
4029 stmts
->safe_push (last_stmt
);
4030 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt
);
4032 return pattern_stmt
;
4035 /* Now check for binary boolean operations requiring conversion for
4037 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs
)))
4040 if (rhs_code
!= BIT_IOR_EXPR
4041 && rhs_code
!= BIT_XOR_EXPR
4042 && rhs_code
!= BIT_AND_EXPR
4043 && TREE_CODE_CLASS (rhs_code
) != tcc_comparison
)
4046 rhs2
= gimple_assign_rhs2 (last_stmt
);
4048 rhs1_type
= search_type_for_mask (rhs1
, vinfo
);
4049 rhs2_type
= search_type_for_mask (rhs2
, vinfo
);
4051 if (!rhs1_type
|| !rhs2_type
4052 || TYPE_PRECISION (rhs1_type
) == TYPE_PRECISION (rhs2_type
))
4055 if (TYPE_PRECISION (rhs1_type
) < TYPE_PRECISION (rhs2_type
))
4057 vectype1
= get_mask_type_for_scalar_type (rhs1_type
);
4060 rhs2
= build_mask_conversion (rhs2
, vectype1
, stmt_vinfo
, vinfo
);
4064 vectype1
= get_mask_type_for_scalar_type (rhs2_type
);
4067 rhs1
= build_mask_conversion (rhs1
, vectype1
, stmt_vinfo
, vinfo
);
4070 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
4071 pattern_stmt
= gimple_build_assign (lhs
, rhs_code
, rhs1
, rhs2
);
4073 *type_out
= vectype1
;
4074 stmts
->safe_push (last_stmt
);
4075 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt
);
4077 return pattern_stmt
;
4080 /* STMT is a load or store. If the load or store is conditional, return
4081 the boolean condition under which it occurs, otherwise return null. */
4084 vect_get_load_store_mask (gimple
*stmt
)
4086 if (gassign
*def_assign
= dyn_cast
<gassign
*> (stmt
))
4088 gcc_assert (gimple_assign_single_p (def_assign
));
4092 if (gcall
*def_call
= dyn_cast
<gcall
*> (stmt
))
4094 internal_fn ifn
= gimple_call_internal_fn (def_call
);
4095 int mask_index
= internal_fn_mask_index (ifn
);
4096 return gimple_call_arg (def_call
, mask_index
);
4102 /* Return the scalar offset type that an internal gather/scatter function
4103 should use. GS_INFO describes the gather/scatter operation. */
4106 vect_get_gather_scatter_offset_type (gather_scatter_info
*gs_info
)
4108 tree offset_type
= TREE_TYPE (gs_info
->offset
);
4109 unsigned int element_bits
= tree_to_uhwi (TYPE_SIZE (gs_info
->element_type
));
4111 /* Enforced by vect_check_gather_scatter. */
4112 unsigned int offset_bits
= TYPE_PRECISION (offset_type
);
4113 gcc_assert (element_bits
>= offset_bits
);
4115 /* If the offset is narrower than the elements, extend it according
4117 if (element_bits
> offset_bits
)
4118 return build_nonstandard_integer_type (element_bits
,
4119 TYPE_UNSIGNED (offset_type
));
4124 /* Return MASK if MASK is suitable for masking an operation on vectors
4125 of type VECTYPE, otherwise convert it into such a form and return
4126 the result. Associate any conversion statements with STMT_INFO's
4130 vect_convert_mask_for_vectype (tree mask
, tree vectype
,
4131 stmt_vec_info stmt_info
, vec_info
*vinfo
)
4133 tree mask_type
= search_type_for_mask (mask
, vinfo
);
4136 tree mask_vectype
= get_mask_type_for_scalar_type (mask_type
);
4138 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype
),
4139 TYPE_VECTOR_SUBPARTS (mask_vectype
)))
4140 mask
= build_mask_conversion (mask
, vectype
, stmt_info
, vinfo
);
4145 /* Return the equivalent of:
4147 fold_convert (TYPE, VALUE)
4149 with the expectation that the operation will be vectorized.
4150 If new statements are needed, add them as pattern statements
4154 vect_add_conversion_to_patterm (tree type
, tree value
,
4155 stmt_vec_info stmt_info
,
4158 if (useless_type_conversion_p (type
, TREE_TYPE (value
)))
4161 tree new_value
= vect_recog_temp_ssa_var (type
, NULL
);
4162 gassign
*conversion
= gimple_build_assign (new_value
, CONVERT_EXPR
, value
);
4163 stmt_vec_info new_stmt_info
= new_stmt_vec_info (conversion
, vinfo
);
4164 set_vinfo_for_stmt (conversion
, new_stmt_info
);
4165 STMT_VINFO_VECTYPE (new_stmt_info
) = get_vectype_for_scalar_type (type
);
4166 append_pattern_def_seq (stmt_info
, conversion
);
4170 /* Try to convert STMT into a call to a gather load or scatter store
4171 internal function. Return the final statement on success and set
4172 *TYPE_OUT to the vector type being loaded or stored.
4174 This function only handles gathers and scatters that were recognized
4175 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
4178 vect_try_gather_scatter_pattern (gimple
*stmt
, stmt_vec_info last_stmt_info
,
4181 /* Currently we only support this for loop vectorization. */
4182 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4183 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (stmt_info
->vinfo
);
4187 /* Make sure that we're looking at a gather load or scatter store. */
4188 data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
4189 if (!dr
|| !STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
4192 /* Get the boolean that controls whether the load or store happens.
4193 This is null if the operation is unconditional. */
4194 tree mask
= vect_get_load_store_mask (stmt
);
4196 /* Make sure that the target supports an appropriate internal
4197 function for the gather/scatter operation. */
4198 gather_scatter_info gs_info
;
4199 if (!vect_check_gather_scatter (stmt
, loop_vinfo
, &gs_info
)
4203 /* Convert the mask to the right form. */
4204 tree gs_vectype
= get_vectype_for_scalar_type (gs_info
.element_type
);
4206 mask
= vect_convert_mask_for_vectype (mask
, gs_vectype
, last_stmt_info
,
4209 /* Get the invariant base and non-invariant offset, converting the
4210 latter to the same width as the vector elements. */
4211 tree base
= gs_info
.base
;
4212 tree offset_type
= vect_get_gather_scatter_offset_type (&gs_info
);
4213 tree offset
= vect_add_conversion_to_patterm (offset_type
, gs_info
.offset
,
4214 last_stmt_info
, loop_vinfo
);
4216 /* Build the new pattern statement. */
4217 tree scale
= size_int (gs_info
.scale
);
4218 gcall
*pattern_stmt
;
4219 if (DR_IS_READ (dr
))
4222 pattern_stmt
= gimple_build_call_internal (gs_info
.ifn
, 4, base
,
4223 offset
, scale
, mask
);
4225 pattern_stmt
= gimple_build_call_internal (gs_info
.ifn
, 3, base
,
4227 tree load_lhs
= vect_recog_temp_ssa_var (gs_info
.element_type
, NULL
);
4228 gimple_call_set_lhs (pattern_stmt
, load_lhs
);
4232 tree rhs
= vect_get_store_rhs (stmt
);
4234 pattern_stmt
= gimple_build_call_internal (IFN_MASK_SCATTER_STORE
, 5,
4235 base
, offset
, scale
, rhs
,
4238 pattern_stmt
= gimple_build_call_internal (IFN_SCATTER_STORE
, 4,
4239 base
, offset
, scale
, rhs
);
4241 gimple_call_set_nothrow (pattern_stmt
, true);
4243 /* Copy across relevant vectorization info and associate DR with the
4244 new pattern statement instead of the original statement. */
4245 stmt_vec_info pattern_stmt_info
= new_stmt_vec_info (pattern_stmt
,
4247 set_vinfo_for_stmt (pattern_stmt
, pattern_stmt_info
);
4248 STMT_VINFO_DATA_REF (pattern_stmt_info
) = dr
;
4249 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info
)
4250 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info
);
4251 STMT_VINFO_GATHER_SCATTER_P (pattern_stmt_info
)
4252 = STMT_VINFO_GATHER_SCATTER_P (stmt_info
);
4254 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4255 *type_out
= vectype
;
4256 vect_pattern_detected ("gather/scatter pattern", stmt
);
4258 return pattern_stmt
;
4261 /* Pattern wrapper around vect_try_gather_scatter_pattern. */
4264 vect_recog_gather_scatter_pattern (vec
<gimple
*> *stmts
, tree
*type_out
)
4266 gimple
*last_stmt
= stmts
->pop ();
4267 stmt_vec_info last_stmt_info
= vinfo_for_stmt (last_stmt
);
4268 gimple
*pattern_stmt
= vect_try_gather_scatter_pattern (last_stmt
,
4272 stmts
->safe_push (last_stmt
);
4273 return pattern_stmt
;
4276 /* Return true if TYPE is a non-boolean integer type. These are the types
4277 that we want to consider for narrowing. */
4280 vect_narrowable_type_p (tree type
)
4282 return INTEGRAL_TYPE_P (type
) && !VECT_SCALAR_BOOLEAN_TYPE_P (type
);
4285 /* Return true if the operation given by CODE can be truncated to N bits
4286 when only N bits of the output are needed. This is only true if bit N+1
4287 of the inputs has no effect on the low N bits of the result. */
4290 vect_truncatable_operation_p (tree_code code
)
4308 /* Record that STMT_INFO could be changed from operating on TYPE to
4309 operating on a type with the precision and sign given by PRECISION
4310 and SIGN respectively. PRECISION is an arbitrary bit precision;
4311 it might not be a whole number of bytes. */
4314 vect_set_operation_type (stmt_vec_info stmt_info
, tree type
,
4315 unsigned int precision
, signop sign
)
4317 /* Round the precision up to a whole number of bytes. */
4318 precision
= vect_element_precision (precision
);
4319 if (precision
< TYPE_PRECISION (type
)
4320 && (!stmt_info
->operation_precision
4321 || stmt_info
->operation_precision
> precision
))
4323 stmt_info
->operation_precision
= precision
;
4324 stmt_info
->operation_sign
= sign
;
4328 /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
4329 non-boolean inputs, all of which have type TYPE. MIN_INPUT_PRECISION
4330 is an arbitrary bit precision; it might not be a whole number of bytes. */
4333 vect_set_min_input_precision (stmt_vec_info stmt_info
, tree type
,
4334 unsigned int min_input_precision
)
4336 /* This operation in isolation only requires the inputs to have
4337 MIN_INPUT_PRECISION of precision, However, that doesn't mean
4338 that MIN_INPUT_PRECISION is a natural precision for the chain
4339 as a whole. E.g. consider something like:
4341 unsigned short *x, *y;
4342 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4344 The right shift can be done on unsigned chars, and only requires the
4345 result of "*x & 0xf0" to be done on unsigned chars. But taking that
4346 approach would mean turning a natural chain of single-vector unsigned
4347 short operations into one that truncates "*x" and then extends
4348 "(*x & 0xf0) >> 4", with two vectors for each unsigned short
4349 operation and one vector for each unsigned char operation.
4350 This would be a significant pessimization.
4352 Instead only propagate the maximum of this precision and the precision
4353 required by the users of the result. This means that we don't pessimize
4354 the case above but continue to optimize things like:
4358 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4360 Here we would truncate two vectors of *x to a single vector of
4361 unsigned chars and use single-vector unsigned char operations for
4362 everything else, rather than doing two unsigned short copies of
4363 "(*x & 0xf0) >> 4" and then truncating the result. */
4364 min_input_precision
= MAX (min_input_precision
,
4365 stmt_info
->min_output_precision
);
4367 if (min_input_precision
< TYPE_PRECISION (type
)
4368 && (!stmt_info
->min_input_precision
4369 || stmt_info
->min_input_precision
> min_input_precision
))
4370 stmt_info
->min_input_precision
= min_input_precision
;
4373 /* Subroutine of vect_determine_min_output_precision. Return true if
4374 we can calculate a reduced number of output bits for STMT_INFO,
4375 whose result is LHS. */
4378 vect_determine_min_output_precision_1 (stmt_vec_info stmt_info
, tree lhs
)
4380 /* Take the maximum precision required by users of the result. */
4381 unsigned int precision
= 0;
4382 imm_use_iterator iter
;
4384 FOR_EACH_IMM_USE_FAST (use
, iter
, lhs
)
4386 gimple
*use_stmt
= USE_STMT (use
);
4387 if (is_gimple_debug (use_stmt
))
4389 if (!vect_stmt_in_region_p (stmt_info
->vinfo
, use_stmt
))
4391 stmt_vec_info use_stmt_info
= vinfo_for_stmt (use_stmt
);
4392 if (!use_stmt_info
->min_input_precision
)
4394 precision
= MAX (precision
, use_stmt_info
->min_input_precision
);
4397 if (dump_enabled_p ())
4399 dump_printf_loc (MSG_NOTE
, vect_location
, "only the low %d bits of ",
4401 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, lhs
);
4402 dump_printf (MSG_NOTE
, " are significant\n");
4404 stmt_info
->min_output_precision
= precision
;
4408 /* Calculate min_output_precision for STMT_INFO. */
4411 vect_determine_min_output_precision (stmt_vec_info stmt_info
)
4413 /* We're only interested in statements with a narrowable result. */
4414 tree lhs
= gimple_get_lhs (stmt_info
->stmt
);
4416 || TREE_CODE (lhs
) != SSA_NAME
4417 || !vect_narrowable_type_p (TREE_TYPE (lhs
)))
4420 if (!vect_determine_min_output_precision_1 (stmt_info
, lhs
))
4421 stmt_info
->min_output_precision
= TYPE_PRECISION (TREE_TYPE (lhs
));
4424 /* Use range information to decide whether STMT (described by STMT_INFO)
4425 could be done in a narrower type. This is effectively a forward
4426 propagation, since it uses context-independent information that applies
4427 to all users of an SSA name. */
4430 vect_determine_precisions_from_range (stmt_vec_info stmt_info
, gassign
*stmt
)
4432 tree lhs
= gimple_assign_lhs (stmt
);
4433 if (!lhs
|| TREE_CODE (lhs
) != SSA_NAME
)
4436 tree type
= TREE_TYPE (lhs
);
4437 if (!vect_narrowable_type_p (type
))
4440 /* First see whether we have any useful range information for the result. */
4441 unsigned int precision
= TYPE_PRECISION (type
);
4442 signop sign
= TYPE_SIGN (type
);
4443 wide_int min_value
, max_value
;
4444 if (!vect_get_range_info (lhs
, &min_value
, &max_value
))
4447 tree_code code
= gimple_assign_rhs_code (stmt
);
4448 unsigned int nops
= gimple_num_ops (stmt
);
4450 if (!vect_truncatable_operation_p (code
))
4451 /* Check that all relevant input operands are compatible, and update
4452 [MIN_VALUE, MAX_VALUE] to include their ranges. */
4453 for (unsigned int i
= 1; i
< nops
; ++i
)
4455 tree op
= gimple_op (stmt
, i
);
4456 if (TREE_CODE (op
) == INTEGER_CST
)
4458 /* Don't require the integer to have RHS_TYPE (which it might
4459 not for things like shift amounts, etc.), but do require it
4461 if (!int_fits_type_p (op
, type
))
4464 min_value
= wi::min (min_value
, wi::to_wide (op
, precision
), sign
);
4465 max_value
= wi::max (max_value
, wi::to_wide (op
, precision
), sign
);
4467 else if (TREE_CODE (op
) == SSA_NAME
)
4469 /* Ignore codes that don't take uniform arguments. */
4470 if (!types_compatible_p (TREE_TYPE (op
), type
))
4473 wide_int op_min_value
, op_max_value
;
4474 if (!vect_get_range_info (op
, &op_min_value
, &op_max_value
))
4477 min_value
= wi::min (min_value
, op_min_value
, sign
);
4478 max_value
= wi::max (max_value
, op_max_value
, sign
);
4484 /* Try to switch signed types for unsigned types if we can.
4485 This is better for two reasons. First, unsigned ops tend
4486 to be cheaper than signed ops. Second, it means that we can
4490 int res = (int) c & 0xff00; // range [0x0000, 0xff00]
4495 unsigned short res_1 = (unsigned short) c & 0xff00;
4496 int res = (int) res_1;
4498 where the intermediate result res_1 has unsigned rather than
4500 if (sign
== SIGNED
&& !wi::neg_p (min_value
))
4503 /* See what precision is required for MIN_VALUE and MAX_VALUE. */
4504 unsigned int precision1
= wi::min_precision (min_value
, sign
);
4505 unsigned int precision2
= wi::min_precision (max_value
, sign
);
4506 unsigned int value_precision
= MAX (precision1
, precision2
);
4507 if (value_precision
>= precision
)
4510 if (dump_enabled_p ())
4512 dump_printf_loc (MSG_NOTE
, vect_location
, "can narrow to %s:%d"
4513 " without loss of precision: ",
4514 sign
== SIGNED
? "signed" : "unsigned",
4516 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
4519 vect_set_operation_type (stmt_info
, type
, value_precision
, sign
);
4520 vect_set_min_input_precision (stmt_info
, type
, value_precision
);
4523 /* Use information about the users of STMT's result to decide whether
4524 STMT (described by STMT_INFO) could be done in a narrower type.
4525 This is effectively a backward propagation. */
4528 vect_determine_precisions_from_users (stmt_vec_info stmt_info
, gassign
*stmt
)
4530 tree_code code
= gimple_assign_rhs_code (stmt
);
4531 unsigned int opno
= (code
== COND_EXPR
? 2 : 1);
4532 tree type
= TREE_TYPE (gimple_op (stmt
, opno
));
4533 if (!vect_narrowable_type_p (type
))
4536 unsigned int precision
= TYPE_PRECISION (type
);
4537 unsigned int operation_precision
, min_input_precision
;
4541 /* Only the bits that contribute to the output matter. Don't change
4542 the precision of the operation itself. */
4543 operation_precision
= precision
;
4544 min_input_precision
= stmt_info
->min_output_precision
;
4550 tree shift
= gimple_assign_rhs2 (stmt
);
4551 if (TREE_CODE (shift
) != INTEGER_CST
4552 || !wi::ltu_p (wi::to_widest (shift
), precision
))
4554 unsigned int const_shift
= TREE_INT_CST_LOW (shift
);
4555 if (code
== LSHIFT_EXPR
)
4557 /* We need CONST_SHIFT fewer bits of the input. */
4558 operation_precision
= stmt_info
->min_output_precision
;
4559 min_input_precision
= (MAX (operation_precision
, const_shift
)
4564 /* We need CONST_SHIFT extra bits to do the operation. */
4565 operation_precision
= (stmt_info
->min_output_precision
4567 min_input_precision
= operation_precision
;
4573 if (vect_truncatable_operation_p (code
))
4575 /* Input bit N has no effect on output bits N-1 and lower. */
4576 operation_precision
= stmt_info
->min_output_precision
;
4577 min_input_precision
= operation_precision
;
4583 if (operation_precision
< precision
)
4585 if (dump_enabled_p ())
4587 dump_printf_loc (MSG_NOTE
, vect_location
, "can narrow to %s:%d"
4588 " without affecting users: ",
4589 TYPE_UNSIGNED (type
) ? "unsigned" : "signed",
4590 operation_precision
);
4591 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
4593 vect_set_operation_type (stmt_info
, type
, operation_precision
,
4596 vect_set_min_input_precision (stmt_info
, type
, min_input_precision
);
4599 /* Handle vect_determine_precisions for STMT_INFO, given that we
4600 have already done so for the users of its result. */
4603 vect_determine_stmt_precisions (stmt_vec_info stmt_info
)
4605 vect_determine_min_output_precision (stmt_info
);
4606 if (gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
4608 vect_determine_precisions_from_range (stmt_info
, stmt
);
4609 vect_determine_precisions_from_users (stmt_info
, stmt
);
4613 /* Walk backwards through the vectorizable region to determine the
4614 values of these fields:
4616 - min_output_precision
4617 - min_input_precision
4618 - operation_precision
4619 - operation_sign. */
4622 vect_determine_precisions (vec_info
*vinfo
)
4624 DUMP_VECT_SCOPE ("vect_determine_precisions");
4626 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
4628 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
4629 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
4630 unsigned int nbbs
= loop
->num_nodes
;
4632 for (unsigned int i
= 0; i
< nbbs
; i
++)
4634 basic_block bb
= bbs
[nbbs
- i
- 1];
4635 for (gimple_stmt_iterator si
= gsi_last_bb (bb
);
4636 !gsi_end_p (si
); gsi_prev (&si
))
4637 vect_determine_stmt_precisions (vinfo_for_stmt (gsi_stmt (si
)));
4642 bb_vec_info bb_vinfo
= as_a
<bb_vec_info
> (vinfo
);
4643 gimple_stmt_iterator si
= bb_vinfo
->region_end
;
4648 si
= gsi_last_bb (bb_vinfo
->bb
);
4651 stmt
= gsi_stmt (si
);
4652 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4653 if (stmt_info
&& STMT_VINFO_VECTORIZABLE (stmt_info
))
4654 vect_determine_stmt_precisions (stmt_info
);
4656 while (stmt
!= gsi_stmt (bb_vinfo
->region_begin
));
4660 typedef gimple
*(*vect_recog_func_ptr
) (vec
<gimple
*> *, tree
*);
4662 struct vect_recog_func
4664 vect_recog_func_ptr fn
;
4668 /* Note that ordering matters - the first pattern matching on a stmt is
4669 taken which means usually the more complex one needs to preceed the
4670 less comples onex (widen_sum only after dot_prod or sad for example). */
4671 static vect_recog_func vect_vect_recog_func_ptrs
[] = {
4672 { vect_recog_over_widening_pattern
, "over_widening" },
4673 { vect_recog_cast_forwprop_pattern
, "cast_forwprop" },
4674 { vect_recog_widen_mult_pattern
, "widen_mult" },
4675 { vect_recog_dot_prod_pattern
, "dot_prod" },
4676 { vect_recog_sad_pattern
, "sad" },
4677 { vect_recog_widen_sum_pattern
, "widen_sum" },
4678 { vect_recog_pow_pattern
, "pow" },
4679 { vect_recog_widen_shift_pattern
, "widen_shift" },
4680 { vect_recog_rotate_pattern
, "rotate" },
4681 { vect_recog_vector_vector_shift_pattern
, "vector_vector_shift" },
4682 { vect_recog_divmod_pattern
, "divmod" },
4683 { vect_recog_mult_pattern
, "mult" },
4684 { vect_recog_mixed_size_cond_pattern
, "mixed_size_cond" },
4685 { vect_recog_bool_pattern
, "bool" },
4686 /* This must come before mask conversion, and includes the parts
4687 of mask conversion that are needed for gather and scatter
4688 internal functions. */
4689 { vect_recog_gather_scatter_pattern
, "gather_scatter" },
4690 { vect_recog_mask_conversion_pattern
, "mask_conversion" }
4693 const unsigned int NUM_PATTERNS
= ARRAY_SIZE (vect_vect_recog_func_ptrs
);
4695 /* Mark statements that are involved in a pattern. */
4698 vect_mark_pattern_stmts (gimple
*orig_stmt
, gimple
*pattern_stmt
,
4699 tree pattern_vectype
)
4701 stmt_vec_info orig_stmt_info
= vinfo_for_stmt (orig_stmt
);
4702 gimple
*def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info
);
4704 bool old_pattern_p
= is_pattern_stmt_p (orig_stmt_info
);
4707 /* We're replacing a statement in an existing pattern definition
4709 if (dump_enabled_p ())
4711 dump_printf_loc (MSG_NOTE
, vect_location
,
4712 "replacing earlier pattern ");
4713 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, orig_stmt
, 0);
4716 /* To keep the book-keeping simple, just swap the lhs of the
4717 old and new statements, so that the old one has a valid but
4719 tree old_lhs
= gimple_get_lhs (orig_stmt
);
4720 gimple_set_lhs (orig_stmt
, gimple_get_lhs (pattern_stmt
));
4721 gimple_set_lhs (pattern_stmt
, old_lhs
);
4723 if (dump_enabled_p ())
4725 dump_printf_loc (MSG_NOTE
, vect_location
, "with ");
4726 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
4729 /* Switch to the statement that ORIG replaces. */
4731 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (orig_stmt_info
));
4733 /* We shouldn't be replacing the main pattern statement. */
4734 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info
) != orig_stmt
);
4738 for (gimple_stmt_iterator si
= gsi_start (def_seq
);
4739 !gsi_end_p (si
); gsi_next (&si
))
4740 vect_init_pattern_stmt (gsi_stmt (si
), orig_stmt_info
, pattern_vectype
);
4744 vect_init_pattern_stmt (pattern_stmt
, orig_stmt_info
, pattern_vectype
);
4746 /* Insert all the new pattern statements before the original one. */
4747 gimple_seq
*orig_def_seq
= &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info
);
4748 gimple_stmt_iterator gsi
= gsi_for_stmt (orig_stmt
, orig_def_seq
);
4749 gsi_insert_seq_before_without_update (&gsi
, def_seq
, GSI_SAME_STMT
);
4750 gsi_insert_before_without_update (&gsi
, pattern_stmt
, GSI_SAME_STMT
);
4752 /* Remove the pattern statement that this new pattern replaces. */
4753 gsi_remove (&gsi
, false);
4756 vect_set_pattern_stmt (pattern_stmt
, orig_stmt_info
, pattern_vectype
);
4759 /* Function vect_pattern_recog_1
4762 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
4763 computation pattern.
4764 STMT: A stmt from which the pattern search should start.
4766 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
4767 a sequence of statements that has the same functionality and can be
4768 used to replace STMT. It returns the last statement in the sequence
4769 and adds any earlier statements to STMT's STMT_VINFO_PATTERN_DEF_SEQ.
4770 PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
4771 statement, having first checked that the target supports the new operation
4774 This function also does some bookkeeping, as explained in the documentation
4775 for vect_recog_pattern. */
4778 vect_pattern_recog_1 (vect_recog_func
*recog_func
,
4779 gimple_stmt_iterator si
,
4780 vec
<gimple
*> *stmts_to_replace
)
4782 gimple
*stmt
= gsi_stmt (si
), *pattern_stmt
;
4783 stmt_vec_info stmt_info
;
4784 loop_vec_info loop_vinfo
;
4785 tree pattern_vectype
;
4788 /* If this statement has already been replaced with pattern statements,
4789 leave the original statement alone, since the first match wins.
4790 Instead try to match against the definition statements that feed
4791 the main pattern statement. */
4792 stmt_info
= vinfo_for_stmt (stmt
);
4793 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
4795 gimple_stmt_iterator gsi
;
4796 for (gsi
= gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
));
4797 !gsi_end_p (gsi
); gsi_next (&gsi
))
4798 vect_pattern_recog_1 (recog_func
, gsi
, stmts_to_replace
);
4802 stmts_to_replace
->truncate (0);
4803 stmts_to_replace
->quick_push (stmt
);
4804 pattern_stmt
= recog_func
->fn (stmts_to_replace
, &pattern_vectype
);
4807 /* Clear related stmt info that analysis might have noted for
4808 to be replaced stmts. */
4809 for (i
= 0; stmts_to_replace
->iterate (i
, &stmt
)
4810 && (unsigned) i
< stmts_to_replace
->length ();
4813 stmt_info
= vinfo_for_stmt (stmt
);
4814 if (!is_pattern_stmt_p (stmt_info
))
4815 STMT_VINFO_RELATED_STMT (stmt_info
) = NULL
;
4817 /* Clear any half-formed pattern definition sequence. */
4818 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
) = NULL
;
4822 stmt
= stmts_to_replace
->last ();
4823 stmt_info
= vinfo_for_stmt (stmt
);
4824 loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4825 gcc_assert (pattern_vectype
);
4827 /* Found a vectorizable pattern. */
4828 if (dump_enabled_p ())
4830 dump_printf_loc (MSG_NOTE
, vect_location
,
4831 "%s pattern recognized: ", recog_func
->name
);
4832 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
4835 /* Mark the stmts that are involved in the pattern. */
4836 vect_mark_pattern_stmts (stmt
, pattern_stmt
, pattern_vectype
);
4838 /* Patterns cannot be vectorized using SLP, because they change the order of
4844 VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo
), ix
, ix2
,
4845 elem_ptr
, *elem_ptr
== stmt
);
4848 /* It is possible that additional pattern stmts are created and inserted in
4849 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
4850 relevant statements. */
4851 for (i
= 0; stmts_to_replace
->iterate (i
, &stmt
)
4852 && (unsigned) i
< (stmts_to_replace
->length () - 1);
4855 stmt_info
= vinfo_for_stmt (stmt
);
4856 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
4857 if (dump_enabled_p ())
4859 dump_printf_loc (MSG_NOTE
, vect_location
,
4860 "additional pattern stmt: ");
4861 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
4864 vect_mark_pattern_stmts (stmt
, pattern_stmt
, NULL_TREE
);
4871 /* Function vect_pattern_recog
4874 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
4877 Output - for each computation idiom that is detected we create a new stmt
4878 that provides the same functionality and that can be vectorized. We
4879 also record some information in the struct_stmt_info of the relevant
4880 stmts, as explained below:
4882 At the entry to this function we have the following stmts, with the
4883 following initial value in the STMT_VINFO fields:
4885 stmt in_pattern_p related_stmt vec_stmt
4886 S1: a_i = .... - - -
4887 S2: a_2 = ..use(a_i).. - - -
4888 S3: a_1 = ..use(a_2).. - - -
4889 S4: a_0 = ..use(a_1).. - - -
4890 S5: ... = ..use(a_0).. - - -
4892 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
4893 represented by a single stmt. We then:
4894 - create a new stmt S6 equivalent to the pattern (the stmt is not
4895 inserted into the code)
4896 - fill in the STMT_VINFO fields as follows:
4898 in_pattern_p related_stmt vec_stmt
4899 S1: a_i = .... - - -
4900 S2: a_2 = ..use(a_i).. - - -
4901 S3: a_1 = ..use(a_2).. - - -
4902 S4: a_0 = ..use(a_1).. true S6 -
4903 '---> S6: a_new = .... - S4 -
4904 S5: ... = ..use(a_0).. - - -
4906 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
4907 to each other through the RELATED_STMT field).
4909 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
4910 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
4911 remain irrelevant unless used by stmts other than S4.
4913 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
4914 (because they are marked as irrelevant). It will vectorize S6, and record
4915 a pointer to the new vector stmt VS6 from S6 (as usual).
4916 S4 will be skipped, and S5 will be vectorized as usual:
4918 in_pattern_p related_stmt vec_stmt
4919 S1: a_i = .... - - -
4920 S2: a_2 = ..use(a_i).. - - -
4921 S3: a_1 = ..use(a_2).. - - -
4922 > VS6: va_new = .... - - -
4923 S4: a_0 = ..use(a_1).. true S6 VS6
4924 '---> S6: a_new = .... - S4 VS6
4925 > VS5: ... = ..vuse(va_new).. - - -
4926 S5: ... = ..use(a_0).. - - -
4928 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
4929 elsewhere), and we'll end up with:
4932 VS5: ... = ..vuse(va_new)..
4934 In case of more than one pattern statements, e.g., widen-mult with
4938 S2 a_T = (TYPE) a_t;
4939 '--> S3: a_it = (interm_type) a_t;
4940 S4 prod_T = a_T * CONST;
4941 '--> S5: prod_T' = a_it w* CONST;
4943 there may be other users of a_T outside the pattern. In that case S2 will
4944 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
4945 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
4946 be recorded in S3. */
4949 vect_pattern_recog (vec_info
*vinfo
)
4954 gimple_stmt_iterator si
;
4956 auto_vec
<gimple
*, 1> stmts_to_replace
;
4958 vect_determine_precisions (vinfo
);
4960 DUMP_VECT_SCOPE ("vect_pattern_recog");
4962 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
4964 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
4965 bbs
= LOOP_VINFO_BBS (loop_vinfo
);
4966 nbbs
= loop
->num_nodes
;
4968 /* Scan through the loop stmts, applying the pattern recognition
4969 functions starting at each stmt visited: */
4970 for (i
= 0; i
< nbbs
; i
++)
4972 basic_block bb
= bbs
[i
];
4973 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
4974 /* Scan over all generic vect_recog_xxx_pattern functions. */
4975 for (j
= 0; j
< NUM_PATTERNS
; j
++)
4976 vect_pattern_recog_1 (&vect_vect_recog_func_ptrs
[j
], si
,
4982 bb_vec_info bb_vinfo
= as_a
<bb_vec_info
> (vinfo
);
4983 for (si
= bb_vinfo
->region_begin
;
4984 gsi_stmt (si
) != gsi_stmt (bb_vinfo
->region_end
); gsi_next (&si
))
4986 gimple
*stmt
= gsi_stmt (si
);
4987 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4988 if (stmt_info
&& !STMT_VINFO_VECTORIZABLE (stmt_info
))
4991 /* Scan over all generic vect_recog_xxx_pattern functions. */
4992 for (j
= 0; j
< NUM_PATTERNS
; j
++)
4993 vect_pattern_recog_1 (&vect_vect_recog_func_ptrs
[j
], si
,