1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2018 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
30 #include "optabs-tree.h"
31 #include "insn-config.h"
32 #include "recog.h" /* FIXME: for insn_data */
33 #include "fold-const.h"
34 #include "stor-layout.h"
37 #include "gimple-iterator.h"
39 #include "tree-vectorizer.h"
42 #include "internal-fn.h"
43 #include "case-cfn-macros.h"
44 #include "fold-const-call.h"
47 #include "omp-simd-clone.h"
50 /* Return true if we have a useful VR_RANGE range for VAR, storing it
51 in *MIN_VALUE and *MAX_VALUE if so. Note the range in the dump files. */
54 vect_get_range_info (tree var
, wide_int
*min_value
, wide_int
*max_value
)
56 value_range_type vr_type
= get_range_info (var
, min_value
, max_value
);
57 wide_int nonzero
= get_nonzero_bits (var
);
58 signop sgn
= TYPE_SIGN (TREE_TYPE (var
));
59 if (intersect_range_with_nonzero_bits (vr_type
, min_value
, max_value
,
60 nonzero
, sgn
) == VR_RANGE
)
62 if (dump_enabled_p ())
64 dump_generic_expr_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, var
);
65 dump_printf (MSG_NOTE
, " has range [");
66 dump_hex (MSG_NOTE
, *min_value
);
67 dump_printf (MSG_NOTE
, ", ");
68 dump_hex (MSG_NOTE
, *max_value
);
69 dump_printf (MSG_NOTE
, "]\n");
75 if (dump_enabled_p ())
77 dump_generic_expr_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, var
);
78 dump_printf (MSG_NOTE
, " has no range info\n");
84 /* Report that we've found an instance of pattern PATTERN in
88 vect_pattern_detected (const char *name
, gimple
*stmt
)
90 if (dump_enabled_p ())
92 dump_printf_loc (MSG_NOTE
, vect_location
, "%s: detected: ", name
);
93 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
97 /* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO.
98 Set its vector type to VECTYPE if it doesn't have one already. */
101 vect_init_pattern_stmt (gimple
*pattern_stmt
, stmt_vec_info orig_stmt_info
,
104 stmt_vec_info pattern_stmt_info
= vinfo_for_stmt (pattern_stmt
);
105 if (pattern_stmt_info
== NULL
)
107 pattern_stmt_info
= new_stmt_vec_info (pattern_stmt
,
108 orig_stmt_info
->vinfo
);
109 set_vinfo_for_stmt (pattern_stmt
, pattern_stmt_info
);
111 gimple_set_bb (pattern_stmt
, gimple_bb (orig_stmt_info
->stmt
));
113 STMT_VINFO_RELATED_STMT (pattern_stmt_info
) = orig_stmt_info
->stmt
;
114 STMT_VINFO_DEF_TYPE (pattern_stmt_info
)
115 = STMT_VINFO_DEF_TYPE (orig_stmt_info
);
116 if (!STMT_VINFO_VECTYPE (pattern_stmt_info
))
117 STMT_VINFO_VECTYPE (pattern_stmt_info
) = vectype
;
120 /* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
121 Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
125 vect_set_pattern_stmt (gimple
*pattern_stmt
, stmt_vec_info orig_stmt_info
,
128 STMT_VINFO_IN_PATTERN_P (orig_stmt_info
) = true;
129 STMT_VINFO_RELATED_STMT (orig_stmt_info
) = pattern_stmt
;
130 vect_init_pattern_stmt (pattern_stmt
, orig_stmt_info
, vectype
);
133 /* Add NEW_STMT to STMT_INFO's pattern definition statements. If VECTYPE
134 is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
135 be different from the vector type of the final pattern statement. */
138 append_pattern_def_seq (stmt_vec_info stmt_info
, gimple
*new_stmt
,
139 tree vectype
= NULL_TREE
)
141 vec_info
*vinfo
= stmt_info
->vinfo
;
144 gcc_assert (!vinfo_for_stmt (new_stmt
));
145 stmt_vec_info new_stmt_info
= new_stmt_vec_info (new_stmt
, vinfo
);
146 set_vinfo_for_stmt (new_stmt
, new_stmt_info
);
147 STMT_VINFO_VECTYPE (new_stmt_info
) = vectype
;
149 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
),
153 /* The caller wants to perform new operations on vect_external variable
154 VAR, so that the result of the operations would also be vect_external.
155 Return the edge on which the operations can be performed, if one exists.
156 Return null if the operations should instead be treated as part of
157 the pattern that needs them. */
160 vect_get_external_def_edge (vec_info
*vinfo
, tree var
)
163 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
165 e
= loop_preheader_edge (loop_vinfo
->loop
);
166 if (!SSA_NAME_IS_DEFAULT_DEF (var
))
168 basic_block bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
170 || !dominated_by_p (CDI_DOMINATORS
, e
->dest
, bb
))
177 /* Return true if the target supports a vector version of CODE,
178 where CODE is known to map to a direct optab. ITYPE specifies
179 the type of (some of) the scalar inputs and OTYPE specifies the
180 type of the scalar result.
182 If CODE allows the inputs and outputs to have different type
183 (such as for WIDEN_SUM_EXPR), it is the input mode rather
184 than the output mode that determines the appropriate target pattern.
185 Operand 0 of the target pattern then specifies the mode that the output
188 When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
189 Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
193 vect_supportable_direct_optab_p (tree otype
, tree_code code
,
194 tree itype
, tree
*vecotype_out
,
195 tree
*vecitype_out
= NULL
)
197 tree vecitype
= get_vectype_for_scalar_type (itype
);
201 tree vecotype
= get_vectype_for_scalar_type (otype
);
205 optab optab
= optab_for_tree_code (code
, vecitype
, optab_default
);
209 insn_code icode
= optab_handler (optab
, TYPE_MODE (vecitype
));
210 if (icode
== CODE_FOR_nothing
211 || insn_data
[icode
].operand
[0].mode
!= TYPE_MODE (vecotype
))
214 *vecotype_out
= vecotype
;
216 *vecitype_out
= vecitype
;
220 /* Round bit precision PRECISION up to a full element. */
223 vect_element_precision (unsigned int precision
)
225 precision
= 1 << ceil_log2 (precision
);
226 return MAX (precision
, BITS_PER_UNIT
);
229 /* If OP is defined by a statement that's being considered for vectorization,
230 return information about that statement, otherwise return NULL. */
233 vect_get_internal_def (vec_info
*vinfo
, tree op
)
237 if (TREE_CODE (op
) != SSA_NAME
238 || !vect_is_simple_use (op
, vinfo
, &dt
, &def_stmt
)
239 || dt
!= vect_internal_def
)
242 return vinfo_for_stmt (def_stmt
);
245 /* Check whether NAME, an ssa-name used in USE_STMT,
246 is a result of a type promotion, such that:
247 DEF_STMT: NAME = NOP (name0)
248 If CHECK_SIGN is TRUE, check that either both types are signed or both are
252 type_conversion_p (tree name
, gimple
*use_stmt
, bool check_sign
,
253 tree
*orig_type
, gimple
**def_stmt
, bool *promotion
)
255 stmt_vec_info stmt_vinfo
;
256 tree type
= TREE_TYPE (name
);
258 enum vect_def_type dt
;
260 stmt_vinfo
= vinfo_for_stmt (use_stmt
);
261 if (!vect_is_simple_use (name
, stmt_vinfo
->vinfo
, &dt
, def_stmt
))
264 if (dt
!= vect_internal_def
265 && dt
!= vect_external_def
&& dt
!= vect_constant_def
)
271 if (!is_gimple_assign (*def_stmt
))
274 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt
)))
277 oprnd0
= gimple_assign_rhs1 (*def_stmt
);
279 *orig_type
= TREE_TYPE (oprnd0
);
280 if (!INTEGRAL_TYPE_P (type
) || !INTEGRAL_TYPE_P (*orig_type
)
281 || ((TYPE_UNSIGNED (type
) != TYPE_UNSIGNED (*orig_type
)) && check_sign
))
284 if (TYPE_PRECISION (type
) >= (TYPE_PRECISION (*orig_type
) * 2))
289 if (!vect_is_simple_use (oprnd0
, stmt_vinfo
->vinfo
, &dt
))
295 /* Holds information about an input operand after some sign changes
296 and type promotions have been peeled away. */
297 struct vect_unpromoted_value
{
298 vect_unpromoted_value ();
300 void set_op (tree
, vect_def_type
, stmt_vec_info
= NULL
);
302 /* The value obtained after peeling away zero or more casts. */
305 /* The type of OP. */
308 /* The definition type of OP. */
311 /* If OP is the result of peeling at least one cast, and if the cast
312 of OP itself is a vectorizable statement, CASTER identifies that
313 statement, otherwise it is null. */
314 stmt_vec_info caster
;
317 inline vect_unpromoted_value::vect_unpromoted_value ()
320 dt (vect_uninitialized_def
),
325 /* Set the operand to OP_IN, its definition type to DT_IN, and the
326 statement that casts it to CASTER_IN. */
329 vect_unpromoted_value::set_op (tree op_in
, vect_def_type dt_in
,
330 stmt_vec_info caster_in
)
333 type
= TREE_TYPE (op
);
338 /* If OP is a vectorizable SSA name, strip a sequence of integer conversions
339 to reach some vectorizable inner operand OP', continuing as long as it
340 is possible to convert OP' back to OP using a possible sign change
341 followed by a possible promotion P. Return this OP', or null if OP is
342 not a vectorizable SSA name. If there is a promotion P, describe its
343 input in UNPROM, otherwise describe OP' in UNPROM. If SINGLE_USE_P
344 is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
345 have more than one user.
347 A successful return means that it is possible to go from OP' to OP
348 via UNPROM. The cast from OP' to UNPROM is at most a sign change,
349 whereas the cast from UNPROM to OP might be a promotion, a sign
354 signed short *ptr = ...;
355 signed short C = *ptr;
356 unsigned short B = (unsigned short) C; // sign change
357 signed int A = (signed int) B; // unsigned promotion
358 ...possible other uses of A...
359 unsigned int OP = (unsigned int) A; // sign change
361 In this case it's possible to go directly from C to OP using:
363 OP = (unsigned int) (unsigned short) C;
364 +------------+ +--------------+
365 promotion sign change
367 so OP' would be C. The input to the promotion is B, so UNPROM
371 vect_look_through_possible_promotion (vec_info
*vinfo
, tree op
,
372 vect_unpromoted_value
*unprom
,
373 bool *single_use_p
= NULL
)
375 tree res
= NULL_TREE
;
376 tree op_type
= TREE_TYPE (op
);
377 unsigned int orig_precision
= TYPE_PRECISION (op_type
);
378 stmt_vec_info caster
= NULL
;
379 while (TREE_CODE (op
) == SSA_NAME
&& INTEGRAL_TYPE_P (op_type
))
381 /* See whether OP is simple enough to vectorize. */
384 if (!vect_is_simple_use (op
, vinfo
, &dt
, &def_stmt
))
387 /* If OP is the input of a demotion, skip over it to see whether
388 OP is itself the result of a promotion. If so, the combined
389 effect of the promotion and the demotion might fit the required
390 pattern, otherwise neither operation fits.
392 This copes with cases such as the result of an arithmetic
393 operation being truncated before being stored, and where that
394 arithmetic operation has been recognized as an over-widened one. */
395 if (TYPE_PRECISION (op_type
) <= orig_precision
)
397 /* Use OP as the UNPROM described above if we haven't yet
398 found a promotion, or if using the new input preserves the
399 sign of the previous promotion. */
401 || TYPE_PRECISION (unprom
->type
) == orig_precision
402 || TYPE_SIGN (unprom
->type
) == TYPE_SIGN (op_type
))
403 unprom
->set_op (op
, dt
, caster
);
404 /* Stop if we've already seen a promotion and if this
405 conversion does more than change the sign. */
406 else if (TYPE_PRECISION (op_type
)
407 != TYPE_PRECISION (unprom
->type
))
410 /* The sequence now extends to OP. */
414 /* See whether OP is defined by a cast. Record it as CASTER if
415 the cast is potentially vectorizable. */
418 if (dt
== vect_internal_def
)
420 caster
= vinfo_for_stmt (def_stmt
);
421 /* Ignore pattern statements, since we don't link uses for them. */
423 && !STMT_VINFO_RELATED_STMT (caster
)
424 && !has_single_use (res
))
425 *single_use_p
= false;
429 gassign
*assign
= dyn_cast
<gassign
*> (def_stmt
);
430 if (!assign
|| !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt
)))
433 /* Continue with the input to the cast. */
434 op
= gimple_assign_rhs1 (def_stmt
);
435 op_type
= TREE_TYPE (op
);
440 /* OP is an integer operand to an operation that returns TYPE, and we
441 want to treat the operation as a widening one. So far we can treat
442 it as widening from *COMMON_TYPE.
444 Return true if OP is suitable for such a widening operation,
445 either widening from *COMMON_TYPE or from some supertype of it.
446 Update *COMMON_TYPE to the supertype in the latter case.
448 SHIFT_P is true if OP is a shift amount. */
451 vect_joust_widened_integer (tree type
, bool shift_p
, tree op
,
454 /* Calculate the minimum precision required by OP, without changing
455 the sign of either operand. */
456 unsigned int precision
;
459 if (!wi::leu_p (wi::to_widest (op
), TYPE_PRECISION (type
) / 2))
461 precision
= TREE_INT_CST_LOW (op
);
465 precision
= wi::min_precision (wi::to_widest (op
),
466 TYPE_SIGN (*common_type
));
467 if (precision
* 2 > TYPE_PRECISION (type
))
471 /* If OP requires a wider type, switch to that type. The checks
472 above ensure that this is still narrower than the result. */
473 precision
= vect_element_precision (precision
);
474 if (TYPE_PRECISION (*common_type
) < precision
)
475 *common_type
= build_nonstandard_integer_type
476 (precision
, TYPE_UNSIGNED (*common_type
));
480 /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
481 is narrower than type, storing the supertype in *COMMON_TYPE if so. */
484 vect_joust_widened_type (tree type
, tree new_type
, tree
*common_type
)
486 if (types_compatible_p (*common_type
, new_type
))
489 /* See if *COMMON_TYPE can hold all values of NEW_TYPE. */
490 if ((TYPE_PRECISION (new_type
) < TYPE_PRECISION (*common_type
))
491 && (TYPE_UNSIGNED (new_type
) || !TYPE_UNSIGNED (*common_type
)))
494 /* See if NEW_TYPE can hold all values of *COMMON_TYPE. */
495 if (TYPE_PRECISION (*common_type
) < TYPE_PRECISION (new_type
)
496 && (TYPE_UNSIGNED (*common_type
) || !TYPE_UNSIGNED (new_type
)))
498 *common_type
= new_type
;
502 /* We have mismatched signs, with the signed type being
503 no wider than the unsigned type. In this case we need
504 a wider signed type. */
505 unsigned int precision
= MAX (TYPE_PRECISION (*common_type
),
506 TYPE_PRECISION (new_type
));
508 if (precision
* 2 > TYPE_PRECISION (type
))
511 *common_type
= build_nonstandard_integer_type (precision
, false);
515 /* Check whether STMT_INFO can be viewed as a tree of integer operations
516 in which each node either performs CODE or WIDENED_CODE, and where
517 each leaf operand is narrower than the result of STMT_INFO. MAX_NOPS
518 specifies the maximum number of leaf operands. SHIFT_P says whether
519 CODE and WIDENED_CODE are some sort of shift.
521 If STMT_INFO is such a tree, return the number of leaf operands
522 and describe them in UNPROM[0] onwards. Also set *COMMON_TYPE
523 to a type that (a) is narrower than the result of STMT_INFO and
524 (b) can hold all leaf operand values.
526 Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
530 vect_widened_op_tree (stmt_vec_info stmt_info
, tree_code code
,
531 tree_code widened_code
, bool shift_p
,
532 unsigned int max_nops
,
533 vect_unpromoted_value
*unprom
, tree
*common_type
)
535 /* Check for an integer operation with the right code. */
536 gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
540 tree_code rhs_code
= gimple_assign_rhs_code (assign
);
541 if (rhs_code
!= code
&& rhs_code
!= widened_code
)
544 tree type
= gimple_expr_type (assign
);
545 if (!INTEGRAL_TYPE_P (type
))
548 /* Assume that both operands will be leaf operands. */
551 /* Check the operands. */
552 unsigned int next_op
= 0;
553 for (unsigned int i
= 0; i
< 2; ++i
)
555 vect_unpromoted_value
*this_unprom
= &unprom
[next_op
];
556 unsigned int nops
= 1;
557 tree op
= gimple_op (assign
, i
+ 1);
558 if (i
== 1 && TREE_CODE (op
) == INTEGER_CST
)
560 /* We already have a common type from earlier operands.
561 Update it to account for OP. */
562 this_unprom
->set_op (op
, vect_constant_def
);
563 if (!vect_joust_widened_integer (type
, shift_p
, op
, common_type
))
568 /* Only allow shifts by constants. */
569 if (shift_p
&& i
== 1)
572 if (!vect_look_through_possible_promotion (stmt_info
->vinfo
, op
,
576 if (TYPE_PRECISION (this_unprom
->type
) == TYPE_PRECISION (type
))
578 /* The operand isn't widened. If STMT_INFO has the code
579 for an unwidened operation, recursively check whether
580 this operand is a node of the tree. */
583 || this_unprom
->dt
!= vect_internal_def
)
586 /* Give back the leaf slot allocated above now that we're
587 not treating this as a leaf operand. */
590 /* Recursively process the definition of the operand. */
591 stmt_vec_info def_stmt_info
592 = vinfo_for_stmt (SSA_NAME_DEF_STMT (this_unprom
->op
));
593 nops
= vect_widened_op_tree (def_stmt_info
, code
, widened_code
,
594 shift_p
, max_nops
, this_unprom
,
603 /* Make sure that the operand is narrower than the result. */
604 if (TYPE_PRECISION (this_unprom
->type
) * 2
605 > TYPE_PRECISION (type
))
608 /* Update COMMON_TYPE for the new operand. */
610 *common_type
= this_unprom
->type
;
611 else if (!vect_joust_widened_type (type
, this_unprom
->type
,
621 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
622 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
625 vect_recog_temp_ssa_var (tree type
, gimple
*stmt
)
627 return make_temp_ssa_name (type
, stmt
, "patt");
630 /* STMT2_INFO describes a type conversion that could be split into STMT1
631 followed by a version of STMT2_INFO that takes NEW_RHS as its first
632 input. Try to do this using pattern statements, returning true on
636 vect_split_statement (stmt_vec_info stmt2_info
, tree new_rhs
,
637 gimple
*stmt1
, tree vectype
)
639 if (is_pattern_stmt_p (stmt2_info
))
641 /* STMT2_INFO is part of a pattern. Get the statement to which
642 the pattern is attached. */
643 stmt_vec_info orig_stmt2_info
644 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt2_info
));
645 vect_init_pattern_stmt (stmt1
, orig_stmt2_info
, vectype
);
647 if (dump_enabled_p ())
649 dump_printf_loc (MSG_NOTE
, vect_location
,
650 "Splitting pattern statement: ");
651 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt2_info
->stmt
, 0);
654 /* Since STMT2_INFO is a pattern statement, we can change it
655 in-situ without worrying about changing the code for the
657 gimple_assign_set_rhs1 (stmt2_info
->stmt
, new_rhs
);
659 if (dump_enabled_p ())
661 dump_printf_loc (MSG_NOTE
, vect_location
, "into: ");
662 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt1
, 0);
663 dump_printf_loc (MSG_NOTE
, vect_location
, "and: ");
664 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt2_info
->stmt
, 0);
667 gimple_seq
*def_seq
= &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info
);
668 if (STMT_VINFO_RELATED_STMT (orig_stmt2_info
) == stmt2_info
->stmt
)
669 /* STMT2_INFO is the actual pattern statement. Add STMT1
670 to the end of the definition sequence. */
671 gimple_seq_add_stmt_without_update (def_seq
, stmt1
);
674 /* STMT2_INFO belongs to the definition sequence. Insert STMT1
676 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt2_info
->stmt
, def_seq
);
677 gsi_insert_before_without_update (&gsi
, stmt1
, GSI_SAME_STMT
);
683 /* STMT2_INFO doesn't yet have a pattern. Try to create a
684 two-statement pattern now. */
685 gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info
));
686 tree lhs_type
= TREE_TYPE (gimple_get_lhs (stmt2_info
->stmt
));
687 tree lhs_vectype
= get_vectype_for_scalar_type (lhs_type
);
691 if (dump_enabled_p ())
693 dump_printf_loc (MSG_NOTE
, vect_location
,
694 "Splitting statement: ");
695 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt2_info
->stmt
, 0);
698 /* Add STMT1 as a singleton pattern definition sequence. */
699 gimple_seq
*def_seq
= &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info
);
700 vect_init_pattern_stmt (stmt1
, stmt2_info
, vectype
);
701 gimple_seq_add_stmt_without_update (def_seq
, stmt1
);
703 /* Build the second of the two pattern statements. */
704 tree new_lhs
= vect_recog_temp_ssa_var (lhs_type
, NULL
);
705 gassign
*new_stmt2
= gimple_build_assign (new_lhs
, NOP_EXPR
, new_rhs
);
706 vect_set_pattern_stmt (new_stmt2
, stmt2_info
, lhs_vectype
);
708 if (dump_enabled_p ())
710 dump_printf_loc (MSG_NOTE
, vect_location
,
711 "into pattern statements: ");
712 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt1
, 0);
713 dump_printf_loc (MSG_NOTE
, vect_location
, "and: ");
714 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, new_stmt2
, 0);
721 /* Convert UNPROM to TYPE and return the result, adding new statements
722 to STMT_INFO's pattern definition statements if no better way is
723 available. VECTYPE is the vector form of TYPE. */
726 vect_convert_input (stmt_vec_info stmt_info
, tree type
,
727 vect_unpromoted_value
*unprom
, tree vectype
)
729 /* Check for a no-op conversion. */
730 if (types_compatible_p (type
, TREE_TYPE (unprom
->op
)))
733 /* Allow the caller to create constant vect_unpromoted_values. */
734 if (TREE_CODE (unprom
->op
) == INTEGER_CST
)
735 return wide_int_to_tree (type
, wi::to_widest (unprom
->op
));
737 /* See if we can reuse an existing result. */
740 tree lhs
= gimple_get_lhs (unprom
->caster
->stmt
);
741 if (types_compatible_p (TREE_TYPE (lhs
), type
))
745 /* We need a new conversion statement. */
746 tree new_op
= vect_recog_temp_ssa_var (type
, NULL
);
747 gassign
*new_stmt
= gimple_build_assign (new_op
, NOP_EXPR
, unprom
->op
);
749 /* If the operation is the input to a vectorizable cast, try splitting
750 that cast into two, taking the required result as a mid-way point. */
753 tree lhs
= gimple_get_lhs (unprom
->caster
->stmt
);
754 if (TYPE_PRECISION (TREE_TYPE (lhs
)) > TYPE_PRECISION (type
)
755 && TYPE_PRECISION (type
) > TYPE_PRECISION (unprom
->type
)
756 && (TYPE_UNSIGNED (unprom
->type
) || !TYPE_UNSIGNED (type
))
757 && vect_split_statement (unprom
->caster
, new_op
, new_stmt
, vectype
))
761 /* If OP is an external value, see if we can insert the new statement
762 on an incoming edge. */
763 if (unprom
->dt
== vect_external_def
)
764 if (edge e
= vect_get_external_def_edge (stmt_info
->vinfo
, unprom
->op
))
766 basic_block new_bb
= gsi_insert_on_edge_immediate (e
, new_stmt
);
767 gcc_assert (!new_bb
);
771 /* As a (common) last resort, add the statement to the pattern itself. */
772 append_pattern_def_seq (stmt_info
, new_stmt
, vectype
);
776 /* Invoke vect_convert_input for N elements of UNPROM and store the
777 result in the corresponding elements of RESULT. */
780 vect_convert_inputs (stmt_vec_info stmt_info
, unsigned int n
,
781 tree
*result
, tree type
, vect_unpromoted_value
*unprom
,
784 for (unsigned int i
= 0; i
< n
; ++i
)
787 for (j
= 0; j
< i
; ++j
)
788 if (unprom
[j
].op
== unprom
[i
].op
)
791 result
[i
] = result
[j
];
793 result
[i
] = vect_convert_input (stmt_info
, type
, &unprom
[i
], vectype
);
797 /* The caller has created a (possibly empty) sequence of pattern definition
798 statements followed by a single statement PATTERN_STMT. Cast the result
799 of this final statement to TYPE. If a new statement is needed, add
800 PATTERN_STMT to the end of STMT_INFO's pattern definition statements
801 and return the new statement, otherwise return PATTERN_STMT as-is.
802 VECITYPE is the vector form of PATTERN_STMT's result type. */
805 vect_convert_output (stmt_vec_info stmt_info
, tree type
, gimple
*pattern_stmt
,
808 tree lhs
= gimple_get_lhs (pattern_stmt
);
809 if (!types_compatible_p (type
, TREE_TYPE (lhs
)))
811 append_pattern_def_seq (stmt_info
, pattern_stmt
, vecitype
);
812 tree cast_var
= vect_recog_temp_ssa_var (type
, NULL
);
813 pattern_stmt
= gimple_build_assign (cast_var
, NOP_EXPR
, lhs
);
818 /* Return true if STMT_VINFO describes a reduction for which reassociation
819 is allowed. If STMT_INFO is part of a group, assume that it's part of
820 a reduction chain and optimistically assume that all statements
821 except the last allow reassociation. */
824 vect_reassociating_reduction_p (stmt_vec_info stmt_vinfo
)
826 return (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
827 ? STMT_VINFO_REDUC_TYPE (stmt_vinfo
) != FOLD_LEFT_REDUCTION
828 : REDUC_GROUP_FIRST_ELEMENT (stmt_vinfo
) != NULL
);
831 /* As above, but also require it to have code CODE and to be a reduction
832 in the outermost loop. When returning true, store the operands in
833 *OP0_OUT and *OP1_OUT. */
836 vect_reassociating_reduction_p (stmt_vec_info stmt_info
, tree_code code
,
837 tree
*op0_out
, tree
*op1_out
)
839 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_info
);
843 gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
844 if (!assign
|| gimple_assign_rhs_code (assign
) != code
)
847 /* We don't allow changing the order of the computation in the inner-loop
848 when doing outer-loop vectorization. */
849 struct loop
*loop
= LOOP_VINFO_LOOP (loop_info
);
850 if (loop
&& nested_in_vect_loop_p (loop
, assign
))
853 if (!vect_reassociating_reduction_p (stmt_info
))
856 *op0_out
= gimple_assign_rhs1 (assign
);
857 *op1_out
= gimple_assign_rhs2 (assign
);
861 /* Function vect_recog_dot_prod_pattern
863 Try to find the following pattern:
869 sum_0 = phi <init, sum_1>
872 S3 x_T = (TYPE1) x_t;
873 S4 y_T = (TYPE1) y_t;
875 [S6 prod = (TYPE2) prod; #optional]
876 S7 sum_1 = prod + sum_0;
878 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
879 same size of 'TYPE1' or bigger. This is a special case of a reduction
884 * STMT_VINFO: The stmt from which the pattern search begins. In the
885 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
890 * TYPE_OUT: The type of the output of this pattern.
892 * Return value: A new stmt that will be used to replace the sequence of
893 stmts that constitute the pattern. In this case it will be:
894 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
896 Note: The dot-prod idiom is a widening reduction pattern that is
897 vectorized without preserving all the intermediate results. It
898 produces only N/2 (widened) results (by summing up pairs of
899 intermediate results) rather than all N results. Therefore, we
900 cannot allow this pattern when we want to get all the results and in
901 the correct order (as is the case when this computation is in an
902 inner-loop nested in an outer-loop that us being vectorized). */
905 vect_recog_dot_prod_pattern (stmt_vec_info stmt_vinfo
, tree
*type_out
)
908 gimple
*last_stmt
= stmt_vinfo
->stmt
;
909 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
910 tree type
, half_type
;
911 gimple
*pattern_stmt
;
914 /* Look for the following pattern
918 DDPROD = (TYPE2) DPROD;
919 sum_1 = DDPROD + sum_0;
921 - DX is double the size of X
922 - DY is double the size of Y
923 - DX, DY, DPROD all have the same type
924 - sum is the same size of DPROD or bigger
925 - sum has been recognized as a reduction variable.
927 This is equivalent to:
928 DPROD = X w* Y; #widen mult
929 sum_1 = DPROD w+ sum_0; #widen summation
931 DPROD = X w* Y; #widen mult
932 sum_1 = DPROD + sum_0; #summation
935 /* Starting from LAST_STMT, follow the defs of its uses in search
936 of the above pattern. */
938 if (!vect_reassociating_reduction_p (stmt_vinfo
, PLUS_EXPR
,
942 type
= gimple_expr_type (last_stmt
);
944 vect_unpromoted_value unprom_mult
;
945 oprnd0
= vect_look_through_possible_promotion (vinfo
, oprnd0
, &unprom_mult
);
947 /* So far so good. Since last_stmt was detected as a (summation) reduction,
948 we know that oprnd1 is the reduction variable (defined by a loop-header
949 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
950 Left to check that oprnd0 is defined by a (widen_)mult_expr */
954 stmt_vec_info mult_vinfo
= vect_get_internal_def (vinfo
, oprnd0
);
958 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
959 inside the loop (in case we are analyzing an outer-loop). */
960 vect_unpromoted_value unprom0
[2];
961 if (!vect_widened_op_tree (mult_vinfo
, MULT_EXPR
, WIDEN_MULT_EXPR
,
962 false, 2, unprom0
, &half_type
))
965 /* If there are two widening operations, make sure they agree on
966 the sign of the extension. */
967 if (TYPE_PRECISION (unprom_mult
.type
) != TYPE_PRECISION (type
)
968 && TYPE_SIGN (unprom_mult
.type
) != TYPE_SIGN (half_type
))
971 vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt
);
974 if (!vect_supportable_direct_optab_p (type
, DOT_PROD_EXPR
, half_type
,
975 type_out
, &half_vectype
))
978 /* Get the inputs in the appropriate types. */
980 vect_convert_inputs (stmt_vinfo
, 2, mult_oprnd
, half_type
,
981 unprom0
, half_vectype
);
983 var
= vect_recog_temp_ssa_var (type
, NULL
);
984 pattern_stmt
= gimple_build_assign (var
, DOT_PROD_EXPR
,
985 mult_oprnd
[0], mult_oprnd
[1], oprnd1
);
991 /* Function vect_recog_sad_pattern
993 Try to find the following Sum of Absolute Difference (SAD) pattern:
996 signed TYPE1 diff, abs_diff;
999 sum_0 = phi <init, sum_1>
1002 S3 x_T = (TYPE1) x_t;
1003 S4 y_T = (TYPE1) y_t;
1004 S5 diff = x_T - y_T;
1005 S6 abs_diff = ABS_EXPR <diff>;
1006 [S7 abs_diff = (TYPE2) abs_diff; #optional]
1007 S8 sum_1 = abs_diff + sum_0;
1009 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
1010 same size of 'TYPE1' or bigger. This is a special case of a reduction
1015 * STMT_VINFO: The stmt from which the pattern search begins. In the
1016 example, when this function is called with S8, the pattern
1017 {S3,S4,S5,S6,S7,S8} will be detected.
1021 * TYPE_OUT: The type of the output of this pattern.
1023 * Return value: A new stmt that will be used to replace the sequence of
1024 stmts that constitute the pattern. In this case it will be:
1025 SAD_EXPR <x_t, y_t, sum_0>
1029 vect_recog_sad_pattern (stmt_vec_info stmt_vinfo
, tree
*type_out
)
1031 gimple
*last_stmt
= stmt_vinfo
->stmt
;
1032 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
1035 /* Look for the following pattern
1039 DAD = ABS_EXPR <DDIFF>;
1040 DDPROD = (TYPE2) DPROD;
1041 sum_1 = DAD + sum_0;
1043 - DX is at least double the size of X
1044 - DY is at least double the size of Y
1045 - DX, DY, DDIFF, DAD all have the same type
1046 - sum is the same size of DAD or bigger
1047 - sum has been recognized as a reduction variable.
1049 This is equivalent to:
1050 DDIFF = X w- Y; #widen sub
1051 DAD = ABS_EXPR <DDIFF>;
1052 sum_1 = DAD w+ sum_0; #widen summation
1054 DDIFF = X w- Y; #widen sub
1055 DAD = ABS_EXPR <DDIFF>;
1056 sum_1 = DAD + sum_0; #summation
1059 /* Starting from LAST_STMT, follow the defs of its uses in search
1060 of the above pattern. */
1062 tree plus_oprnd0
, plus_oprnd1
;
1063 if (!vect_reassociating_reduction_p (stmt_vinfo
, PLUS_EXPR
,
1064 &plus_oprnd0
, &plus_oprnd1
))
1067 tree sum_type
= gimple_expr_type (last_stmt
);
1069 /* Any non-truncating sequence of conversions is OK here, since
1070 with a successful match, the result of the ABS(U) is known to fit
1071 within the nonnegative range of the result type. (It cannot be the
1072 negative of the minimum signed value due to the range of the widening
1074 vect_unpromoted_value unprom_abs
;
1075 plus_oprnd0
= vect_look_through_possible_promotion (vinfo
, plus_oprnd0
,
1078 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1079 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
1080 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
1081 Then check that plus_oprnd0 is defined by an abs_expr. */
1086 stmt_vec_info abs_stmt_vinfo
= vect_get_internal_def (vinfo
, plus_oprnd0
);
1087 if (!abs_stmt_vinfo
)
1090 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
1091 inside the loop (in case we are analyzing an outer-loop). */
1092 gassign
*abs_stmt
= dyn_cast
<gassign
*> (abs_stmt_vinfo
->stmt
);
1094 || (gimple_assign_rhs_code (abs_stmt
) != ABS_EXPR
1095 && gimple_assign_rhs_code (abs_stmt
) != ABSU_EXPR
))
1098 tree abs_oprnd
= gimple_assign_rhs1 (abs_stmt
);
1099 tree abs_type
= TREE_TYPE (abs_oprnd
);
1100 if (TYPE_UNSIGNED (abs_type
))
1103 /* Peel off conversions from the ABS input. This can involve sign
1104 changes (e.g. from an unsigned subtraction to a signed ABS input)
1105 or signed promotion, but it can't include unsigned promotion.
1106 (Note that ABS of an unsigned promotion should have been folded
1107 away before now anyway.) */
1108 vect_unpromoted_value unprom_diff
;
1109 abs_oprnd
= vect_look_through_possible_promotion (vinfo
, abs_oprnd
,
1113 if (TYPE_PRECISION (unprom_diff
.type
) != TYPE_PRECISION (abs_type
)
1114 && TYPE_UNSIGNED (unprom_diff
.type
))
1117 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
1118 stmt_vec_info diff_stmt_vinfo
= vect_get_internal_def (vinfo
, abs_oprnd
);
1119 if (!diff_stmt_vinfo
)
1122 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
1123 inside the loop (in case we are analyzing an outer-loop). */
1124 vect_unpromoted_value unprom
[2];
1125 if (!vect_widened_op_tree (diff_stmt_vinfo
, MINUS_EXPR
, MINUS_EXPR
,
1126 false, 2, unprom
, &half_type
))
1129 vect_pattern_detected ("vect_recog_sad_pattern", last_stmt
);
1132 if (!vect_supportable_direct_optab_p (sum_type
, SAD_EXPR
, half_type
,
1133 type_out
, &half_vectype
))
1136 /* Get the inputs to the SAD_EXPR in the appropriate types. */
1138 vect_convert_inputs (stmt_vinfo
, 2, sad_oprnd
, half_type
,
1139 unprom
, half_vectype
);
1141 tree var
= vect_recog_temp_ssa_var (sum_type
, NULL
);
1142 gimple
*pattern_stmt
= gimple_build_assign (var
, SAD_EXPR
, sad_oprnd
[0],
1143 sad_oprnd
[1], plus_oprnd1
);
1145 return pattern_stmt
;
1148 /* Recognize an operation that performs ORIG_CODE on widened inputs,
1149 so that it can be treated as though it had the form:
1153 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1154 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1155 | RES_TYPE a_extend = (RES_TYPE) a_cast; // promotion from HALF_TYPE
1156 | RES_TYPE b_extend = (RES_TYPE) b_cast; // promotion from HALF_TYPE
1157 | RES_TYPE res = a_extend ORIG_CODE b_extend;
1159 Try to replace the pattern with:
1163 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1164 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1165 | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
1166 | RES_TYPE res = (EXT_TYPE) ext; // possible no-op
1168 where EXT_TYPE is wider than HALF_TYPE but has the same signedness.
1170 SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts. NAME is the
1171 name of the pattern being matched, for dump purposes. */
1174 vect_recog_widen_op_pattern (stmt_vec_info last_stmt_info
, tree
*type_out
,
1175 tree_code orig_code
, tree_code wide_code
,
1176 bool shift_p
, const char *name
)
1178 gimple
*last_stmt
= last_stmt_info
->stmt
;
1180 vect_unpromoted_value unprom
[2];
1182 if (!vect_widened_op_tree (last_stmt_info
, orig_code
, orig_code
,
1183 shift_p
, 2, unprom
, &half_type
))
1186 /* Pattern detected. */
1187 vect_pattern_detected (name
, last_stmt
);
1189 tree type
= gimple_expr_type (last_stmt
);
1191 if (TYPE_PRECISION (type
) != TYPE_PRECISION (half_type
) * 2
1192 || TYPE_UNSIGNED (type
) != TYPE_UNSIGNED (half_type
))
1193 itype
= build_nonstandard_integer_type (TYPE_PRECISION (half_type
) * 2,
1194 TYPE_UNSIGNED (half_type
));
1196 /* Check target support */
1197 tree vectype
= get_vectype_for_scalar_type (half_type
);
1198 tree vecitype
= get_vectype_for_scalar_type (itype
);
1199 enum tree_code dummy_code
;
1201 auto_vec
<tree
> dummy_vec
;
1204 || !supportable_widening_operation (wide_code
, last_stmt
,
1206 &dummy_code
, &dummy_code
,
1207 &dummy_int
, &dummy_vec
))
1210 *type_out
= get_vectype_for_scalar_type (type
);
1215 vect_convert_inputs (last_stmt_info
, 2, oprnd
, half_type
, unprom
, vectype
);
1217 tree var
= vect_recog_temp_ssa_var (itype
, NULL
);
1218 gimple
*pattern_stmt
= gimple_build_assign (var
, wide_code
,
1219 oprnd
[0], oprnd
[1]);
1221 return vect_convert_output (last_stmt_info
, type
, pattern_stmt
, vecitype
);
1224 /* Try to detect multiplication on widened inputs, converting MULT_EXPR
1225 to WIDEN_MULT_EXPR. See vect_recog_widen_op_pattern for details. */
1228 vect_recog_widen_mult_pattern (stmt_vec_info last_stmt_info
, tree
*type_out
)
1230 return vect_recog_widen_op_pattern (last_stmt_info
, type_out
, MULT_EXPR
,
1231 WIDEN_MULT_EXPR
, false,
1232 "vect_recog_widen_mult_pattern");
1235 /* Function vect_recog_pow_pattern
1237 Try to find the following pattern:
1241 with POW being one of pow, powf, powi, powif and N being
1246 * STMT_VINFO: The stmt from which the pattern search begins.
1250 * TYPE_OUT: The type of the output of this pattern.
1252 * Return value: A new stmt that will be used to replace the sequence of
1253 stmts that constitute the pattern. In this case it will be:
1260 vect_recog_pow_pattern (stmt_vec_info stmt_vinfo
, tree
*type_out
)
1262 gimple
*last_stmt
= stmt_vinfo
->stmt
;
1267 if (!is_gimple_call (last_stmt
) || gimple_call_lhs (last_stmt
) == NULL
)
1270 switch (gimple_call_combined_fn (last_stmt
))
1280 base
= gimple_call_arg (last_stmt
, 0);
1281 exp
= gimple_call_arg (last_stmt
, 1);
1282 if (TREE_CODE (exp
) != REAL_CST
1283 && TREE_CODE (exp
) != INTEGER_CST
)
1285 if (flag_unsafe_math_optimizations
1286 && TREE_CODE (base
) == REAL_CST
1287 && !gimple_call_internal_p (last_stmt
))
1289 combined_fn log_cfn
;
1290 built_in_function exp_bfn
;
1291 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt
)))
1294 log_cfn
= CFN_BUILT_IN_LOG
;
1295 exp_bfn
= BUILT_IN_EXP
;
1298 log_cfn
= CFN_BUILT_IN_LOGF
;
1299 exp_bfn
= BUILT_IN_EXPF
;
1302 log_cfn
= CFN_BUILT_IN_LOGL
;
1303 exp_bfn
= BUILT_IN_EXPL
;
1308 tree logc
= fold_const_call (log_cfn
, TREE_TYPE (base
), base
);
1309 tree exp_decl
= builtin_decl_implicit (exp_bfn
);
1310 /* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd
1311 does that, but if C is a power of 2, we want to use
1312 exp2 (log2 (C) * x) in the non-vectorized version, but for
1313 vectorization we don't have vectorized exp2. */
1315 && TREE_CODE (logc
) == REAL_CST
1317 && lookup_attribute ("omp declare simd",
1318 DECL_ATTRIBUTES (exp_decl
)))
1320 cgraph_node
*node
= cgraph_node::get_create (exp_decl
);
1321 if (node
->simd_clones
== NULL
)
1323 if (targetm
.simd_clone
.compute_vecsize_and_simdlen
== NULL
1324 || node
->definition
)
1326 expand_simd_clones (node
);
1327 if (node
->simd_clones
== NULL
)
1330 *type_out
= get_vectype_for_scalar_type (TREE_TYPE (base
));
1333 tree def
= vect_recog_temp_ssa_var (TREE_TYPE (base
), NULL
);
1334 gimple
*g
= gimple_build_assign (def
, MULT_EXPR
, exp
, logc
);
1335 append_pattern_def_seq (stmt_vinfo
, g
);
1336 tree res
= vect_recog_temp_ssa_var (TREE_TYPE (base
), NULL
);
1337 g
= gimple_build_call (exp_decl
, 1, def
);
1338 gimple_call_set_lhs (g
, res
);
1346 /* We now have a pow or powi builtin function call with a constant
1349 /* Catch squaring. */
1350 if ((tree_fits_shwi_p (exp
)
1351 && tree_to_shwi (exp
) == 2)
1352 || (TREE_CODE (exp
) == REAL_CST
1353 && real_equal (&TREE_REAL_CST (exp
), &dconst2
)))
1355 if (!vect_supportable_direct_optab_p (TREE_TYPE (base
), MULT_EXPR
,
1356 TREE_TYPE (base
), type_out
))
1359 var
= vect_recog_temp_ssa_var (TREE_TYPE (base
), NULL
);
1360 stmt
= gimple_build_assign (var
, MULT_EXPR
, base
, base
);
1364 /* Catch square root. */
1365 if (TREE_CODE (exp
) == REAL_CST
1366 && real_equal (&TREE_REAL_CST (exp
), &dconsthalf
))
1368 *type_out
= get_vectype_for_scalar_type (TREE_TYPE (base
));
1370 && direct_internal_fn_supported_p (IFN_SQRT
, *type_out
,
1371 OPTIMIZE_FOR_SPEED
))
1373 gcall
*stmt
= gimple_build_call_internal (IFN_SQRT
, 1, base
);
1374 var
= vect_recog_temp_ssa_var (TREE_TYPE (base
), stmt
);
1375 gimple_call_set_lhs (stmt
, var
);
1376 gimple_call_set_nothrow (stmt
, true);
1385 /* Function vect_recog_widen_sum_pattern
1387 Try to find the following pattern:
1390 TYPE x_T, sum = init;
1392 sum_0 = phi <init, sum_1>
1394 S2 x_T = (TYPE) x_t;
1395 S3 sum_1 = x_T + sum_0;
1397 where type 'TYPE' is at least double the size of type 'type', i.e - we're
1398 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1399 a special case of a reduction computation.
1403 * STMT_VINFO: The stmt from which the pattern search begins. In the example,
1404 when this function is called with S3, the pattern {S2,S3} will be detected.
1408 * TYPE_OUT: The type of the output of this pattern.
1410 * Return value: A new stmt that will be used to replace the sequence of
1411 stmts that constitute the pattern. In this case it will be:
1412 WIDEN_SUM <x_t, sum_0>
1414 Note: The widening-sum idiom is a widening reduction pattern that is
1415 vectorized without preserving all the intermediate results. It
1416 produces only N/2 (widened) results (by summing up pairs of
1417 intermediate results) rather than all N results. Therefore, we
1418 cannot allow this pattern when we want to get all the results and in
1419 the correct order (as is the case when this computation is in an
1420 inner-loop nested in an outer-loop that us being vectorized). */
1423 vect_recog_widen_sum_pattern (stmt_vec_info stmt_vinfo
, tree
*type_out
)
1425 gimple
*last_stmt
= stmt_vinfo
->stmt
;
1426 tree oprnd0
, oprnd1
;
1427 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
1429 gimple
*pattern_stmt
;
1432 /* Look for the following pattern
1435 In which DX is at least double the size of X, and sum_1 has been
1436 recognized as a reduction variable.
1439 /* Starting from LAST_STMT, follow the defs of its uses in search
1440 of the above pattern. */
1442 if (!vect_reassociating_reduction_p (stmt_vinfo
, PLUS_EXPR
,
1446 type
= gimple_expr_type (last_stmt
);
1448 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1449 we know that oprnd1 is the reduction variable (defined by a loop-header
1450 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1451 Left to check that oprnd0 is defined by a cast from type 'type' to type
1454 vect_unpromoted_value unprom0
;
1455 if (!vect_look_through_possible_promotion (vinfo
, oprnd0
, &unprom0
)
1456 || TYPE_PRECISION (unprom0
.type
) * 2 > TYPE_PRECISION (type
))
1459 vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt
);
1461 if (!vect_supportable_direct_optab_p (type
, WIDEN_SUM_EXPR
, unprom0
.type
,
1465 var
= vect_recog_temp_ssa_var (type
, NULL
);
1466 pattern_stmt
= gimple_build_assign (var
, WIDEN_SUM_EXPR
, unprom0
.op
, oprnd1
);
1468 return pattern_stmt
;
1471 /* Recognize cases in which an operation is performed in one type WTYPE
1472 but could be done more efficiently in a narrower type NTYPE. For example,
1475 ATYPE a; // narrower than NTYPE
1476 BTYPE b; // narrower than NTYPE
1477 WTYPE aw = (WTYPE) a;
1478 WTYPE bw = (WTYPE) b;
1479 WTYPE res = aw + bw; // only uses of aw and bw
1481 then it would be more efficient to do:
1483 NTYPE an = (NTYPE) a;
1484 NTYPE bn = (NTYPE) b;
1485 NTYPE resn = an + bn;
1486 WTYPE res = (WTYPE) resn;
1488 Other situations include things like:
1490 ATYPE a; // NTYPE or narrower
1491 WTYPE aw = (WTYPE) a;
1494 when only "(NTYPE) res" is significant. In that case it's more efficient
1495 to truncate "b" and do the operation on NTYPE instead:
1497 NTYPE an = (NTYPE) a;
1498 NTYPE bn = (NTYPE) b; // truncation
1499 NTYPE resn = an + bn;
1500 WTYPE res = (WTYPE) resn;
1502 All users of "res" should then use "resn" instead, making the final
1503 statement dead (not marked as relevant). The final statement is still
1504 needed to maintain the type correctness of the IR.
1506 vect_determine_precisions has already determined the minimum
1507 precison of the operation and the minimum precision required
1508 by users of the result. */
1511 vect_recog_over_widening_pattern (stmt_vec_info last_stmt_info
, tree
*type_out
)
1513 gassign
*last_stmt
= dyn_cast
<gassign
*> (last_stmt_info
->stmt
);
1517 /* See whether we have found that this operation can be done on a
1518 narrower type without changing its semantics. */
1519 unsigned int new_precision
= last_stmt_info
->operation_precision
;
1523 vec_info
*vinfo
= last_stmt_info
->vinfo
;
1524 tree lhs
= gimple_assign_lhs (last_stmt
);
1525 tree type
= TREE_TYPE (lhs
);
1526 tree_code code
= gimple_assign_rhs_code (last_stmt
);
1528 /* Keep the first operand of a COND_EXPR as-is: only the other two
1529 operands are interesting. */
1530 unsigned int first_op
= (code
== COND_EXPR
? 2 : 1);
1532 /* Check the operands. */
1533 unsigned int nops
= gimple_num_ops (last_stmt
) - first_op
;
1534 auto_vec
<vect_unpromoted_value
, 3> unprom (nops
);
1535 unprom
.quick_grow (nops
);
1536 unsigned int min_precision
= 0;
1537 bool single_use_p
= false;
1538 for (unsigned int i
= 0; i
< nops
; ++i
)
1540 tree op
= gimple_op (last_stmt
, first_op
+ i
);
1541 if (TREE_CODE (op
) == INTEGER_CST
)
1542 unprom
[i
].set_op (op
, vect_constant_def
);
1543 else if (TREE_CODE (op
) == SSA_NAME
)
1545 bool op_single_use_p
= true;
1546 if (!vect_look_through_possible_promotion (vinfo
, op
, &unprom
[i
],
1551 (1) N bits of the result are needed;
1552 (2) all inputs are widened from M<N bits; and
1553 (3) one operand OP is a single-use SSA name
1555 we can shift the M->N widening from OP to the output
1556 without changing the number or type of extensions involved.
1557 This then reduces the number of copies of STMT_INFO.
1559 If instead of (3) more than one operand is a single-use SSA name,
1560 shifting the extension to the output is even more of a win.
1564 (1) N bits of the result are needed;
1565 (2) one operand OP2 is widened from M2<N bits;
1566 (3) another operand OP1 is widened from M1<M2 bits; and
1567 (4) both OP1 and OP2 are single-use
1569 the choice is between:
1571 (a) truncating OP2 to M1, doing the operation on M1,
1572 and then widening the result to N
1574 (b) widening OP1 to M2, doing the operation on M2, and then
1575 widening the result to N
1577 Both shift the M2->N widening of the inputs to the output.
1578 (a) additionally shifts the M1->M2 widening to the output;
1579 it requires fewer copies of STMT_INFO but requires an extra
1582 Which is better will depend on the complexity and cost of
1583 STMT_INFO, which is hard to predict at this stage. However,
1584 a clear tie-breaker in favor of (b) is the fact that the
1585 truncation in (a) increases the length of the operation chain.
1587 If instead of (4) only one of OP1 or OP2 is single-use,
1588 (b) is still a win over doing the operation in N bits:
1589 it still shifts the M2->N widening on the single-use operand
1590 to the output and reduces the number of STMT_INFO copies.
1592 If neither operand is single-use then operating on fewer than
1593 N bits might lead to more extensions overall. Whether it does
1594 or not depends on global information about the vectorization
1595 region, and whether that's a good trade-off would again
1596 depend on the complexity and cost of the statements involved,
1597 as well as things like register pressure that are not normally
1598 modelled at this stage. We therefore ignore these cases
1599 and just optimize the clear single-use wins above.
1601 Thus we take the maximum precision of the unpromoted operands
1602 and record whether any operand is single-use. */
1603 if (unprom
[i
].dt
== vect_internal_def
)
1605 min_precision
= MAX (min_precision
,
1606 TYPE_PRECISION (unprom
[i
].type
));
1607 single_use_p
|= op_single_use_p
;
1612 /* Although the operation could be done in operation_precision, we have
1613 to balance that against introducing extra truncations or extensions.
1614 Calculate the minimum precision that can be handled efficiently.
1616 The loop above determined that the operation could be handled
1617 efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
1618 extension from the inputs to the output without introducing more
1619 instructions, and would reduce the number of instructions required
1620 for STMT_INFO itself.
1622 vect_determine_precisions has also determined that the result only
1623 needs min_output_precision bits. Truncating by a factor of N times
1624 requires a tree of N - 1 instructions, so if TYPE is N times wider
1625 than min_output_precision, doing the operation in TYPE and truncating
1626 the result requires N + (N - 1) = 2N - 1 instructions per output vector.
1629 - truncating the input to a unary operation and doing the operation
1630 in the new type requires at most N - 1 + 1 = N instructions per
1633 - doing the same for a binary operation requires at most
1634 (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
1636 Both unary and binary operations require fewer instructions than
1637 this if the operands were extended from a suitable truncated form.
1638 Thus there is usually nothing to lose by doing operations in
1639 min_output_precision bits, but there can be something to gain. */
1641 min_precision
= last_stmt_info
->min_output_precision
;
1643 min_precision
= MIN (min_precision
, last_stmt_info
->min_output_precision
);
1645 /* Apply the minimum efficient precision we just calculated. */
1646 if (new_precision
< min_precision
)
1647 new_precision
= min_precision
;
1648 if (new_precision
>= TYPE_PRECISION (type
))
1651 vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt
);
1653 *type_out
= get_vectype_for_scalar_type (type
);
1657 /* We've found a viable pattern. Get the new type of the operation. */
1658 bool unsigned_p
= (last_stmt_info
->operation_sign
== UNSIGNED
);
1659 tree new_type
= build_nonstandard_integer_type (new_precision
, unsigned_p
);
1661 /* We specifically don't check here whether the target supports the
1662 new operation, since it might be something that a later pattern
1663 wants to rewrite anyway. If targets have a minimum element size
1664 for some optabs, we should pattern-match smaller ops to larger ops
1665 where beneficial. */
1666 tree new_vectype
= get_vectype_for_scalar_type (new_type
);
1670 if (dump_enabled_p ())
1672 dump_printf_loc (MSG_NOTE
, vect_location
, "demoting ");
1673 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, type
);
1674 dump_printf (MSG_NOTE
, " to ");
1675 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, new_type
);
1676 dump_printf (MSG_NOTE
, "\n");
1679 /* Calculate the rhs operands for an operation on NEW_TYPE. */
1681 for (unsigned int i
= 1; i
< first_op
; ++i
)
1682 ops
[i
- 1] = gimple_op (last_stmt
, i
);
1683 vect_convert_inputs (last_stmt_info
, nops
, &ops
[first_op
- 1],
1684 new_type
, &unprom
[0], new_vectype
);
1686 /* Use the operation to produce a result of type NEW_TYPE. */
1687 tree new_var
= vect_recog_temp_ssa_var (new_type
, NULL
);
1688 gimple
*pattern_stmt
= gimple_build_assign (new_var
, code
,
1689 ops
[0], ops
[1], ops
[2]);
1690 gimple_set_location (pattern_stmt
, gimple_location (last_stmt
));
1692 if (dump_enabled_p ())
1694 dump_printf_loc (MSG_NOTE
, vect_location
,
1695 "created pattern stmt: ");
1696 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
1699 pattern_stmt
= vect_convert_output (last_stmt_info
, type
,
1700 pattern_stmt
, new_vectype
);
1702 return pattern_stmt
;
1705 /* Recognize the patterns:
1707 ATYPE a; // narrower than TYPE
1708 BTYPE b; // narrower than TYPE
1709 (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
1710 or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
1712 where only the bottom half of avg is used. Try to transform them into:
1714 (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
1715 or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
1719 TYPE avg = (TYPE) avg';
1721 where NTYPE is no wider than half of TYPE. Since only the bottom half
1722 of avg is used, all or part of the cast of avg' should become redundant. */
1725 vect_recog_average_pattern (stmt_vec_info last_stmt_info
, tree
*type_out
)
1727 /* Check for a shift right by one bit. */
1728 gassign
*last_stmt
= dyn_cast
<gassign
*> (last_stmt_info
->stmt
);
1729 vec_info
*vinfo
= last_stmt_info
->vinfo
;
1731 || gimple_assign_rhs_code (last_stmt
) != RSHIFT_EXPR
1732 || !integer_onep (gimple_assign_rhs2 (last_stmt
)))
1735 /* Check that the shift result is wider than the users of the
1736 result need (i.e. that narrowing would be a natural choice). */
1737 tree lhs
= gimple_assign_lhs (last_stmt
);
1738 tree type
= TREE_TYPE (lhs
);
1739 unsigned int target_precision
1740 = vect_element_precision (last_stmt_info
->min_output_precision
);
1741 if (!INTEGRAL_TYPE_P (type
) || target_precision
>= TYPE_PRECISION (type
))
1744 /* Get the definition of the shift input. */
1745 tree rshift_rhs
= gimple_assign_rhs1 (last_stmt
);
1746 stmt_vec_info plus_stmt_info
= vect_get_internal_def (vinfo
, rshift_rhs
);
1747 if (!plus_stmt_info
)
1750 /* Check whether the shift input can be seen as a tree of additions on
1751 2 or 3 widened inputs.
1753 Note that the pattern should be a win even if the result of one or
1754 more additions is reused elsewhere: if the pattern matches, we'd be
1755 replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s. */
1756 internal_fn ifn
= IFN_AVG_FLOOR
;
1757 vect_unpromoted_value unprom
[3];
1759 unsigned int nops
= vect_widened_op_tree (plus_stmt_info
, PLUS_EXPR
,
1760 PLUS_EXPR
, false, 3,
1766 /* Check that one operand is 1. */
1768 for (i
= 0; i
< 3; ++i
)
1769 if (integer_onep (unprom
[i
].op
))
1773 /* Throw away the 1 operand and keep the other two. */
1775 unprom
[i
] = unprom
[2];
1779 vect_pattern_detected ("vect_recog_average_pattern", last_stmt
);
1783 (a) the operation can be viewed as:
1785 TYPE widened0 = (TYPE) UNPROM[0];
1786 TYPE widened1 = (TYPE) UNPROM[1];
1787 TYPE tmp1 = widened0 + widened1 {+ 1};
1788 TYPE tmp2 = tmp1 >> 1; // LAST_STMT_INFO
1790 (b) the first two statements are equivalent to:
1792 TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
1793 TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
1795 (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
1798 (d) all the operations can be performed correctly at twice the width of
1799 NEW_TYPE, due to the nature of the average operation; and
1801 (e) users of the result of the right shift need only TARGET_PRECISION
1802 bits, where TARGET_PRECISION is no more than half of TYPE's
1805 Under these circumstances, the only situation in which NEW_TYPE
1806 could be narrower than TARGET_PRECISION is if widened0, widened1
1807 and an addition result are all used more than once. Thus we can
1808 treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
1809 as "free", whereas widening the result of the average instruction
1810 from NEW_TYPE to TARGET_PRECISION would be a new operation. It's
1811 therefore better not to go narrower than TARGET_PRECISION. */
1812 if (TYPE_PRECISION (new_type
) < target_precision
)
1813 new_type
= build_nonstandard_integer_type (target_precision
,
1814 TYPE_UNSIGNED (new_type
));
1816 /* Check for target support. */
1817 tree new_vectype
= get_vectype_for_scalar_type (new_type
);
1819 || !direct_internal_fn_supported_p (ifn
, new_vectype
,
1820 OPTIMIZE_FOR_SPEED
))
1823 /* The IR requires a valid vector type for the cast result, even though
1824 it's likely to be discarded. */
1825 *type_out
= get_vectype_for_scalar_type (type
);
1829 /* Generate the IFN_AVG* call. */
1830 tree new_var
= vect_recog_temp_ssa_var (new_type
, NULL
);
1832 vect_convert_inputs (last_stmt_info
, 2, new_ops
, new_type
,
1833 unprom
, new_vectype
);
1834 gcall
*average_stmt
= gimple_build_call_internal (ifn
, 2, new_ops
[0],
1836 gimple_call_set_lhs (average_stmt
, new_var
);
1837 gimple_set_location (average_stmt
, gimple_location (last_stmt
));
1839 if (dump_enabled_p ())
1841 dump_printf_loc (MSG_NOTE
, vect_location
,
1842 "created pattern stmt: ");
1843 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, average_stmt
, 0);
1846 return vect_convert_output (last_stmt_info
, type
, average_stmt
, new_vectype
);
1849 /* Recognize cases in which the input to a cast is wider than its
1850 output, and the input is fed by a widening operation. Fold this
1851 by removing the unnecessary intermediate widening. E.g.:
1854 unsigned int b = (unsigned int) a;
1855 unsigned short c = (unsigned short) b;
1859 unsigned short c = (unsigned short) a;
1861 Although this is rare in input IR, it is an expected side-effect
1862 of the over-widening pattern above.
1864 This is beneficial also for integer-to-float conversions, if the
1865 widened integer has more bits than the float, and if the unwidened
1869 vect_recog_cast_forwprop_pattern (stmt_vec_info last_stmt_info
, tree
*type_out
)
1871 /* Check for a cast, including an integer-to-float conversion. */
1872 gassign
*last_stmt
= dyn_cast
<gassign
*> (last_stmt_info
->stmt
);
1875 tree_code code
= gimple_assign_rhs_code (last_stmt
);
1876 if (!CONVERT_EXPR_CODE_P (code
) && code
!= FLOAT_EXPR
)
1879 /* Make sure that the rhs is a scalar with a natural bitsize. */
1880 tree lhs
= gimple_assign_lhs (last_stmt
);
1883 tree lhs_type
= TREE_TYPE (lhs
);
1884 scalar_mode lhs_mode
;
1885 if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type
)
1886 || !is_a
<scalar_mode
> (TYPE_MODE (lhs_type
), &lhs_mode
))
1889 /* Check for a narrowing operation (from a vector point of view). */
1890 tree rhs
= gimple_assign_rhs1 (last_stmt
);
1891 tree rhs_type
= TREE_TYPE (rhs
);
1892 if (!INTEGRAL_TYPE_P (rhs_type
)
1893 || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type
)
1894 || TYPE_PRECISION (rhs_type
) <= GET_MODE_BITSIZE (lhs_mode
))
1897 /* Try to find an unpromoted input. */
1898 vec_info
*vinfo
= last_stmt_info
->vinfo
;
1899 vect_unpromoted_value unprom
;
1900 if (!vect_look_through_possible_promotion (vinfo
, rhs
, &unprom
)
1901 || TYPE_PRECISION (unprom
.type
) >= TYPE_PRECISION (rhs_type
))
1904 /* If the bits above RHS_TYPE matter, make sure that they're the
1905 same when extending from UNPROM as they are when extending from RHS. */
1906 if (!INTEGRAL_TYPE_P (lhs_type
)
1907 && TYPE_SIGN (rhs_type
) != TYPE_SIGN (unprom
.type
))
1910 /* We can get the same result by casting UNPROM directly, to avoid
1911 the unnecessary widening and narrowing. */
1912 vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt
);
1914 *type_out
= get_vectype_for_scalar_type (lhs_type
);
1918 tree new_var
= vect_recog_temp_ssa_var (lhs_type
, NULL
);
1919 gimple
*pattern_stmt
= gimple_build_assign (new_var
, code
, unprom
.op
);
1920 gimple_set_location (pattern_stmt
, gimple_location (last_stmt
));
1922 return pattern_stmt
;
1925 /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
1926 to WIDEN_LSHIFT_EXPR. See vect_recog_widen_op_pattern for details. */
1929 vect_recog_widen_shift_pattern (stmt_vec_info last_stmt_info
, tree
*type_out
)
1931 return vect_recog_widen_op_pattern (last_stmt_info
, type_out
, LSHIFT_EXPR
,
1932 WIDEN_LSHIFT_EXPR
, true,
1933 "vect_recog_widen_shift_pattern");
1936 /* Detect a rotate pattern wouldn't be otherwise vectorized:
1940 S0 a_t = b_t r<< c_t;
1944 * STMT_VINFO: The stmt from which the pattern search begins,
1945 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1949 S2 e_t = d_t & (B - 1);
1950 S3 f_t = b_t << c_t;
1951 S4 g_t = b_t >> e_t;
1954 where B is element bitsize of type.
1958 * TYPE_OUT: The type of the output of this pattern.
1960 * Return value: A new stmt that will be used to replace the rotate
1964 vect_recog_rotate_pattern (stmt_vec_info stmt_vinfo
, tree
*type_out
)
1966 gimple
*last_stmt
= stmt_vinfo
->stmt
;
1967 tree oprnd0
, oprnd1
, lhs
, var
, var1
, var2
, vectype
, type
, stype
, def
, def2
;
1968 gimple
*pattern_stmt
, *def_stmt
;
1969 enum tree_code rhs_code
;
1970 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
1971 enum vect_def_type dt
;
1972 optab optab1
, optab2
;
1973 edge ext_def
= NULL
;
1975 if (!is_gimple_assign (last_stmt
))
1978 rhs_code
= gimple_assign_rhs_code (last_stmt
);
1988 lhs
= gimple_assign_lhs (last_stmt
);
1989 oprnd0
= gimple_assign_rhs1 (last_stmt
);
1990 type
= TREE_TYPE (oprnd0
);
1991 oprnd1
= gimple_assign_rhs2 (last_stmt
);
1992 if (TREE_CODE (oprnd0
) != SSA_NAME
1993 || TYPE_PRECISION (TREE_TYPE (lhs
)) != TYPE_PRECISION (type
)
1994 || !INTEGRAL_TYPE_P (type
)
1995 || !TYPE_UNSIGNED (type
))
1998 if (!vect_is_simple_use (oprnd1
, vinfo
, &dt
, &def_stmt
))
2001 if (dt
!= vect_internal_def
2002 && dt
!= vect_constant_def
2003 && dt
!= vect_external_def
)
2006 vectype
= get_vectype_for_scalar_type (type
);
2007 if (vectype
== NULL_TREE
)
2010 /* If vector/vector or vector/scalar rotate is supported by the target,
2011 don't do anything here. */
2012 optab1
= optab_for_tree_code (rhs_code
, vectype
, optab_vector
);
2014 && optab_handler (optab1
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
2017 if (is_a
<bb_vec_info
> (vinfo
) || dt
!= vect_internal_def
)
2019 optab2
= optab_for_tree_code (rhs_code
, vectype
, optab_scalar
);
2021 && optab_handler (optab2
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
2025 /* If vector/vector or vector/scalar shifts aren't supported by the target,
2026 don't do anything here either. */
2027 optab1
= optab_for_tree_code (LSHIFT_EXPR
, vectype
, optab_vector
);
2028 optab2
= optab_for_tree_code (RSHIFT_EXPR
, vectype
, optab_vector
);
2030 || optab_handler (optab1
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
2032 || optab_handler (optab2
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
2034 if (! is_a
<bb_vec_info
> (vinfo
) && dt
== vect_internal_def
)
2036 optab1
= optab_for_tree_code (LSHIFT_EXPR
, vectype
, optab_scalar
);
2037 optab2
= optab_for_tree_code (RSHIFT_EXPR
, vectype
, optab_scalar
);
2039 || optab_handler (optab1
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
2041 || optab_handler (optab2
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
2045 *type_out
= vectype
;
2047 if (dt
== vect_external_def
2048 && TREE_CODE (oprnd1
) == SSA_NAME
)
2049 ext_def
= vect_get_external_def_edge (vinfo
, oprnd1
);
2052 scalar_int_mode mode
= SCALAR_INT_TYPE_MODE (type
);
2053 if (TREE_CODE (oprnd1
) == INTEGER_CST
2054 || TYPE_MODE (TREE_TYPE (oprnd1
)) == mode
)
2056 else if (def_stmt
&& gimple_assign_cast_p (def_stmt
))
2058 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
2059 if (TYPE_MODE (TREE_TYPE (rhs1
)) == mode
2060 && TYPE_PRECISION (TREE_TYPE (rhs1
))
2061 == TYPE_PRECISION (type
))
2065 if (def
== NULL_TREE
)
2067 def
= vect_recog_temp_ssa_var (type
, NULL
);
2068 def_stmt
= gimple_build_assign (def
, NOP_EXPR
, oprnd1
);
2072 = gsi_insert_on_edge_immediate (ext_def
, def_stmt
);
2073 gcc_assert (!new_bb
);
2076 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2078 stype
= TREE_TYPE (def
);
2079 scalar_int_mode smode
= SCALAR_INT_TYPE_MODE (stype
);
2081 if (TREE_CODE (def
) == INTEGER_CST
)
2083 if (!tree_fits_uhwi_p (def
)
2084 || tree_to_uhwi (def
) >= GET_MODE_PRECISION (mode
)
2085 || integer_zerop (def
))
2087 def2
= build_int_cst (stype
,
2088 GET_MODE_PRECISION (mode
) - tree_to_uhwi (def
));
2092 tree vecstype
= get_vectype_for_scalar_type (stype
);
2094 if (vecstype
== NULL_TREE
)
2096 def2
= vect_recog_temp_ssa_var (stype
, NULL
);
2097 def_stmt
= gimple_build_assign (def2
, NEGATE_EXPR
, def
);
2101 = gsi_insert_on_edge_immediate (ext_def
, def_stmt
);
2102 gcc_assert (!new_bb
);
2105 append_pattern_def_seq (stmt_vinfo
, def_stmt
, vecstype
);
2107 def2
= vect_recog_temp_ssa_var (stype
, NULL
);
2108 tree mask
= build_int_cst (stype
, GET_MODE_PRECISION (smode
) - 1);
2109 def_stmt
= gimple_build_assign (def2
, BIT_AND_EXPR
,
2110 gimple_assign_lhs (def_stmt
), mask
);
2114 = gsi_insert_on_edge_immediate (ext_def
, def_stmt
);
2115 gcc_assert (!new_bb
);
2118 append_pattern_def_seq (stmt_vinfo
, def_stmt
, vecstype
);
2121 var1
= vect_recog_temp_ssa_var (type
, NULL
);
2122 def_stmt
= gimple_build_assign (var1
, rhs_code
== LROTATE_EXPR
2123 ? LSHIFT_EXPR
: RSHIFT_EXPR
,
2125 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2127 var2
= vect_recog_temp_ssa_var (type
, NULL
);
2128 def_stmt
= gimple_build_assign (var2
, rhs_code
== LROTATE_EXPR
2129 ? RSHIFT_EXPR
: LSHIFT_EXPR
,
2131 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2133 /* Pattern detected. */
2134 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt
);
2136 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2137 var
= vect_recog_temp_ssa_var (type
, NULL
);
2138 pattern_stmt
= gimple_build_assign (var
, BIT_IOR_EXPR
, var1
, var2
);
2140 return pattern_stmt
;
2143 /* Detect a vector by vector shift pattern that wouldn't be otherwise
2151 S3 res_T = b_T op a_t;
2153 where type 'TYPE' is a type with different size than 'type',
2154 and op is <<, >> or rotate.
2159 TYPE b_T, c_T, res_T;
2162 S1 a_t = (type) c_T;
2164 S3 res_T = b_T op a_t;
2168 * STMT_VINFO: The stmt from which the pattern search begins,
2169 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2170 with a shift/rotate which has same type on both operands, in the
2171 second case just b_T op c_T, in the first case with added cast
2172 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2176 * TYPE_OUT: The type of the output of this pattern.
2178 * Return value: A new stmt that will be used to replace the shift/rotate
2182 vect_recog_vector_vector_shift_pattern (stmt_vec_info stmt_vinfo
,
2185 gimple
*last_stmt
= stmt_vinfo
->stmt
;
2186 tree oprnd0
, oprnd1
, lhs
, var
;
2187 gimple
*pattern_stmt
;
2188 enum tree_code rhs_code
;
2189 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
2191 if (!is_gimple_assign (last_stmt
))
2194 rhs_code
= gimple_assign_rhs_code (last_stmt
);
2206 lhs
= gimple_assign_lhs (last_stmt
);
2207 oprnd0
= gimple_assign_rhs1 (last_stmt
);
2208 oprnd1
= gimple_assign_rhs2 (last_stmt
);
2209 if (TREE_CODE (oprnd0
) != SSA_NAME
2210 || TREE_CODE (oprnd1
) != SSA_NAME
2211 || TYPE_MODE (TREE_TYPE (oprnd0
)) == TYPE_MODE (TREE_TYPE (oprnd1
))
2212 || !type_has_mode_precision_p (TREE_TYPE (oprnd1
))
2213 || TYPE_PRECISION (TREE_TYPE (lhs
))
2214 != TYPE_PRECISION (TREE_TYPE (oprnd0
)))
2217 stmt_vec_info def_vinfo
= vect_get_internal_def (vinfo
, oprnd1
);
2221 *type_out
= get_vectype_for_scalar_type (TREE_TYPE (oprnd0
));
2222 if (*type_out
== NULL_TREE
)
2225 tree def
= NULL_TREE
;
2226 gassign
*def_stmt
= dyn_cast
<gassign
*> (def_vinfo
->stmt
);
2227 if (def_stmt
&& gimple_assign_cast_p (def_stmt
))
2229 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
2230 if (TYPE_MODE (TREE_TYPE (rhs1
)) == TYPE_MODE (TREE_TYPE (oprnd0
))
2231 && TYPE_PRECISION (TREE_TYPE (rhs1
))
2232 == TYPE_PRECISION (TREE_TYPE (oprnd0
)))
2234 if (TYPE_PRECISION (TREE_TYPE (oprnd1
))
2235 >= TYPE_PRECISION (TREE_TYPE (rhs1
)))
2240 = build_low_bits_mask (TREE_TYPE (rhs1
),
2241 TYPE_PRECISION (TREE_TYPE (oprnd1
)));
2242 def
= vect_recog_temp_ssa_var (TREE_TYPE (rhs1
), NULL
);
2243 def_stmt
= gimple_build_assign (def
, BIT_AND_EXPR
, rhs1
, mask
);
2244 tree vecstype
= get_vectype_for_scalar_type (TREE_TYPE (rhs1
));
2245 append_pattern_def_seq (stmt_vinfo
, def_stmt
, vecstype
);
2250 if (def
== NULL_TREE
)
2252 def
= vect_recog_temp_ssa_var (TREE_TYPE (oprnd0
), NULL
);
2253 def_stmt
= gimple_build_assign (def
, NOP_EXPR
, oprnd1
);
2254 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2257 /* Pattern detected. */
2258 vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt
);
2260 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2261 var
= vect_recog_temp_ssa_var (TREE_TYPE (oprnd0
), NULL
);
2262 pattern_stmt
= gimple_build_assign (var
, rhs_code
, oprnd0
, def
);
2264 return pattern_stmt
;
2267 /* Return true iff the target has a vector optab implementing the operation
2268 CODE on type VECTYPE. */
/* NOTE(review): extraction dropped original lines 2269-2272 and 2274 (the
   return type, opening brace, and the "return voptab != unknown_optab"
   half of the condition continued below) -- restore from upstream GCC
   before compiling.  */
2271 target_has_vecop_for_code (tree_code code
, tree vectype
)
/* Look up the vector/vector form of CODE for VECTYPE.  */
2273 optab voptab
= optab_for_tree_code (code
, vectype
, optab_vector
)
;
/* ...and require an actual insn pattern for VECTYPE's machine mode.  */
2275 && optab_handler (voptab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
;
2278 /* Verify that the target has optabs of VECTYPE to perform all the steps
2279 needed by the multiplication-by-immediate synthesis algorithm described by
2280 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
2281 present. Return true iff the target supports all the steps. */
/* NOTE(review): the extraction that produced this chunk dropped several
   interior lines (returns, braces, the switch head over alg->op[i] and
   some case labels between the fragments below) -- restore from upstream
   GCC before compiling.  */
2284 target_supports_mult_synth_alg (struct algorithm
*alg
, mult_variant var
,
2285 tree vectype
, bool synth_shift_p
)
/* Only algorithms that start from zero or from the multiplicand itself
   are handled (first op must be alg_zero or alg_m).  */
2287 if (alg
->op
[0] != alg_zero
&& alg
->op
[0] != alg_m
)
/* Probe once for vector subtraction and addition support.  */
2290 bool supports_vminus
= target_has_vecop_for_code (MINUS_EXPR
, vectype
)
;
2291 bool supports_vplus
= target_has_vecop_for_code (PLUS_EXPR
, vectype
)
;
/* A negate_variant algorithm needs vector negation as its final step.  */
2293 if (var
== negate_variant
2294 && !target_has_vecop_for_code (NEGATE_EXPR
, vectype
))
2297 /* If we must synthesize shifts with additions make sure that vector
2298 addition is available. */
2299 if ((var
== add_variant
|| synth_shift_p
) && !supports_vplus
)
/* Walk the remaining steps of the algorithm; each alg_* op maps to a
   vector operation the target must support (switch head dropped by
   extraction).  */
2302 for (int i
= 1; i
< alg
->ops
; i
++)
2310 case alg_add_factor
:
2311 if (!supports_vplus
)
2316 case alg_sub_factor
:
2317 if (!supports_vminus
)
/* alg_impossible means choose_mult_variant found no algorithm.  */
2323 case alg_impossible
:
2333 /* Synthesize a left shift of OP by AMNT bits using a series of additions and
2334 putting the final result in DEST. Append all statements but the last into
2335 VINFO. Return the last statement. */
/* NOTE(review): extraction dropped interior lines (the declarations of i,
   prev_res and stmt, the braces, and the final return) -- restore from
   upstream GCC before compiling.  */
2338 synth_lshift_by_additions (tree dest
, tree op
, HOST_WIDE_INT amnt
,
2339 stmt_vec_info vinfo
)
2342 tree itype
= TREE_TYPE (op
)
;
/* Negative shift amounts are caller bugs.  */
2344 gcc_assert (amnt
>= 0);
/* Each iteration doubles the running value: x << amnt == x added to
   itself amnt times.  */
2345 for (i
= 0; i
< amnt
; i
++)
/* Use DEST itself for the last doubling, a fresh temporary otherwise
   (the "? :" alternative taking dest was dropped by extraction).  */
2347 tree tmp_var
= (i
< amnt
- 1) ? vect_recog_temp_ssa_var (itype
, NULL
)
2350 = gimple_build_assign (tmp_var
, PLUS_EXPR
, prev_res
, prev_res
)
;
/* All but the final statement are appended to the pattern def seq;
   presumably the last one is returned instead -- confirm upstream.  */
2353 append_pattern_def_seq (vinfo
, stmt
)
;
2361 /* Helper for vect_synth_mult_by_constant. Apply a binary operation
2362 CODE to operands OP1 and OP2, creating a new temporary SSA var in
2363 the process if necessary. Append the resulting assignment statements
2364 to the sequence in STMT_VINFO. Return the SSA variable that holds the
2365 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
2366 left shifts using additions. */
/* NOTE(review): extraction dropped interior lines (braces, early return of
   op1, the synth_shift_p guard and both "return tmp_var" statements) --
   restore from upstream GCC before compiling.  */
2369 apply_binop_and_append_stmt (tree_code code
, tree op1
, tree op2
,
2370 stmt_vec_info stmt_vinfo
, bool synth_shift_p
)
/* Shifting or adding zero is a no-op: the original returns OP1 directly
   here (return dropped by extraction).  */
2372 if (integer_zerop (op2
)
2373 && (code
== LSHIFT_EXPR
2374 || code
== PLUS_EXPR
))
2376 gcc_assert (TREE_CODE (op1
) == SSA_NAME
)
;
2381 tree itype
= TREE_TYPE (op1
)
;
2382 tree tmp_var
= vect_recog_temp_ssa_var (itype
, NULL
)
;
/* When the target lacks vector shifts (synth_shift_p), expand the left
   shift as repeated additions instead.  */
2384 if (code
== LSHIFT_EXPR
2387 stmt
= synth_lshift_by_additions (tmp_var
, op1
, TREE_INT_CST_LOW (op2
),
2389 append_pattern_def_seq (stmt_vinfo
, stmt
)
;
/* Otherwise emit the binary operation directly.  */
2393 stmt
= gimple_build_assign (tmp_var
, code
, op1
, op2
)
;
2394 append_pattern_def_seq (stmt_vinfo
, stmt
)
;
2398 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
2399 and simple arithmetic operations to be vectorized. Record the statements
2400 produced in STMT_VINFO and return the last statement in the sequence or
2401 NULL if it's not possible to synthesize such a multiplication.
2402 This function mirrors the behavior of expand_mult_const in expmed.c but
2403 works on tree-ssa form. */
2406 vect_synth_mult_by_constant (tree op
, tree val
,
2407 stmt_vec_info stmt_vinfo
)
2409 tree itype
= TREE_TYPE (op
);
2410 machine_mode mode
= TYPE_MODE (itype
);
2411 struct algorithm alg
;
2412 mult_variant variant
;
2413 if (!tree_fits_shwi_p (val
))
2416 /* Multiplication synthesis by shifts, adds and subs can introduce
2417 signed overflow where the original operation didn't. Perform the
2418 operations on an unsigned type and cast back to avoid this.
2419 In the future we may want to relax this for synthesis algorithms
2420 that we can prove do not cause unexpected overflow. */
2421 bool cast_to_unsigned_p
= !TYPE_OVERFLOW_WRAPS (itype
);
2423 tree multtype
= cast_to_unsigned_p
? unsigned_type_for (itype
) : itype
;
2425 /* Targets that don't support vector shifts but support vector additions
2426 can synthesize shifts that way. */
2427 bool synth_shift_p
= !vect_supportable_shift (LSHIFT_EXPR
, multtype
);
2429 HOST_WIDE_INT hwval
= tree_to_shwi (val
);
2430 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
2431 The vectorizer's benefit analysis will decide whether it's beneficial
2433 bool possible
= choose_mult_variant (mode
, hwval
, &alg
,
2434 &variant
, MAX_COST
);
2438 tree vectype
= get_vectype_for_scalar_type (multtype
);
2441 || !target_supports_mult_synth_alg (&alg
, variant
,
2442 vectype
, synth_shift_p
))
2447 /* Clear out the sequence of statements so we can populate it below. */
2448 gimple
*stmt
= NULL
;
2450 if (cast_to_unsigned_p
)
2452 tree tmp_op
= vect_recog_temp_ssa_var (multtype
, NULL
);
2453 stmt
= gimple_build_assign (tmp_op
, CONVERT_EXPR
, op
);
2454 append_pattern_def_seq (stmt_vinfo
, stmt
);
2458 if (alg
.op
[0] == alg_zero
)
2459 accumulator
= build_int_cst (multtype
, 0);
2463 bool needs_fixup
= (variant
== negate_variant
)
2464 || (variant
== add_variant
);
2466 for (int i
= 1; i
< alg
.ops
; i
++)
2468 tree shft_log
= build_int_cst (multtype
, alg
.log
[i
]);
2469 tree accum_tmp
= vect_recog_temp_ssa_var (multtype
, NULL
);
2470 tree tmp_var
= NULL_TREE
;
2477 = synth_lshift_by_additions (accum_tmp
, accumulator
, alg
.log
[i
],
2480 stmt
= gimple_build_assign (accum_tmp
, LSHIFT_EXPR
, accumulator
,
2485 = apply_binop_and_append_stmt (LSHIFT_EXPR
, op
, shft_log
,
2486 stmt_vinfo
, synth_shift_p
);
2487 stmt
= gimple_build_assign (accum_tmp
, PLUS_EXPR
, accumulator
,
2491 tmp_var
= apply_binop_and_append_stmt (LSHIFT_EXPR
, op
,
2492 shft_log
, stmt_vinfo
,
2494 /* In some algorithms the first step involves zeroing the
2495 accumulator. If subtracting from such an accumulator
2496 just emit the negation directly. */
2497 if (integer_zerop (accumulator
))
2498 stmt
= gimple_build_assign (accum_tmp
, NEGATE_EXPR
, tmp_var
);
2500 stmt
= gimple_build_assign (accum_tmp
, MINUS_EXPR
, accumulator
,
2505 = apply_binop_and_append_stmt (LSHIFT_EXPR
, accumulator
, shft_log
,
2506 stmt_vinfo
, synth_shift_p
);
2507 stmt
= gimple_build_assign (accum_tmp
, PLUS_EXPR
, tmp_var
, op
);
2511 = apply_binop_and_append_stmt (LSHIFT_EXPR
, accumulator
, shft_log
,
2512 stmt_vinfo
, synth_shift_p
);
2513 stmt
= gimple_build_assign (accum_tmp
, MINUS_EXPR
, tmp_var
, op
);
2515 case alg_add_factor
:
2517 = apply_binop_and_append_stmt (LSHIFT_EXPR
, accumulator
, shft_log
,
2518 stmt_vinfo
, synth_shift_p
);
2519 stmt
= gimple_build_assign (accum_tmp
, PLUS_EXPR
, accumulator
,
2522 case alg_sub_factor
:
2524 = apply_binop_and_append_stmt (LSHIFT_EXPR
, accumulator
, shft_log
,
2525 stmt_vinfo
, synth_shift_p
);
2526 stmt
= gimple_build_assign (accum_tmp
, MINUS_EXPR
, tmp_var
,
2532 /* We don't want to append the last stmt in the sequence to stmt_vinfo
2533 but rather return it directly. */
2535 if ((i
< alg
.ops
- 1) || needs_fixup
|| cast_to_unsigned_p
)
2536 append_pattern_def_seq (stmt_vinfo
, stmt
);
2537 accumulator
= accum_tmp
;
2539 if (variant
== negate_variant
)
2541 tree accum_tmp
= vect_recog_temp_ssa_var (multtype
, NULL
);
2542 stmt
= gimple_build_assign (accum_tmp
, NEGATE_EXPR
, accumulator
);
2543 accumulator
= accum_tmp
;
2544 if (cast_to_unsigned_p
)
2545 append_pattern_def_seq (stmt_vinfo
, stmt
);
2547 else if (variant
== add_variant
)
2549 tree accum_tmp
= vect_recog_temp_ssa_var (multtype
, NULL
);
2550 stmt
= gimple_build_assign (accum_tmp
, PLUS_EXPR
, accumulator
, op
);
2551 accumulator
= accum_tmp
;
2552 if (cast_to_unsigned_p
)
2553 append_pattern_def_seq (stmt_vinfo
, stmt
);
2555 /* Move back to a signed if needed. */
2556 if (cast_to_unsigned_p
)
2558 tree accum_tmp
= vect_recog_temp_ssa_var (itype
, NULL
);
2559 stmt
= gimple_build_assign (accum_tmp
, CONVERT_EXPR
, accumulator
);
2565 /* Detect multiplication by constant and convert it into a sequence of
2566 shifts and additions, subtractions, negations. We reuse the
2567 choose_mult_variant algorithms from expmed.c
2571 STMT_VINFO: The stmt from which the pattern search begins,
2576 * TYPE_OUT: The type of the output of this pattern.
2578 * Return value: A new stmt that will be used to replace
2579 the multiplication. */
/* NOTE(review): extraction dropped interior lines (return type, braces and
   every "return NULL;" after the guard conditions below) -- restore from
   upstream GCC before compiling.  */
2582 vect_recog_mult_pattern (stmt_vec_info stmt_vinfo
, tree
*type_out
)
2584 gimple
*last_stmt
= stmt_vinfo
->stmt
;
2585 tree oprnd0
, oprnd1
, vectype
, itype
;
2586 gimple
*pattern_stmt
;
/* Only plain GIMPLE assignments whose rhs is a MULT_EXPR qualify.  */
2588 if (!is_gimple_assign (last_stmt
))
2591 if (gimple_assign_rhs_code (last_stmt
) != MULT_EXPR
)
2594 oprnd0
= gimple_assign_rhs1 (last_stmt
)
;
2595 oprnd1
= gimple_assign_rhs2 (last_stmt
)
;
2596 itype
= TREE_TYPE (oprnd0
)
;
/* Require SSA_NAME * INTEGER_CST on an integral type whose precision
   matches its mode.  */
2598 if (TREE_CODE (oprnd0
) != SSA_NAME
2599 || TREE_CODE (oprnd1
) != INTEGER_CST
2600 || !INTEGRAL_TYPE_P (itype
)
2601 || !type_has_mode_precision_p (itype
))
2604 vectype
= get_vectype_for_scalar_type (itype
)
;
2605 if (vectype
== NULL_TREE
)
2608 /* If the target can handle vectorized multiplication natively,
2609 don't attempt to optimize this. */
2610 optab mul_optab
= optab_for_tree_code (MULT_EXPR
, vectype
, optab_default
)
;
2611 if (mul_optab
!= unknown_optab
)
2613 machine_mode vec_mode
= TYPE_MODE (vectype
)
;
2614 int icode
= (int) optab_handler (mul_optab
, vec_mode
)
;
2615 if (icode
!= CODE_FOR_nothing
)
/* Try to synthesize the multiply; presumably NULL is propagated to the
   caller on failure -- confirm upstream.  */
2619 pattern_stmt
= vect_synth_mult_by_constant (oprnd0
, oprnd1
, stmt_vinfo
)
;
2623 /* Pattern detected. */
2624 vect_pattern_detected ("vect_recog_mult_pattern", last_stmt
)
;
2626 *type_out
= vectype
;
2628 return pattern_stmt
;
2631 /* Detect a signed division by a constant that wouldn't be
2632 otherwise vectorized:
2638 where type 'type' is an integral type and N is a constant.
2640 Similarly handle modulo by a constant:
2646 * STMT_VINFO: The stmt from which the pattern search begins,
2647 i.e. the division stmt. S1 is replaced by if N is a power
2648 of two constant and type is signed:
2649 S3 y_t = b_t < 0 ? N - 1 : 0;
2651 S1' a_t = x_t >> log2 (N);
2653 S4 is replaced if N is a power of two constant and
2654 type is signed by (where *_T temporaries have unsigned type):
2655 S9 y_T = b_t < 0 ? -1U : 0U;
2656 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2657 S7 z_t = (type) z_T;
2659 S5 x_t = w_t & (N - 1);
2660 S4' a_t = x_t - z_t;
2664 * TYPE_OUT: The type of the output of this pattern.
2666 * Return value: A new stmt that will be used to replace the division
2667 S1 or modulo S4 stmt. */
2670 vect_recog_divmod_pattern (stmt_vec_info stmt_vinfo
, tree
*type_out
)
2672 gimple
*last_stmt
= stmt_vinfo
->stmt
;
2673 tree oprnd0
, oprnd1
, vectype
, itype
, cond
;
2674 gimple
*pattern_stmt
, *def_stmt
;
2675 enum tree_code rhs_code
;
2678 int dummy_int
, prec
;
2680 if (!is_gimple_assign (last_stmt
))
2683 rhs_code
= gimple_assign_rhs_code (last_stmt
);
2686 case TRUNC_DIV_EXPR
:
2687 case EXACT_DIV_EXPR
:
2688 case TRUNC_MOD_EXPR
:
2694 oprnd0
= gimple_assign_rhs1 (last_stmt
);
2695 oprnd1
= gimple_assign_rhs2 (last_stmt
);
2696 itype
= TREE_TYPE (oprnd0
);
2697 if (TREE_CODE (oprnd0
) != SSA_NAME
2698 || TREE_CODE (oprnd1
) != INTEGER_CST
2699 || TREE_CODE (itype
) != INTEGER_TYPE
2700 || !type_has_mode_precision_p (itype
))
2703 scalar_int_mode itype_mode
= SCALAR_INT_TYPE_MODE (itype
);
2704 vectype
= get_vectype_for_scalar_type (itype
);
2705 if (vectype
== NULL_TREE
)
2708 if (optimize_bb_for_size_p (gimple_bb (last_stmt
)))
2710 /* If the target can handle vectorized division or modulo natively,
2711 don't attempt to optimize this, since native division is likely
2712 to give smaller code. */
2713 optab
= optab_for_tree_code (rhs_code
, vectype
, optab_default
);
2714 if (optab
!= unknown_optab
)
2716 machine_mode vec_mode
= TYPE_MODE (vectype
);
2717 int icode
= (int) optab_handler (optab
, vec_mode
);
2718 if (icode
!= CODE_FOR_nothing
)
2723 prec
= TYPE_PRECISION (itype
);
2724 if (integer_pow2p (oprnd1
))
2726 if (TYPE_UNSIGNED (itype
) || tree_int_cst_sgn (oprnd1
) != 1)
2729 /* Pattern detected. */
2730 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt
);
2732 cond
= build2 (LT_EXPR
, boolean_type_node
, oprnd0
,
2733 build_int_cst (itype
, 0));
2734 if (rhs_code
== TRUNC_DIV_EXPR
2735 || rhs_code
== EXACT_DIV_EXPR
)
2737 tree var
= vect_recog_temp_ssa_var (itype
, NULL
);
2740 = gimple_build_assign (var
, COND_EXPR
, cond
,
2741 fold_build2 (MINUS_EXPR
, itype
, oprnd1
,
2742 build_int_cst (itype
, 1)),
2743 build_int_cst (itype
, 0));
2744 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2745 var
= vect_recog_temp_ssa_var (itype
, NULL
);
2747 = gimple_build_assign (var
, PLUS_EXPR
, oprnd0
,
2748 gimple_assign_lhs (def_stmt
));
2749 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2751 shift
= build_int_cst (itype
, tree_log2 (oprnd1
));
2753 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2754 RSHIFT_EXPR
, var
, shift
);
2759 if (compare_tree_int (oprnd1
, 2) == 0)
2761 signmask
= vect_recog_temp_ssa_var (itype
, NULL
);
2762 def_stmt
= gimple_build_assign (signmask
, COND_EXPR
, cond
,
2763 build_int_cst (itype
, 1),
2764 build_int_cst (itype
, 0));
2765 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2770 = build_nonstandard_integer_type (prec
, 1);
2771 tree vecutype
= get_vectype_for_scalar_type (utype
);
2773 = build_int_cst (utype
, GET_MODE_BITSIZE (itype_mode
)
2774 - tree_log2 (oprnd1
));
2775 tree var
= vect_recog_temp_ssa_var (utype
, NULL
);
2777 def_stmt
= gimple_build_assign (var
, COND_EXPR
, cond
,
2778 build_int_cst (utype
, -1),
2779 build_int_cst (utype
, 0));
2780 append_pattern_def_seq (stmt_vinfo
, def_stmt
, vecutype
);
2781 var
= vect_recog_temp_ssa_var (utype
, NULL
);
2782 def_stmt
= gimple_build_assign (var
, RSHIFT_EXPR
,
2783 gimple_assign_lhs (def_stmt
),
2785 append_pattern_def_seq (stmt_vinfo
, def_stmt
, vecutype
);
2786 signmask
= vect_recog_temp_ssa_var (itype
, NULL
);
2788 = gimple_build_assign (signmask
, NOP_EXPR
, var
);
2789 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2792 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2793 PLUS_EXPR
, oprnd0
, signmask
);
2794 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2796 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2797 BIT_AND_EXPR
, gimple_assign_lhs (def_stmt
),
2798 fold_build2 (MINUS_EXPR
, itype
, oprnd1
,
2799 build_int_cst (itype
, 1)));
2800 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2803 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2804 MINUS_EXPR
, gimple_assign_lhs (def_stmt
),
2808 *type_out
= vectype
;
2809 return pattern_stmt
;
2812 if (prec
> HOST_BITS_PER_WIDE_INT
2813 || integer_zerop (oprnd1
))
2816 if (!can_mult_highpart_p (TYPE_MODE (vectype
), TYPE_UNSIGNED (itype
)))
2819 if (TYPE_UNSIGNED (itype
))
2821 unsigned HOST_WIDE_INT mh
, ml
;
2822 int pre_shift
, post_shift
;
2823 unsigned HOST_WIDE_INT d
= (TREE_INT_CST_LOW (oprnd1
)
2824 & GET_MODE_MASK (itype_mode
));
2825 tree t1
, t2
, t3
, t4
;
2827 if (d
>= (HOST_WIDE_INT_1U
<< (prec
- 1)))
2828 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2831 /* Find a suitable multiplier and right shift count
2832 instead of multiplying with D. */
2833 mh
= choose_multiplier (d
, prec
, prec
, &ml
, &post_shift
, &dummy_int
);
2835 /* If the suggested multiplier is more than SIZE bits, we can do better
2836 for even divisors, using an initial right shift. */
2837 if (mh
!= 0 && (d
& 1) == 0)
2839 pre_shift
= ctz_or_zero (d
);
2840 mh
= choose_multiplier (d
>> pre_shift
, prec
, prec
- pre_shift
,
2841 &ml
, &post_shift
, &dummy_int
);
2849 if (post_shift
- 1 >= prec
)
2852 /* t1 = oprnd0 h* ml;
2856 q = t4 >> (post_shift - 1); */
2857 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
2858 def_stmt
= gimple_build_assign (t1
, MULT_HIGHPART_EXPR
, oprnd0
,
2859 build_int_cst (itype
, ml
));
2860 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2862 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
2864 = gimple_build_assign (t2
, MINUS_EXPR
, oprnd0
, t1
);
2865 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2867 t3
= vect_recog_temp_ssa_var (itype
, NULL
);
2869 = gimple_build_assign (t3
, RSHIFT_EXPR
, t2
, integer_one_node
);
2870 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2872 t4
= vect_recog_temp_ssa_var (itype
, NULL
);
2874 = gimple_build_assign (t4
, PLUS_EXPR
, t1
, t3
);
2876 if (post_shift
!= 1)
2878 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2880 q
= vect_recog_temp_ssa_var (itype
, NULL
);
2882 = gimple_build_assign (q
, RSHIFT_EXPR
, t4
,
2883 build_int_cst (itype
, post_shift
- 1));
2888 pattern_stmt
= def_stmt
;
2893 if (pre_shift
>= prec
|| post_shift
>= prec
)
2896 /* t1 = oprnd0 >> pre_shift;
2898 q = t2 >> post_shift; */
2901 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
2903 = gimple_build_assign (t1
, RSHIFT_EXPR
, oprnd0
,
2904 build_int_cst (NULL
, pre_shift
));
2905 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2910 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
2911 def_stmt
= gimple_build_assign (t2
, MULT_HIGHPART_EXPR
, t1
,
2912 build_int_cst (itype
, ml
));
2916 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2918 q
= vect_recog_temp_ssa_var (itype
, NULL
);
2920 = gimple_build_assign (q
, RSHIFT_EXPR
, t2
,
2921 build_int_cst (itype
, post_shift
));
2926 pattern_stmt
= def_stmt
;
2931 unsigned HOST_WIDE_INT ml
;
2933 HOST_WIDE_INT d
= TREE_INT_CST_LOW (oprnd1
);
2934 unsigned HOST_WIDE_INT abs_d
;
2936 tree t1
, t2
, t3
, t4
;
2938 /* Give up for -1. */
2942 /* Since d might be INT_MIN, we have to cast to
2943 unsigned HOST_WIDE_INT before negating to avoid
2944 undefined signed overflow. */
2946 ? (unsigned HOST_WIDE_INT
) d
2947 : - (unsigned HOST_WIDE_INT
) d
);
2949 /* n rem d = n rem -d */
2950 if (rhs_code
== TRUNC_MOD_EXPR
&& d
< 0)
2953 oprnd1
= build_int_cst (itype
, abs_d
);
2955 else if (HOST_BITS_PER_WIDE_INT
>= prec
2956 && abs_d
== HOST_WIDE_INT_1U
<< (prec
- 1))
2957 /* This case is not handled correctly below. */
2960 choose_multiplier (abs_d
, prec
, prec
- 1, &ml
, &post_shift
, &dummy_int
);
2961 if (ml
>= HOST_WIDE_INT_1U
<< (prec
- 1))
2964 ml
|= HOST_WIDE_INT_M1U
<< (prec
- 1);
2966 if (post_shift
>= prec
)
2969 /* t1 = oprnd0 h* ml; */
2970 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
2971 def_stmt
= gimple_build_assign (t1
, MULT_HIGHPART_EXPR
, oprnd0
,
2972 build_int_cst (itype
, ml
));
2976 /* t2 = t1 + oprnd0; */
2977 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2978 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
2979 def_stmt
= gimple_build_assign (t2
, PLUS_EXPR
, t1
, oprnd0
);
2986 /* t3 = t2 >> post_shift; */
2987 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2988 t3
= vect_recog_temp_ssa_var (itype
, NULL
);
2989 def_stmt
= gimple_build_assign (t3
, RSHIFT_EXPR
, t2
,
2990 build_int_cst (itype
, post_shift
));
2995 wide_int oprnd0_min
, oprnd0_max
;
2997 if (get_range_info (oprnd0
, &oprnd0_min
, &oprnd0_max
) == VR_RANGE
)
2999 if (!wi::neg_p (oprnd0_min
, TYPE_SIGN (itype
)))
3001 else if (wi::neg_p (oprnd0_max
, TYPE_SIGN (itype
)))
3005 if (msb
== 0 && d
>= 0)
3009 pattern_stmt
= def_stmt
;
3013 /* t4 = oprnd0 >> (prec - 1);
3014 or if we know from VRP that oprnd0 >= 0
3016 or if we know from VRP that oprnd0 < 0
3018 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
3019 t4
= vect_recog_temp_ssa_var (itype
, NULL
);
3021 def_stmt
= gimple_build_assign (t4
, INTEGER_CST
,
3022 build_int_cst (itype
, msb
));
3024 def_stmt
= gimple_build_assign (t4
, RSHIFT_EXPR
, oprnd0
,
3025 build_int_cst (itype
, prec
- 1));
3026 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
3028 /* q = t3 - t4; or q = t4 - t3; */
3029 q
= vect_recog_temp_ssa_var (itype
, NULL
);
3030 pattern_stmt
= gimple_build_assign (q
, MINUS_EXPR
, d
< 0 ? t4
: t3
,
3035 if (rhs_code
== TRUNC_MOD_EXPR
)
3039 /* We divided. Now finish by:
3042 append_pattern_def_seq (stmt_vinfo
, pattern_stmt
);
3044 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
3045 def_stmt
= gimple_build_assign (t1
, MULT_EXPR
, q
, oprnd1
);
3046 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
3048 r
= vect_recog_temp_ssa_var (itype
, NULL
);
3049 pattern_stmt
= gimple_build_assign (r
, MINUS_EXPR
, oprnd0
, t1
);
3052 /* Pattern detected. */
3053 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt
);
3055 *type_out
= vectype
;
3056 return pattern_stmt
;
3059 /* Function vect_recog_mixed_size_cond_pattern
3061 Try to find the following pattern:
3066 S1 a_T = x_t CMP y_t ? b_T : c_T;
3068 where type 'TYPE' is an integral type which has different size
3069 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
3070 than 'type', the constants need to fit into an integer type
3071 with the same width as 'type') or results of conversion from 'type'.
3075 * STMT_VINFO: The stmt from which the pattern search begins.
3079 * TYPE_OUT: The type of the output of this pattern.
3081 * Return value: A new stmt that will be used to replace the pattern.
3082 Additionally a def_stmt is added.
3084 a_it = x_t CMP y_t ? b_it : c_it;
3085 a_T = (TYPE) a_it; */
3088 vect_recog_mixed_size_cond_pattern (stmt_vec_info stmt_vinfo
, tree
*type_out
)
3090 gimple
*last_stmt
= stmt_vinfo
->stmt
;
3091 tree cond_expr
, then_clause
, else_clause
;
3092 tree type
, vectype
, comp_vectype
, itype
= NULL_TREE
, vecitype
;
3093 gimple
*pattern_stmt
, *def_stmt
;
3094 tree orig_type0
= NULL_TREE
, orig_type1
= NULL_TREE
;
3095 gimple
*def_stmt0
= NULL
, *def_stmt1
= NULL
;
3097 tree comp_scalar_type
;
3099 if (!is_gimple_assign (last_stmt
)
3100 || gimple_assign_rhs_code (last_stmt
) != COND_EXPR
3101 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_internal_def
)
3104 cond_expr
= gimple_assign_rhs1 (last_stmt
);
3105 then_clause
= gimple_assign_rhs2 (last_stmt
);
3106 else_clause
= gimple_assign_rhs3 (last_stmt
);
3108 if (!COMPARISON_CLASS_P (cond_expr
))
3111 comp_scalar_type
= TREE_TYPE (TREE_OPERAND (cond_expr
, 0));
3112 comp_vectype
= get_vectype_for_scalar_type (comp_scalar_type
);
3113 if (comp_vectype
== NULL_TREE
)
3116 type
= gimple_expr_type (last_stmt
);
3117 if (types_compatible_p (type
, comp_scalar_type
)
3118 || ((TREE_CODE (then_clause
) != INTEGER_CST
3119 || TREE_CODE (else_clause
) != INTEGER_CST
)
3120 && !INTEGRAL_TYPE_P (comp_scalar_type
))
3121 || !INTEGRAL_TYPE_P (type
))
3124 if ((TREE_CODE (then_clause
) != INTEGER_CST
3125 && !type_conversion_p (then_clause
, last_stmt
, false, &orig_type0
,
3126 &def_stmt0
, &promotion
))
3127 || (TREE_CODE (else_clause
) != INTEGER_CST
3128 && !type_conversion_p (else_clause
, last_stmt
, false, &orig_type1
,
3129 &def_stmt1
, &promotion
)))
3132 if (orig_type0
&& orig_type1
3133 && !types_compatible_p (orig_type0
, orig_type1
))
3138 if (!types_compatible_p (orig_type0
, comp_scalar_type
))
3140 then_clause
= gimple_assign_rhs1 (def_stmt0
);
3146 if (!types_compatible_p (orig_type1
, comp_scalar_type
))
3148 else_clause
= gimple_assign_rhs1 (def_stmt1
);
3153 HOST_WIDE_INT cmp_mode_size
3154 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype
));
3156 scalar_int_mode type_mode
= SCALAR_INT_TYPE_MODE (type
);
3157 if (GET_MODE_BITSIZE (type_mode
) == cmp_mode_size
)
3160 vectype
= get_vectype_for_scalar_type (type
);
3161 if (vectype
== NULL_TREE
)
3164 if (expand_vec_cond_expr_p (vectype
, comp_vectype
, TREE_CODE (cond_expr
)))
3167 if (itype
== NULL_TREE
)
3168 itype
= build_nonstandard_integer_type (cmp_mode_size
,
3169 TYPE_UNSIGNED (type
));
3171 if (itype
== NULL_TREE
3172 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype
)) != cmp_mode_size
)
3175 vecitype
= get_vectype_for_scalar_type (itype
);
3176 if (vecitype
== NULL_TREE
)
3179 if (!expand_vec_cond_expr_p (vecitype
, comp_vectype
, TREE_CODE (cond_expr
)))
3182 if (GET_MODE_BITSIZE (type_mode
) > cmp_mode_size
)
3184 if ((TREE_CODE (then_clause
) == INTEGER_CST
3185 && !int_fits_type_p (then_clause
, itype
))
3186 || (TREE_CODE (else_clause
) == INTEGER_CST
3187 && !int_fits_type_p (else_clause
, itype
)))
3191 def_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
3192 COND_EXPR
, unshare_expr (cond_expr
),
3193 fold_convert (itype
, then_clause
),
3194 fold_convert (itype
, else_clause
));
3195 pattern_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (type
, NULL
),
3196 NOP_EXPR
, gimple_assign_lhs (def_stmt
));
3198 append_pattern_def_seq (stmt_vinfo
, def_stmt
, vecitype
);
3199 *type_out
= vectype
;
3201 vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt
);
3203 return pattern_stmt
;
3207 /* Helper function of vect_recog_bool_pattern. Called recursively, return
3208 true if bool VAR can and should be optimized that way. Assume it shouldn't
3209 in case it's a result of a comparison which can be directly vectorized into
3210 a vector comparison. Fills in STMTS with all stmts visited during the
3214 check_bool_pattern (tree var
, vec_info
*vinfo
, hash_set
<gimple
*> &stmts
)
3217 enum tree_code rhs_code
;
3219 stmt_vec_info def_stmt_info
= vect_get_internal_def (vinfo
, var
);
3223 gassign
*def_stmt
= dyn_cast
<gassign
*> (def_stmt_info
->stmt
);
3227 if (stmts
.contains (def_stmt
))
3230 rhs1
= gimple_assign_rhs1 (def_stmt
);
3231 rhs_code
= gimple_assign_rhs_code (def_stmt
);
3235 if (! check_bool_pattern (rhs1
, vinfo
, stmts
))
3240 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1
)))
3242 if (! check_bool_pattern (rhs1
, vinfo
, stmts
))
3247 if (! check_bool_pattern (rhs1
, vinfo
, stmts
))
3254 if (! check_bool_pattern (rhs1
, vinfo
, stmts
)
3255 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt
), vinfo
, stmts
))
3260 if (TREE_CODE_CLASS (rhs_code
) == tcc_comparison
)
3262 tree vecitype
, comp_vectype
;
3264 /* If the comparison can throw, then is_gimple_condexpr will be
3265 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
3266 if (stmt_could_throw_p (def_stmt
))
3269 comp_vectype
= get_vectype_for_scalar_type (TREE_TYPE (rhs1
));
3270 if (comp_vectype
== NULL_TREE
)
3273 tree mask_type
= get_mask_type_for_scalar_type (TREE_TYPE (rhs1
));
3275 && expand_vec_cmp_expr_p (comp_vectype
, mask_type
, rhs_code
))
3278 if (TREE_CODE (TREE_TYPE (rhs1
)) != INTEGER_TYPE
)
3280 scalar_mode mode
= SCALAR_TYPE_MODE (TREE_TYPE (rhs1
));
3282 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode
), 1);
3283 vecitype
= get_vectype_for_scalar_type (itype
);
3284 if (vecitype
== NULL_TREE
)
3288 vecitype
= comp_vectype
;
3289 if (! expand_vec_cond_expr_p (vecitype
, comp_vectype
, rhs_code
))
3297 bool res
= stmts
.add (def_stmt
);
3298 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
3305 /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
3306 stmt (SSA_NAME_DEF_STMT of VAR) adding a cast to STMT_INFOs
3307 pattern sequence. */
/* NOTE(review): extraction dropped interior lines (return type, braces and
   original line 2313 carrying the NOP_EXPR/VAR operands of the assign
   below) -- restore from upstream GCC before compiling.  */
3310 adjust_bool_pattern_cast (tree type
, tree var
, stmt_vec_info stmt_info
)
/* Build "tmp = (type) var" into a fresh pattern temporary.  */
3312 gimple
*cast_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (type
, NULL
),
/* Record the cast in STMT_INFO's pattern def sequence with the vector
   type matching TYPE.  */
3314 append_pattern_def_seq (stmt_info
, cast_stmt
,
3315 get_vectype_for_scalar_type (type
));
/* Hand back the SSA name holding the cast result.  */
3316 return gimple_assign_lhs (cast_stmt
)
;
3319 /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
3320 VAR is an SSA_NAME that should be transformed from bool to a wider integer
3321 type, OUT_TYPE is the desired final integer type of the whole pattern.
3322 STMT_INFO is the info of the pattern root and is where pattern stmts should
3323 be associated with. DEFS is a map of pattern defs. */
/* NOTE(review): this extract is line-mangled with gaps (missing braces,
   switch/case labels and some statements); the leading numbers are the
   upstream file's own line numbers and fragments are re-joined for
   readability only.  Do not edit logic from this view alone.  */
3326 adjust_bool_pattern (tree var, tree out_type,
3327 stmt_vec_info stmt_info, hash_map<tree, tree> &defs)
3329 gimple *stmt = SSA_NAME_DEF_STMT (var);
3330 enum tree_code rhs_code, def_rhs_code;
3331 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
3333 gimple *pattern_stmt, *def_stmt;
3334 tree trueval = NULL_TREE;
/* Pick apart VAR's defining assignment; the dispatch on rhs_code
   (presumably a switch) is not visible in this extract.  */
3336 rhs1 = gimple_assign_rhs1 (stmt);
3337 rhs2 = gimple_assign_rhs2 (stmt);
3338 rhs_code = gimple_assign_rhs_code (stmt);
3339 loc = gimple_location (stmt);
/* Conversion case: reuse the already-computed pattern def of RHS1.  */
3344 irhs1 = *defs.get (rhs1);
3345 itype = TREE_TYPE (irhs1);
3347 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
/* Negation case: !b becomes irhs1 ^ 1 in the wider integer type.  */
3352 irhs1 = *defs.get (rhs1);
3353 itype = TREE_TYPE (irhs1);
3355 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3356 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
3360 /* Try to optimize x = y & (a < b ? 1 : 0); into
3361 x = (a < b ? y : 0);
3367 S1 a_b = x1 CMP1 y1;
3368 S2 b_b = x2 CMP2 y2;
3370 S4 d_T = (TYPE) c_b;
3372 we would normally emit:
3374 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3375 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3376 S3' c_T = a_T & b_T;
3379 but we can save one stmt by using the
3380 result of one of the COND_EXPRs in the other COND_EXPR and leave
3381 BIT_AND_EXPR stmt out:
3383 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3384 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3387 At least when VEC_COND_EXPR is implemented using masks
3388 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
3389 computes the comparison masks and ands it, in one case with
3390 all ones vector, in the other case with a vector register.
3391 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
3392 often more expensive. */
/* BIT_AND case, variant 1: RHS2 is defined by a comparison of matching
   precision -- fold the comparison into this stmt's COND_EXPR.  */
3393 def_stmt = SSA_NAME_DEF_STMT (rhs2);
3394 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3395 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3397 irhs1 = *defs.get (rhs1);
3398 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3399 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3400 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3402 rhs_code = def_rhs_code;
3404 rhs2 = gimple_assign_rhs2 (def_stmt);
3409 irhs2 = *defs.get (rhs2);
/* Variant 2: same optimization with the roles of RHS1/RHS2 swapped.  */
3412 def_stmt = SSA_NAME_DEF_STMT (rhs1);
3413 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3414 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3416 irhs2 = *defs.get (rhs2);
3417 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3418 if (TYPE_PRECISION (TREE_TYPE (irhs2))
3419 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3421 rhs_code = def_rhs_code;
3423 rhs2 = gimple_assign_rhs2 (def_stmt);
3428 irhs1 = *defs.get (rhs1);
/* Generic binary case: if the two converted operands differ in
   precision, cast the one whose precision is further from OUT_TYPE's
   (or cast both to OUT_TYPE when neither side wins).  */
3434 irhs1 = *defs.get (rhs1);
3435 irhs2 = *defs.get (rhs2);
3437 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3438 != TYPE_PRECISION (TREE_TYPE (irhs2)))
3440 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3441 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3442 int out_prec = TYPE_PRECISION (out_type);
3443 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3444 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), irhs2,
3446 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3447 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), irhs1,
3451 irhs1 = adjust_bool_pattern_cast (out_type, irhs1, stmt_info);
3452 irhs2 = adjust_bool_pattern_cast (out_type, irhs2, stmt_info);
3455 itype = TREE_TYPE (irhs1);
3457 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3458 rhs_code, irhs1, irhs2);
/* Default (comparison) case: pick an unsigned mode-precision integer
   type and emit lhs = cond_expr ? trueval : 0.  */
3463 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3464 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3465 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3466 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
3467 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
3469 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3471 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3474 itype = TREE_TYPE (rhs1);
3475 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3476 if (trueval == NULL_TREE)
3477 trueval = build_int_cst (itype, 1);
3479 gcc_checking_assert (useless_type_conversion_p (itype,
3480 TREE_TYPE (trueval)));
3482 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3483 COND_EXPR, cond_expr, trueval,
3484 build_int_cst (itype, 0));
/* Record the new pattern stmt and remember VAR -> lhs in DEFS.  */
3488 gimple_set_location (pattern_stmt, loc);
3489 append_pattern_def_seq (stmt_info, pattern_stmt,
3490 get_vectype_for_scalar_type (itype));
3491 defs.put (var, gimple_assign_lhs (pattern_stmt));
3494 /* Comparison function to qsort a vector of gimple stmts after UID. */
/* NOTE(review): fragments re-joined for readability; the return type line
   and braces are missing from this extract -- confirm against the full
   source.  Returns negative/zero/positive per the qsort contract, ordering
   statements by ascending gimple UID.  */
3497 sort_after_uid (const void *p1, const void *p2)
3499 const gimple *stmt1 = *(const gimple * const *)p1;
3500 const gimple *stmt2 = *(const gimple * const *)p2;
3501 return gimple_uid (stmt1) - gimple_uid (stmt2);
3504 /* Create pattern stmts for all stmts participating in the bool pattern
3505 specified by BOOL_STMT_SET and its root STMT with the desired type
3506 OUT_TYPE. Return the def of the pattern root. */
/* NOTE(review): line-mangled extract with gaps (missing braces and the
   return type line); fragments re-joined for readability only.  */
3509 adjust_bool_stmts (hash_set<gimple *> &bool_stmt_set,
3510 tree out_type, gimple *stmt)
3512 /* Gather original stmts in the bool pattern in their order of appearance
(by UID, since hash_set iteration order is unspecified).  */
3514 auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
3515 for (hash_set<gimple *>::iterator i = bool_stmt_set.begin ();
3516 i != bool_stmt_set.end (); ++i)
3517 bool_stmts.quick_push (*i);
3518 bool_stmts.qsort (sort_after_uid);
3520 /* Now process them in that order, producing pattern stmts. */
3521 hash_map<tree, tree> defs;
3522 for (unsigned i = 0; i < bool_stmts.length (); ++i)
3523 adjust_bool_pattern (gimple_assign_lhs (bool_stmts[i]),
3524 out_type, vinfo_for_stmt (stmt), defs);
3526 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
3527 gimple *pattern_stmt
3528 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (stmt)));
3529 return gimple_assign_lhs (pattern_stmt);
3532 /* Helper for search_type_for_mask. */
/* NOTE(review): line-mangled extract with gaps (missing braces, early
   returns and switch/case structure); fragments re-joined for readability
   only.  Walks VAR's def chain, caching per-stmt results in CACHE, and
   returns a candidate scalar type for the mask (or NULL_TREE).  */
3535 search_type_for_mask_1 (tree var, vec_info *vinfo,
3536 hash_map<gimple *, tree> &cache)
3539 enum tree_code rhs_code;
3540 tree res = NULL_TREE, res2;
3542 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3545 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3549 gassign *def_stmt = dyn_cast<gassign *> (def_stmt_info->stmt);
/* Cache lookup: reuse a previously computed answer for DEF_STMT.  */
3553 tree *c = cache.get (def_stmt);
3557 rhs_code = gimple_assign_rhs_code (def_stmt);
3558 rhs1 = gimple_assign_rhs1 (def_stmt);
/* Unary (conversion) case: recurse on the single operand.  */
3565 res = search_type_for_mask_1 (rhs1, vinfo, cache);
/* Binary boolean op case: recurse on both operands and keep the one
   with the smaller precision.  */
3571 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3572 res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt), vinfo,
3574 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3579 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3581 tree comp_vectype, mask_type;
3583 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3585 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3586 res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt),
3588 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
/* Non-boolean comparison: the target must support the comparison on
   the operand's vector type for it to be usable as a mask source.  */
3593 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3594 if (comp_vectype == NULL_TREE)
3600 mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3602 || !expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
/* Canonicalize the result type to an unsigned mode-width integer.  */
3608 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3609 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
3611 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3612 res = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3615 res = TREE_TYPE (rhs1);
3619 cache.put (def_stmt, res);
3623 /* Return the proper type for converting bool VAR into
3624 an integer value or NULL_TREE if no such type exists.
3625 The type is chosen so that converted value has the
3626 same number of elements as VAR's vector type. */
/* NOTE(review): fragments re-joined for readability; return type line and
   braces missing from this extract.  Thin wrapper that seeds an empty
   per-call cache for search_type_for_mask_1.  */
3629 search_type_for_mask (tree var, vec_info *vinfo)
3631 hash_map<gimple *, tree> cache;
3632 return search_type_for_mask_1 (var, vinfo, cache);
3635 /* Function vect_recog_bool_pattern
3637 Try to find pattern like following:
3639 bool a_b, b_b, c_b, d_b, e_b;
3642 S1 a_b = x1 CMP1 y1;
3643 S2 b_b = x2 CMP2 y2;
3645 S4 d_b = x3 CMP3 y3;
3647 S6 f_T = (TYPE) e_b;
3649 where type 'TYPE' is an integral type. Or a similar pattern
3652 S6 f_Y = e_b ? r_Y : s_Y;
3654 as results from if-conversion of a complex condition.
3658 * STMT_VINFO: The stmt at the end from which the pattern
3659 search begins, i.e. cast of a bool to
3664 * TYPE_OUT: The type of the output of this pattern.
3666 * Return value: A new stmt that will be used to replace the pattern.
3668 Assuming size of TYPE is the same as size of all comparisons
3669 (otherwise some casts would be added where needed), the above
3670 sequence we create related pattern stmts:
3671 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3672 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3673 S4' d_T = x3 CMP3 y3 ? 1 : 0;
3674 S5' e_T = c_T | d_T;
3677 Instead of the above S3' we could emit:
3678 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3679 S3' c_T = a_T | b_T;
3680 but the above is more efficient. */
/* NOTE(review): line-mangled extract with gaps (missing braces, early
   returns and some else branches); fragments re-joined for readability
   only.  Do not edit logic from this view alone.  */
3683 vect_recog_bool_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
3685 gimple *last_stmt = stmt_vinfo->stmt;
3686 enum tree_code rhs_code;
3687 tree var, lhs, rhs, vectype;
3688 vec_info *vinfo = stmt_vinfo->vinfo;
3689 gimple *pattern_stmt;
3691 if (!is_gimple_assign (last_stmt))
3694 var = gimple_assign_rhs1 (last_stmt);
3695 lhs = gimple_assign_lhs (last_stmt);
3697 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3700 hash_set<gimple *> bool_stmts;
3702 rhs_code = gimple_assign_rhs_code (last_stmt);
/* Case 1: bool-to-integer conversion.  */
3703 if (CONVERT_EXPR_CODE_P (rhs_code))
3705 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3706 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
3708 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3709 if (vectype == NULL_TREE)
3712 if (check_bool_pattern (var, vinfo, bool_stmts))
3714 rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (lhs), last_stmt);
3715 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3716 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3717 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3720 = gimple_build_assign (lhs, NOP_EXPR, rhs);
/* Fallback when check_bool_pattern fails: use a mask-typed COND_EXPR.  */
3724 tree type = search_type_for_mask (var, vinfo);
3725 tree cst0, cst1, tmp;
3730 /* We may directly use cond with narrowed type to avoid
3731 multiple cond exprs with following result packing and
3732 perform single cond with packed mask instead. In case
3733 of widening we better make cond first and then extract
(the results -- continuation missing from this extract).  */
3735 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
3736 type = TREE_TYPE (lhs);
3738 cst0 = build_int_cst (type, 0);
3739 cst1 = build_int_cst (type, 1);
3740 tmp = vect_recog_temp_ssa_var (type, NULL);
3741 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
3743 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
3745 tree new_vectype = get_vectype_for_scalar_type (type);
3746 append_pattern_def_seq (stmt_vinfo, pattern_stmt, new_vectype);
3748 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3749 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
3753 *type_out = vectype;
3754 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
3756 return pattern_stmt;
/* Case 2: COND_EXPR whose condition is a bool SSA name.  */
3758 else if (rhs_code == COND_EXPR
3759 && TREE_CODE (var) == SSA_NAME)
3761 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3762 if (vectype == NULL_TREE)
3765 /* Build a scalar type for the boolean result that when
3766 vectorized matches the vector type of the result in
3767 size and number of elements. */
3769 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
3770 TYPE_VECTOR_SUBPARTS (vectype));
3773 = build_nonstandard_integer_type (prec,
3774 TYPE_UNSIGNED (TREE_TYPE (var)));
3775 if (get_vectype_for_scalar_type (type) == NULL_TREE)
3778 if (!check_bool_pattern (var, vinfo, bool_stmts))
3781 rhs = adjust_bool_stmts (bool_stmts, type, last_stmt);
3783 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3785 = gimple_build_assign (lhs, COND_EXPR,
3786 build2 (NE_EXPR, boolean_type_node,
3787 rhs, build_int_cst (type, 0)),
3788 gimple_assign_rhs2 (last_stmt),
3789 gimple_assign_rhs3 (last_stmt));
3790 *type_out = vectype;
3791 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
3793 return pattern_stmt;
/* Case 3: storing a bool value to memory (stmt has a data ref).  */
3795 else if (rhs_code == SSA_NAME
3796 && STMT_VINFO_DATA_REF (stmt_vinfo))
3798 stmt_vec_info pattern_stmt_info;
3799 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3800 gcc_assert (vectype != NULL_TREE);
3801 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
3804 if (check_bool_pattern (var, vinfo, bool_stmts))
3805 rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (vectype), last_stmt);
3808 tree type = search_type_for_mask (var, vinfo);
3809 tree cst0, cst1, new_vectype;
3814 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
3815 type = TREE_TYPE (vectype);
3817 cst0 = build_int_cst (type, 0);
3818 cst1 = build_int_cst (type, 1);
3819 new_vectype = get_vectype_for_scalar_type (type);
3821 rhs = vect_recog_temp_ssa_var (type, NULL);
3822 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
3823 append_pattern_def_seq (stmt_vinfo, pattern_stmt, new_vectype);
3826 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
3827 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3829 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3830 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
3831 append_pattern_def_seq (stmt_vinfo, cast_stmt);
3834 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
/* Transfer the data reference info to the new pattern stmt.  */
3835 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3836 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3837 STMT_VINFO_DATA_REF (pattern_stmt_info)
3838 = STMT_VINFO_DATA_REF (stmt_vinfo);
3839 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
3840 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_vinfo);
3841 *type_out = vectype;
3842 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
3844 return pattern_stmt;
3851 /* A helper for vect_recog_mask_conversion_pattern. Build
3852 conversion of MASK to a type suitable for masking VECTYPE.
3853 Built statement gets required vectype and is appended to
3854 a pattern sequence of STMT_VINFO.
3856 Return converted mask. */
/* NOTE(review): fragments re-joined for readability; return type line,
   local declarations, braces and the final return (presumably of TMP)
   are missing from this extract -- confirm against the full source.  */
3859 build_mask_conversion (tree mask, tree vectype, stmt_vec_info stmt_vinfo)
3864 masktype = build_same_sized_truth_vector_type (vectype);
3865 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
3866 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
3867 append_pattern_def_seq (stmt_vinfo, stmt, masktype);
3873 /* Function vect_recog_mask_conversion_pattern
3875 Try to find statements which require boolean type
3876 conversion. Additional conversion statements are
3877 added to handle such cases. For example:
3887 S4 c_1 = m_3 ? c_2 : c_3;
3889 Will be transformed into:
3893 S3'' m_2' = (_Bool[bitsize=32])m_2
3894 S3' m_3' = m_1 & m_2';
3895 S4'' m_3'' = (_Bool[bitsize=8])m_3'
3896 S4' c_1' = m_3'' ? c_2 : c_3; */
/* NOTE(review): line-mangled extract with gaps (missing braces, early
   returns and else branches); fragments re-joined for readability only.
   Do not edit logic from this view alone.  */
3899 vect_recog_mask_conversion_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
3901 gimple *last_stmt = stmt_vinfo->stmt;
3902 enum tree_code rhs_code;
3903 tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
3904 tree vectype1, vectype2;
3905 stmt_vec_info pattern_stmt_info;
3906 vec_info *vinfo = stmt_vinfo->vinfo;
3908 /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion. */
3909 if (is_gimple_call (last_stmt)
3910 && gimple_call_internal_p (last_stmt))
3912 gcall *pattern_stmt;
3914 internal_fn ifn = gimple_call_internal_fn (last_stmt);
3915 int mask_argno = internal_fn_mask_index (ifn);
3919 bool store_p = internal_store_fn_p (ifn);
3922 int rhs_index = internal_fn_stored_value_index (ifn);
3923 tree rhs = gimple_call_arg (last_stmt, rhs_index);
3924 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (rhs));
3928 lhs = gimple_call_lhs (last_stmt);
3929 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3932 tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
3933 tree mask_arg_type = search_type_for_mask (mask_arg, vinfo);
3936 vectype2 = get_mask_type_for_scalar_type (mask_arg_type);
/* No conversion needed if the mask vector already has the right
   number of elements.  */
3938 if (!vectype1 || !vectype2
3939 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
3940 TYPE_VECTOR_SUBPARTS (vectype2)))
3943 tmp = build_mask_conversion (mask_arg, vectype1, stmt_vinfo);
/* Rebuild the call with the converted mask substituted in.  */
3945 auto_vec<tree, 8> args;
3946 unsigned int nargs = gimple_call_num_args (last_stmt);
3947 args.safe_grow (nargs);
3948 for (unsigned int i = 0; i < nargs; ++i)
3949 args[i] = ((int) i == mask_argno
3951 : gimple_call_arg (last_stmt, i));
3952 pattern_stmt = gimple_build_call_internal_vec (ifn, args);
3956 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3957 gimple_call_set_lhs (pattern_stmt, lhs);
3959 gimple_call_set_nothrow (pattern_stmt, true);
3961 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3962 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3963 if (STMT_VINFO_DATA_REF (stmt_vinfo))
3965 STMT_VINFO_DATA_REF (pattern_stmt_info)
3966 = STMT_VINFO_DATA_REF (stmt_vinfo);
3967 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
3968 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_vinfo);
3969 STMT_VINFO_GATHER_SCATTER_P (pattern_stmt_info)
3970 = STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo);
3973 *type_out = vectype1;
3974 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
3976 return pattern_stmt;
3979 if (!is_gimple_assign (last_stmt))
3982 gimple *pattern_stmt;
3983 lhs = gimple_assign_lhs (last_stmt);
3984 rhs1 = gimple_assign_rhs1 (last_stmt);
3985 rhs_code = gimple_assign_rhs_code (last_stmt);
3987 /* Check for cond expression requiring mask conversion. */
3988 if (rhs_code == COND_EXPR)
3990 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3992 if (TREE_CODE (rhs1) == SSA_NAME)
3994 rhs1_type = search_type_for_mask (rhs1, vinfo);
3998 else if (COMPARISON_CLASS_P (rhs1))
4000 /* Check whether we're comparing scalar booleans and (if so)
4001 whether a better mask type exists than the mask associated
4002 with boolean-sized elements. This avoids unnecessary packs
4003 and unpacks if the booleans are set from comparisons of
4004 wider types. E.g. in:
4006 int x1, x2, x3, x4, y1, y1;
4008 bool b1 = (x1 == x2);
4009 bool b2 = (x3 == x4);
4010 ... = b1 == b2 ? y1 : y2;
4012 it is better for b1 and b2 to use the mask type associated
4013 with int elements rather than bool (byte) elements. */
4014 rhs1_type = search_type_for_mask (TREE_OPERAND (rhs1, 0), vinfo);
4016 rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0));
4021 vectype2 = get_mask_type_for_scalar_type (rhs1_type);
4023 if (!vectype1 || !vectype2)
4026 /* Continue if a conversion is needed. Also continue if we have
4027 a comparison whose vector type would normally be different from
4028 VECTYPE2 when considered in isolation. In that case we'll
4029 replace the comparison with an SSA name (so that we can record
4030 its vector type) and behave as though the comparison was an SSA
4031 name from the outset. */
4032 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4033 TYPE_VECTOR_SUBPARTS (vectype2))
4034 && (TREE_CODE (rhs1) == SSA_NAME
4035 || rhs1_type == TREE_TYPE (TREE_OPERAND (rhs1, 0))))
4038 /* If rhs1 is invariant and we can promote it leave the COND_EXPR
4039 in place, we can handle it in vectorizable_condition. This avoids
4040 unnecessary promotion stmts and increased vectorization factor. */
4041 if (COMPARISON_CLASS_P (rhs1)
4042 && INTEGRAL_TYPE_P (rhs1_type)
4043 && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
4044 TYPE_VECTOR_SUBPARTS (vectype2)))
4046 enum vect_def_type dt;
4047 if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
4048 && dt == vect_external_def
4049 && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
4050 && (dt == vect_external_def
4051 || dt == vect_constant_def))
4053 tree wide_scalar_type = build_nonstandard_integer_type
4054 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype1))),
4055 TYPE_UNSIGNED (rhs1_type));
4056 tree vectype3 = get_vectype_for_scalar_type (wide_scalar_type);
4057 if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
4062 /* If rhs1 is a comparison we need to move it into a
4063 separate statement. */
4064 if (TREE_CODE (rhs1) != SSA_NAME)
4066 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
4067 pattern_stmt = gimple_build_assign (tmp, rhs1);
4069 append_pattern_def_seq (stmt_vinfo, pattern_stmt, vectype2);
4072 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
4073 TYPE_VECTOR_SUBPARTS (vectype2)))
4074 tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo);
4078 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4079 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
4080 gimple_assign_rhs2 (last_stmt),
4081 gimple_assign_rhs3 (last_stmt));
4083 *type_out = vectype1;
4084 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4086 return pattern_stmt;
4089 /* Now check for binary boolean operations requiring conversion for
(one of the operands -- continuation missing from this extract).  */
4091 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4094 if (rhs_code != BIT_IOR_EXPR
4095 && rhs_code != BIT_XOR_EXPR
4096 && rhs_code != BIT_AND_EXPR
4097 && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
4100 rhs2 = gimple_assign_rhs2 (last_stmt);
4102 rhs1_type = search_type_for_mask (rhs1, vinfo);
4103 rhs2_type = search_type_for_mask (rhs2, vinfo);
4105 if (!rhs1_type || !rhs2_type
4106 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
/* Convert the wider-mask operand to the narrower operand's mask type.  */
4109 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
4111 vectype1 = get_mask_type_for_scalar_type (rhs1_type);
4114 rhs2 = build_mask_conversion (rhs2, vectype1, stmt_vinfo);
4118 vectype1 = get_mask_type_for_scalar_type (rhs2_type);
4121 rhs1 = build_mask_conversion (rhs1, vectype1, stmt_vinfo);
4124 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4125 pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
4127 *type_out = vectype1;
4128 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4130 return pattern_stmt;
4133 /* STMT is a load or store. If the load or store is conditional, return
4134 the boolean condition under which it occurs, otherwise return null. */
/* NOTE(review): fragments re-joined for readability; the return type line,
   braces and the unconditional-case returns (presumably NULL_TREE) are
   missing from this extract.  Plain gassigns are unconditional; internal
   calls fetch the mask argument by index.  */
4137 vect_get_load_store_mask (gimple *stmt)
4139 if (gassign *def_assign = dyn_cast<gassign *> (stmt))
4141 gcc_assert (gimple_assign_single_p (def_assign));
4145 if (gcall *def_call = dyn_cast<gcall *> (stmt))
4147 internal_fn ifn = gimple_call_internal_fn (def_call);
4148 int mask_index = internal_fn_mask_index (ifn);
4149 return gimple_call_arg (def_call, mask_index);
4155 /* Return the scalar offset type that an internal gather/scatter function
4156 should use. GS_INFO describes the gather/scatter operation. */
/* NOTE(review): fragments re-joined for readability; the return type line,
   braces and the fall-through return (presumably OFFSET_TYPE itself) are
   missing from this extract.  */
4159 vect_get_gather_scatter_offset_type (gather_scatter_info *gs_info)
4161 tree offset_type = TREE_TYPE (gs_info->offset);
4162 unsigned int element_bits = tree_to_uhwi (TYPE_SIZE (gs_info->element_type));
4164 /* Enforced by vect_check_gather_scatter. */
4165 unsigned int offset_bits = TYPE_PRECISION (offset_type);
4166 gcc_assert (element_bits >= offset_bits);
4168 /* If the offset is narrower than the elements, extend it according
(to its sign -- continuation missing from this extract).  */
4170 if (element_bits > offset_bits)
4171 return build_nonstandard_integer_type (element_bits,
4172 TYPE_UNSIGNED (offset_type));
4177 /* Return MASK if MASK is suitable for masking an operation on vectors
4178 of type VECTYPE, otherwise convert it into such a form and return
4179 the result. Associate any conversion statements with STMT_INFO's
(pattern def sequence -- continuation missing from this extract).  */
/* NOTE(review): fragments re-joined for readability; return type line,
   braces and the final return of MASK are missing from this extract.
   Converts only when the element counts of the two vector types differ.  */
4183 vect_convert_mask_for_vectype (tree mask, tree vectype,
4184 stmt_vec_info stmt_info, vec_info *vinfo)
4186 tree mask_type = search_type_for_mask (mask, vinfo);
4189 tree mask_vectype = get_mask_type_for_scalar_type (mask_type);
4191 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
4192 TYPE_VECTOR_SUBPARTS (mask_vectype)))
4193 mask = build_mask_conversion (mask, vectype, stmt_info);
4198 /* Return the equivalent of:
4200 fold_convert (TYPE, VALUE)
4202 with the expectation that the operation will be vectorized.
4203 If new statements are needed, add them as pattern statements
(to STMT_INFO -- continuation missing from this extract).  */
/* NOTE(review): fragments re-joined for readability; return type line,
   braces and the returns (VALUE when no conversion is needed, NEW_VALUE
   otherwise) are missing from this extract.  */
4207 vect_add_conversion_to_pattern (tree type, tree value, stmt_vec_info stmt_info)
4209 if (useless_type_conversion_p (type, TREE_TYPE (value)))
4212 tree new_value = vect_recog_temp_ssa_var (type, NULL);
4213 gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
4214 append_pattern_def_seq (stmt_info, conversion,
4215 get_vectype_for_scalar_type (type));
4219 /* Try to convert STMT_INFO into a call to a gather load or scatter store
4220 internal function. Return the final statement on success and set
4221 *TYPE_OUT to the vector type being loaded or stored.
4223 This function only handles gathers and scatters that were recognized
4224 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
/* NOTE(review): line-mangled extract with gaps (missing braces, early
   returns, else branches and some call arguments); fragments re-joined
   for readability only.  Do not edit logic from this view alone.  */
4227 vect_recog_gather_scatter_pattern (stmt_vec_info stmt_info, tree *type_out)
4229 /* Currently we only support this for loop vectorization. */
4230 loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (stmt_info->vinfo);
4234 /* Make sure that we're looking at a gather load or scatter store. */
4235 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4236 if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
4239 /* Get the boolean that controls whether the load or store happens.
4240 This is null if the operation is unconditional. */
4241 gimple *stmt = stmt_info->stmt;
4242 tree mask = vect_get_load_store_mask (stmt);
4244 /* Make sure that the target supports an appropriate internal
4245 function for the gather/scatter operation. */
4246 gather_scatter_info gs_info;
4247 if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info)
4251 /* Convert the mask to the right form. */
4252 tree gs_vectype = get_vectype_for_scalar_type (gs_info.element_type);
4254 mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
4257 /* Get the invariant base and non-invariant offset, converting the
4258 latter to the same width as the vector elements. */
4259 tree base = gs_info.base;
4260 tree offset_type = vect_get_gather_scatter_offset_type (&gs_info);
4261 tree offset = vect_add_conversion_to_pattern (offset_type, gs_info.offset,
4264 /* Build the new pattern statement. */
4265 tree scale = size_int (gs_info.scale);
4266 gcall *pattern_stmt;
4267 if (DR_IS_READ (dr))
/* Gather load: masked form takes the mask as a fourth argument.  */
4270 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
4271 offset, scale, mask);
4273 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 3, base,
4275 tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
4276 gimple_call_set_lhs (pattern_stmt, load_lhs);
/* Scatter store: stored value comes from the original stmt.  */
4280 tree rhs = vect_get_store_rhs (stmt);
4282 pattern_stmt = gimple_build_call_internal (IFN_MASK_SCATTER_STORE, 5,
4283 base, offset, scale, rhs,
4286 pattern_stmt = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
4287 base, offset, scale, rhs);
4289 gimple_call_set_nothrow (pattern_stmt, true);
4291 /* Copy across relevant vectorization info and associate DR with the
4292 new pattern statement instead of the original statement. */
4293 stmt_vec_info pattern_stmt_info = new_stmt_vec_info (pattern_stmt,
4295 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
4296 STMT_VINFO_DATA_REF (pattern_stmt_info) = dr;
4297 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
4298 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info);
4299 STMT_VINFO_GATHER_SCATTER_P (pattern_stmt_info)
4300 = STMT_VINFO_GATHER_SCATTER_P (stmt_info);
4302 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4303 *type_out = vectype;
4304 vect_pattern_detected ("gather/scatter pattern", stmt);
4306 return pattern_stmt;
4309 /* Return true if TYPE is a non-boolean integer type. These are the types
4310 that we want to consider for narrowing. */
/* NOTE(review): return type line and braces missing from this extract;
   fragments re-joined for readability only.  */
4313 vect_narrowable_type_p (tree type)
4315 return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
4318 /* Return true if the operation given by CODE can be truncated to N bits
4319 when only N bits of the output are needed. This is only true if bit N+1
4320 of the inputs has no effect on the low N bits of the result. */
/* NOTE(review): the entire body (upstream lines 4324-4340, presumably a
   switch over CODE) is missing from this extract -- restore from the full
   source before use.  */
4323 vect_truncatable_operation_p (tree_code code)
4341 /* Record that STMT_INFO could be changed from operating on TYPE to
4342 operating on a type with the precision and sign given by PRECISION
4343 and SIGN respectively. PRECISION is an arbitrary bit precision;
4344 it might not be a whole number of bytes. */
/* NOTE(review): fragments re-joined for readability; return type line and
   braces missing from this extract.  Only tightens the recorded precision:
   it never widens an already-recorded smaller value.  */
4347 vect_set_operation_type (stmt_vec_info stmt_info, tree type,
4348 unsigned int precision, signop sign)
4350 /* Round the precision up to a whole number of bytes. */
4351 precision = vect_element_precision (precision);
4352 if (precision < TYPE_PRECISION (type)
4353 && (!stmt_info->operation_precision
4354 || stmt_info->operation_precision > precision))
4356 stmt_info->operation_precision = precision;
4357 stmt_info->operation_sign = sign;
4361 /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
4362 non-boolean inputs, all of which have type TYPE. MIN_INPUT_PRECISION
4363 is an arbitrary bit precision; it might not be a whole number of bytes. */
/* NOTE(review): fragments re-joined for readability; return type line and
   braces missing from this extract.  */
4366 vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
4367 unsigned int min_input_precision)
4369 /* This operation in isolation only requires the inputs to have
4370 MIN_INPUT_PRECISION of precision, However, that doesn't mean
4371 that MIN_INPUT_PRECISION is a natural precision for the chain
4372 as a whole. E.g. consider something like:
4374 unsigned short *x, *y;
4375 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4377 The right shift can be done on unsigned chars, and only requires the
4378 result of "*x & 0xf0" to be done on unsigned chars. But taking that
4379 approach would mean turning a natural chain of single-vector unsigned
4380 short operations into one that truncates "*x" and then extends
4381 "(*x & 0xf0) >> 4", with two vectors for each unsigned short
4382 operation and one vector for each unsigned char operation.
4383 This would be a significant pessimization.
4385 Instead only propagate the maximum of this precision and the precision
4386 required by the users of the result. This means that we don't pessimize
4387 the case above but continue to optimize things like:
4391 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4393 Here we would truncate two vectors of *x to a single vector of
4394 unsigned chars and use single-vector unsigned char operations for
4395 everything else, rather than doing two unsigned short copies of
4396 "(*x & 0xf0) >> 4" and then truncating the result. */
4397 min_input_precision = MAX (min_input_precision,
4398 stmt_info->min_output_precision);
/* As in vect_set_operation_type, only ever tighten the recorded value.  */
4400 if (min_input_precision < TYPE_PRECISION (type)
4401 && (!stmt_info->min_input_precision
4402 || stmt_info->min_input_precision > min_input_precision))
4403 stmt_info->min_input_precision = min_input_precision;
4406 /* Subroutine of vect_determine_min_output_precision. Return true if
4407 we can calculate a reduced number of output bits for STMT_INFO,
4408 whose result is LHS. */
/* NOTE(review): line-mangled extract with gaps (missing braces, `continue`
   lines and the failure returns); fragments re-joined for readability
   only.  Scans all non-debug, in-region uses of LHS; presumably bails
   out (returning false) when any user has no recorded min input
   precision -- confirm against the full source.  */
4411 vect_determine_min_output_precision_1 (stmt_vec_info stmt_info, tree lhs)
4413 /* Take the maximum precision required by users of the result. */
4414 unsigned int precision = 0;
4415 imm_use_iterator iter;
4417 FOR_EACH_IMM_USE_FAST (use, iter, lhs)
4419 gimple *use_stmt = USE_STMT (use);
4420 if (is_gimple_debug (use_stmt))
4422 if (!vect_stmt_in_region_p (stmt_info->vinfo, use_stmt))
4424 stmt_vec_info use_stmt_info = vinfo_for_stmt (use_stmt);
4425 if (!use_stmt_info->min_input_precision)
4427 precision = MAX (precision, use_stmt_info->min_input_precision);
4430 if (dump_enabled_p ())
4432 dump_printf_loc (MSG_NOTE, vect_location, "only the low %d bits of ",
4434 dump_generic_expr (MSG_NOTE, TDF_SLIM, lhs);
4435 dump_printf (MSG_NOTE, " are significant\n");
4437 stmt_info->min_output_precision = precision;
4441 /* Calculate min_output_precision for STMT_INFO. */
/* NOTE(review): fragments re-joined for readability; return type line,
   braces and the early-return condition head are missing from this
   extract.  When no reduced precision can be computed, falls back to the
   full precision of the result type.  */
4444 vect_determine_min_output_precision (stmt_vec_info stmt_info)
4446 /* We're only interested in statements with a narrowable result. */
4447 tree lhs = gimple_get_lhs (stmt_info->stmt);
4449 || TREE_CODE (lhs) != SSA_NAME
4450 || !vect_narrowable_type_p (TREE_TYPE (lhs)))
4453 if (!vect_determine_min_output_precision_1 (stmt_info, lhs))
4454 stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
4457 /* Use range information to decide whether STMT (described by STMT_INFO)
4458 could be done in a narrower type. This is effectively a forward
4459 propagation, since it uses context-independent information that applies
4460 to all users of an SSA name. */
4463 vect_determine_precisions_from_range (stmt_vec_info stmt_info
, gassign
*stmt
)
4465 tree lhs
= gimple_assign_lhs (stmt
);
4466 if (!lhs
|| TREE_CODE (lhs
) != SSA_NAME
)
4469 tree type
= TREE_TYPE (lhs
);
4470 if (!vect_narrowable_type_p (type
))
4473 /* First see whether we have any useful range information for the result. */
4474 unsigned int precision
= TYPE_PRECISION (type
);
4475 signop sign
= TYPE_SIGN (type
);
4476 wide_int min_value
, max_value
;
4477 if (!vect_get_range_info (lhs
, &min_value
, &max_value
))
4480 tree_code code
= gimple_assign_rhs_code (stmt
);
4481 unsigned int nops
= gimple_num_ops (stmt
);
4483 if (!vect_truncatable_operation_p (code
))
4484 /* Check that all relevant input operands are compatible, and update
4485 [MIN_VALUE, MAX_VALUE] to include their ranges. */
4486 for (unsigned int i
= 1; i
< nops
; ++i
)
4488 tree op
= gimple_op (stmt
, i
);
4489 if (TREE_CODE (op
) == INTEGER_CST
)
4491 /* Don't require the integer to have RHS_TYPE (which it might
4492 not for things like shift amounts, etc.), but do require it
4494 if (!int_fits_type_p (op
, type
))
4497 min_value
= wi::min (min_value
, wi::to_wide (op
, precision
), sign
);
4498 max_value
= wi::max (max_value
, wi::to_wide (op
, precision
), sign
);
4500 else if (TREE_CODE (op
) == SSA_NAME
)
4502 /* Ignore codes that don't take uniform arguments. */
4503 if (!types_compatible_p (TREE_TYPE (op
), type
))
4506 wide_int op_min_value
, op_max_value
;
4507 if (!vect_get_range_info (op
, &op_min_value
, &op_max_value
))
4510 min_value
= wi::min (min_value
, op_min_value
, sign
);
4511 max_value
= wi::max (max_value
, op_max_value
, sign
);
4517 /* Try to switch signed types for unsigned types if we can.
4518 This is better for two reasons. First, unsigned ops tend
4519 to be cheaper than signed ops. Second, it means that we can
4523 int res = (int) c & 0xff00; // range [0x0000, 0xff00]
4528 unsigned short res_1 = (unsigned short) c & 0xff00;
4529 int res = (int) res_1;
4531 where the intermediate result res_1 has unsigned rather than
4533 if (sign
== SIGNED
&& !wi::neg_p (min_value
))
4536 /* See what precision is required for MIN_VALUE and MAX_VALUE. */
4537 unsigned int precision1
= wi::min_precision (min_value
, sign
);
4538 unsigned int precision2
= wi::min_precision (max_value
, sign
);
4539 unsigned int value_precision
= MAX (precision1
, precision2
);
4540 if (value_precision
>= precision
)
4543 if (dump_enabled_p ())
4545 dump_printf_loc (MSG_NOTE
, vect_location
, "can narrow to %s:%d"
4546 " without loss of precision: ",
4547 sign
== SIGNED
? "signed" : "unsigned",
4549 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
4552 vect_set_operation_type (stmt_info
, type
, value_precision
, sign
);
4553 vect_set_min_input_precision (stmt_info
, type
, value_precision
);
4556 /* Use information about the users of STMT's result to decide whether
4557 STMT (described by STMT_INFO) could be done in a narrower type.
4558 This is effectively a backward propagation. */
4561 vect_determine_precisions_from_users (stmt_vec_info stmt_info
, gassign
*stmt
)
4563 tree_code code
= gimple_assign_rhs_code (stmt
);
4564 unsigned int opno
= (code
== COND_EXPR
? 2 : 1);
4565 tree type
= TREE_TYPE (gimple_op (stmt
, opno
));
4566 if (!vect_narrowable_type_p (type
))
4569 unsigned int precision
= TYPE_PRECISION (type
);
4570 unsigned int operation_precision
, min_input_precision
;
4574 /* Only the bits that contribute to the output matter. Don't change
4575 the precision of the operation itself. */
4576 operation_precision
= precision
;
4577 min_input_precision
= stmt_info
->min_output_precision
;
4583 tree shift
= gimple_assign_rhs2 (stmt
);
4584 if (TREE_CODE (shift
) != INTEGER_CST
4585 || !wi::ltu_p (wi::to_widest (shift
), precision
))
4587 unsigned int const_shift
= TREE_INT_CST_LOW (shift
);
4588 if (code
== LSHIFT_EXPR
)
4590 /* We need CONST_SHIFT fewer bits of the input. */
4591 operation_precision
= stmt_info
->min_output_precision
;
4592 min_input_precision
= (MAX (operation_precision
, const_shift
)
4597 /* We need CONST_SHIFT extra bits to do the operation. */
4598 operation_precision
= (stmt_info
->min_output_precision
4600 min_input_precision
= operation_precision
;
4606 if (vect_truncatable_operation_p (code
))
4608 /* Input bit N has no effect on output bits N-1 and lower. */
4609 operation_precision
= stmt_info
->min_output_precision
;
4610 min_input_precision
= operation_precision
;
4616 if (operation_precision
< precision
)
4618 if (dump_enabled_p ())
4620 dump_printf_loc (MSG_NOTE
, vect_location
, "can narrow to %s:%d"
4621 " without affecting users: ",
4622 TYPE_UNSIGNED (type
) ? "unsigned" : "signed",
4623 operation_precision
);
4624 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
4626 vect_set_operation_type (stmt_info
, type
, operation_precision
,
4629 vect_set_min_input_precision (stmt_info
, type
, min_input_precision
);
4632 /* Handle vect_determine_precisions for STMT_INFO, given that we
4633 have already done so for the users of its result. */
4636 vect_determine_stmt_precisions (stmt_vec_info stmt_info
)
4638 vect_determine_min_output_precision (stmt_info
);
4639 if (gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
4641 vect_determine_precisions_from_range (stmt_info
, stmt
);
4642 vect_determine_precisions_from_users (stmt_info
, stmt
);
4646 /* Walk backwards through the vectorizable region to determine the
4647 values of these fields:
4649 - min_output_precision
4650 - min_input_precision
4651 - operation_precision
4652 - operation_sign. */
4655 vect_determine_precisions (vec_info
*vinfo
)
4657 DUMP_VECT_SCOPE ("vect_determine_precisions");
4659 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
4661 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
4662 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
4663 unsigned int nbbs
= loop
->num_nodes
;
4665 for (unsigned int i
= 0; i
< nbbs
; i
++)
4667 basic_block bb
= bbs
[nbbs
- i
- 1];
4668 for (gimple_stmt_iterator si
= gsi_last_bb (bb
);
4669 !gsi_end_p (si
); gsi_prev (&si
))
4670 vect_determine_stmt_precisions (vinfo_for_stmt (gsi_stmt (si
)));
4675 bb_vec_info bb_vinfo
= as_a
<bb_vec_info
> (vinfo
);
4676 gimple_stmt_iterator si
= bb_vinfo
->region_end
;
4681 si
= gsi_last_bb (bb_vinfo
->bb
);
4684 stmt
= gsi_stmt (si
);
4685 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4686 if (stmt_info
&& STMT_VINFO_VECTORIZABLE (stmt_info
))
4687 vect_determine_stmt_precisions (stmt_info
);
4689 while (stmt
!= gsi_stmt (bb_vinfo
->region_begin
));
4693 typedef gimple
*(*vect_recog_func_ptr
) (stmt_vec_info
, tree
*);
4695 struct vect_recog_func
4697 vect_recog_func_ptr fn
;
4701 /* Note that ordering matters - the first pattern matching on a stmt is
4702 taken which means usually the more complex one needs to preceed the
4703 less comples onex (widen_sum only after dot_prod or sad for example). */
4704 static vect_recog_func vect_vect_recog_func_ptrs
[] = {
4705 { vect_recog_over_widening_pattern
, "over_widening" },
4706 /* Must come after over_widening, which narrows the shift as much as
4707 possible beforehand. */
4708 { vect_recog_average_pattern
, "average" },
4709 { vect_recog_cast_forwprop_pattern
, "cast_forwprop" },
4710 { vect_recog_widen_mult_pattern
, "widen_mult" },
4711 { vect_recog_dot_prod_pattern
, "dot_prod" },
4712 { vect_recog_sad_pattern
, "sad" },
4713 { vect_recog_widen_sum_pattern
, "widen_sum" },
4714 { vect_recog_pow_pattern
, "pow" },
4715 { vect_recog_widen_shift_pattern
, "widen_shift" },
4716 { vect_recog_rotate_pattern
, "rotate" },
4717 { vect_recog_vector_vector_shift_pattern
, "vector_vector_shift" },
4718 { vect_recog_divmod_pattern
, "divmod" },
4719 { vect_recog_mult_pattern
, "mult" },
4720 { vect_recog_mixed_size_cond_pattern
, "mixed_size_cond" },
4721 { vect_recog_bool_pattern
, "bool" },
4722 /* This must come before mask conversion, and includes the parts
4723 of mask conversion that are needed for gather and scatter
4724 internal functions. */
4725 { vect_recog_gather_scatter_pattern
, "gather_scatter" },
4726 { vect_recog_mask_conversion_pattern
, "mask_conversion" }
4729 const unsigned int NUM_PATTERNS
= ARRAY_SIZE (vect_vect_recog_func_ptrs
);
4731 /* Mark statements that are involved in a pattern. */
4734 vect_mark_pattern_stmts (gimple
*orig_stmt
, gimple
*pattern_stmt
,
4735 tree pattern_vectype
)
4737 stmt_vec_info orig_stmt_info
= vinfo_for_stmt (orig_stmt
);
4738 gimple
*def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info
);
4740 bool old_pattern_p
= is_pattern_stmt_p (orig_stmt_info
);
4743 /* We're replacing a statement in an existing pattern definition
4745 if (dump_enabled_p ())
4747 dump_printf_loc (MSG_NOTE
, vect_location
,
4748 "replacing earlier pattern ");
4749 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, orig_stmt
, 0);
4752 /* To keep the book-keeping simple, just swap the lhs of the
4753 old and new statements, so that the old one has a valid but
4755 tree old_lhs
= gimple_get_lhs (orig_stmt
);
4756 gimple_set_lhs (orig_stmt
, gimple_get_lhs (pattern_stmt
));
4757 gimple_set_lhs (pattern_stmt
, old_lhs
);
4759 if (dump_enabled_p ())
4761 dump_printf_loc (MSG_NOTE
, vect_location
, "with ");
4762 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
4765 /* Switch to the statement that ORIG replaces. */
4767 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (orig_stmt_info
));
4769 /* We shouldn't be replacing the main pattern statement. */
4770 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info
) != orig_stmt
);
4774 for (gimple_stmt_iterator si
= gsi_start (def_seq
);
4775 !gsi_end_p (si
); gsi_next (&si
))
4776 vect_init_pattern_stmt (gsi_stmt (si
), orig_stmt_info
, pattern_vectype
);
4780 vect_init_pattern_stmt (pattern_stmt
, orig_stmt_info
, pattern_vectype
);
4782 /* Insert all the new pattern statements before the original one. */
4783 gimple_seq
*orig_def_seq
= &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info
);
4784 gimple_stmt_iterator gsi
= gsi_for_stmt (orig_stmt
, orig_def_seq
);
4785 gsi_insert_seq_before_without_update (&gsi
, def_seq
, GSI_SAME_STMT
);
4786 gsi_insert_before_without_update (&gsi
, pattern_stmt
, GSI_SAME_STMT
);
4788 /* Remove the pattern statement that this new pattern replaces. */
4789 gsi_remove (&gsi
, false);
4792 vect_set_pattern_stmt (pattern_stmt
, orig_stmt_info
, pattern_vectype
);
4795 /* Function vect_pattern_recog_1
4798 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
4799 computation pattern.
4800 STMT: A stmt from which the pattern search should start.
4802 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
4803 a sequence of statements that has the same functionality and can be
4804 used to replace STMT. It returns the last statement in the sequence
4805 and adds any earlier statements to STMT's STMT_VINFO_PATTERN_DEF_SEQ.
4806 PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
4807 statement, having first checked that the target supports the new operation
4810 This function also does some bookkeeping, as explained in the documentation
4811 for vect_recog_pattern. */
4814 vect_pattern_recog_1 (vect_recog_func
*recog_func
, gimple_stmt_iterator si
)
4816 gimple
*stmt
= gsi_stmt (si
), *pattern_stmt
;
4817 stmt_vec_info stmt_info
;
4818 loop_vec_info loop_vinfo
;
4819 tree pattern_vectype
;
4821 /* If this statement has already been replaced with pattern statements,
4822 leave the original statement alone, since the first match wins.
4823 Instead try to match against the definition statements that feed
4824 the main pattern statement. */
4825 stmt_info
= vinfo_for_stmt (stmt
);
4826 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
4828 gimple_stmt_iterator gsi
;
4829 for (gsi
= gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
));
4830 !gsi_end_p (gsi
); gsi_next (&gsi
))
4831 vect_pattern_recog_1 (recog_func
, gsi
);
4835 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
));
4836 pattern_stmt
= recog_func
->fn (stmt_info
, &pattern_vectype
);
4839 /* Clear any half-formed pattern definition sequence. */
4840 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
) = NULL
;
4844 loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4845 gcc_assert (pattern_vectype
);
4847 /* Found a vectorizable pattern. */
4848 if (dump_enabled_p ())
4850 dump_printf_loc (MSG_NOTE
, vect_location
,
4851 "%s pattern recognized: ", recog_func
->name
);
4852 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
4855 /* Mark the stmts that are involved in the pattern. */
4856 vect_mark_pattern_stmts (stmt
, pattern_stmt
, pattern_vectype
);
4858 /* Patterns cannot be vectorized using SLP, because they change the order of
4864 VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo
), ix
, ix2
,
4865 elem_ptr
, *elem_ptr
== stmt
);
4870 /* Function vect_pattern_recog
4873 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
4876 Output - for each computation idiom that is detected we create a new stmt
4877 that provides the same functionality and that can be vectorized. We
4878 also record some information in the struct_stmt_info of the relevant
4879 stmts, as explained below:
4881 At the entry to this function we have the following stmts, with the
4882 following initial value in the STMT_VINFO fields:
4884 stmt in_pattern_p related_stmt vec_stmt
4885 S1: a_i = .... - - -
4886 S2: a_2 = ..use(a_i).. - - -
4887 S3: a_1 = ..use(a_2).. - - -
4888 S4: a_0 = ..use(a_1).. - - -
4889 S5: ... = ..use(a_0).. - - -
4891 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
4892 represented by a single stmt. We then:
4893 - create a new stmt S6 equivalent to the pattern (the stmt is not
4894 inserted into the code)
4895 - fill in the STMT_VINFO fields as follows:
4897 in_pattern_p related_stmt vec_stmt
4898 S1: a_i = .... - - -
4899 S2: a_2 = ..use(a_i).. - - -
4900 S3: a_1 = ..use(a_2).. - - -
4901 S4: a_0 = ..use(a_1).. true S6 -
4902 '---> S6: a_new = .... - S4 -
4903 S5: ... = ..use(a_0).. - - -
4905 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
4906 to each other through the RELATED_STMT field).
4908 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
4909 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
4910 remain irrelevant unless used by stmts other than S4.
4912 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
4913 (because they are marked as irrelevant). It will vectorize S6, and record
4914 a pointer to the new vector stmt VS6 from S6 (as usual).
4915 S4 will be skipped, and S5 will be vectorized as usual:
4917 in_pattern_p related_stmt vec_stmt
4918 S1: a_i = .... - - -
4919 S2: a_2 = ..use(a_i).. - - -
4920 S3: a_1 = ..use(a_2).. - - -
4921 > VS6: va_new = .... - - -
4922 S4: a_0 = ..use(a_1).. true S6 VS6
4923 '---> S6: a_new = .... - S4 VS6
4924 > VS5: ... = ..vuse(va_new).. - - -
4925 S5: ... = ..use(a_0).. - - -
4927 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
4928 elsewhere), and we'll end up with:
4931 VS5: ... = ..vuse(va_new)..
4933 In case of more than one pattern statements, e.g., widen-mult with
4937 S2 a_T = (TYPE) a_t;
4938 '--> S3: a_it = (interm_type) a_t;
4939 S4 prod_T = a_T * CONST;
4940 '--> S5: prod_T' = a_it w* CONST;
4942 there may be other users of a_T outside the pattern. In that case S2 will
4943 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
4944 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
4945 be recorded in S3. */
4948 vect_pattern_recog (vec_info
*vinfo
)
4953 gimple_stmt_iterator si
;
4956 vect_determine_precisions (vinfo
);
4958 DUMP_VECT_SCOPE ("vect_pattern_recog");
4960 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
4962 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
4963 bbs
= LOOP_VINFO_BBS (loop_vinfo
);
4964 nbbs
= loop
->num_nodes
;
4966 /* Scan through the loop stmts, applying the pattern recognition
4967 functions starting at each stmt visited: */
4968 for (i
= 0; i
< nbbs
; i
++)
4970 basic_block bb
= bbs
[i
];
4971 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
4972 /* Scan over all generic vect_recog_xxx_pattern functions. */
4973 for (j
= 0; j
< NUM_PATTERNS
; j
++)
4974 vect_pattern_recog_1 (&vect_vect_recog_func_ptrs
[j
], si
);
4979 bb_vec_info bb_vinfo
= as_a
<bb_vec_info
> (vinfo
);
4980 for (si
= bb_vinfo
->region_begin
;
4981 gsi_stmt (si
) != gsi_stmt (bb_vinfo
->region_end
); gsi_next (&si
))
4983 gimple
*stmt
= gsi_stmt (si
);
4984 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4985 if (stmt_info
&& !STMT_VINFO_VECTORIZABLE (stmt_info
))
4988 /* Scan over all generic vect_recog_xxx_pattern functions. */
4989 for (j
= 0; j
< NUM_PATTERNS
; j
++)
4990 vect_pattern_recog_1 (&vect_vect_recog_func_ptrs
[j
], si
);