/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006-2018 Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "coretypes.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "tree-vectorizer.h"
#include "internal-fn.h"
#include "case-cfn-macros.h"
#include "fold-const-call.h"
#include "omp-simd-clone.h"

/* Return true if we have a useful VR_RANGE range for VAR, storing it
   in *MIN_VALUE and *MAX_VALUE if so.  Note the range in the dump files.  */

static bool
vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
{
  value_range_type vr_type = get_range_info (var, min_value, max_value);
  wide_int nonzero = get_nonzero_bits (var);
  signop sgn = TYPE_SIGN (TREE_TYPE (var));
  if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
                                         nonzero, sgn) == VR_RANGE)
    {
      if (dump_enabled_p ())
        {
          dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
          dump_printf (MSG_NOTE, " has range [");
          dump_hex (MSG_NOTE, *min_value);
          dump_printf (MSG_NOTE, ", ");
          dump_hex (MSG_NOTE, *max_value);
          dump_printf (MSG_NOTE, "]\n");
        }
      return true;
    }

  if (dump_enabled_p ())
    {
      dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
      dump_printf (MSG_NOTE, " has no range info\n");
    }
  return false;
}

/* Report that we've found an instance of pattern PATTERN in
   STMT.  */

static void
vect_pattern_detected (const char *name, gimple *stmt)
{
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt);
}

/* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
   return the pattern statement's stmt_vec_info.  Set its vector type to
   VECTYPE if it doesn't have one already.  */

static stmt_vec_info
vect_init_pattern_stmt (gimple *pattern_stmt, stmt_vec_info orig_stmt_info,
                        tree vectype)
{
  vec_info *vinfo = orig_stmt_info->vinfo;
  stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
  if (pattern_stmt_info == NULL)
    pattern_stmt_info = orig_stmt_info->vinfo->add_stmt (pattern_stmt);
  gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));

  pattern_stmt_info->pattern_stmt_p = true;
  STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
  STMT_VINFO_DEF_TYPE (pattern_stmt_info)
    = STMT_VINFO_DEF_TYPE (orig_stmt_info);
  if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
    STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
  return pattern_stmt_info;
}

/* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
   Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
   have one already.  */

static void
vect_set_pattern_stmt (gimple *pattern_stmt, stmt_vec_info orig_stmt_info,
                       tree vectype)
{
  STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
  STMT_VINFO_RELATED_STMT (orig_stmt_info)
    = vect_init_pattern_stmt (pattern_stmt, orig_stmt_info, vectype);
}

/* Add NEW_STMT to STMT_INFO's pattern definition statements.  If VECTYPE
   is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
   be different from the vector type of the final pattern statement.  */

static void
append_pattern_def_seq (stmt_vec_info stmt_info, gimple *new_stmt,
                        tree vectype = NULL_TREE)
{
  vec_info *vinfo = stmt_info->vinfo;
  if (vectype)
    {
      stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
      STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
    }
  gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
                                      new_stmt);
}

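/* A rough usage sketch (hypothetical code, not one of the recognizers in
   this file): a pattern function typically builds helper statements with
   vect_recog_temp_ssa_var, queues them with append_pattern_def_seq and
   returns the final pattern statement, e.g.

     tree tmp = vect_recog_temp_ssa_var (half_type, NULL);
     gassign *cast = gimple_build_assign (tmp, NOP_EXPR, oprnd);
     append_pattern_def_seq (stmt_vinfo, cast, half_vectype);
     tree var = vect_recog_temp_ssa_var (type, NULL);
     return gimple_build_assign (var, PLUS_EXPR, tmp, other_oprnd);

   where HALF_TYPE, HALF_VECTYPE, OPRND and OTHER_OPRND stand for whatever
   the recognizer has already computed.  */
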
/* The caller wants to perform new operations on vect_external variable
   VAR, so that the result of the operations would also be vect_external.
   Return the edge on which the operations can be performed, if one exists.
   Return null if the operations should instead be treated as part of
   the pattern that needs them.  */

static edge
vect_get_external_def_edge (vec_info *vinfo, tree var)
{
  edge e = NULL;
  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    {
      e = loop_preheader_edge (loop_vinfo->loop);
      if (!SSA_NAME_IS_DEFAULT_DEF (var))
        {
          basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
          if (bb == NULL
              || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
            e = NULL;
        }
    }
  return e;
}

/* Return true if the target supports a vector version of CODE,
   where CODE is known to map to a direct optab.  ITYPE specifies
   the type of (some of) the scalar inputs and OTYPE specifies the
   type of the scalar result.

   If CODE allows the inputs and outputs to have different type
   (such as for WIDEN_SUM_EXPR), it is the input mode rather
   than the output mode that determines the appropriate target pattern.
   Operand 0 of the target pattern then specifies the mode that the output
   must have.

   When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
   Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
   is nonnull.  */

static bool
vect_supportable_direct_optab_p (tree otype, tree_code code,
                                 tree itype, tree *vecotype_out,
                                 tree *vecitype_out = NULL)
{
  tree vecitype = get_vectype_for_scalar_type (itype);
  if (!vecitype)
    return false;

  tree vecotype = get_vectype_for_scalar_type (otype);
  if (!vecotype)
    return false;

  optab optab = optab_for_tree_code (code, vecitype, optab_default);
  if (!optab)
    return false;

  insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
  if (icode == CODE_FOR_nothing
      || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
    return false;

  *vecotype_out = vecotype;
  if (vecitype_out)
    *vecitype_out = vecitype;
  return true;
}

/* Round bit precision PRECISION up to a full element.  */

static unsigned int
vect_element_precision (unsigned int precision)
{
  precision = 1 << ceil_log2 (precision);
  return MAX (precision, BITS_PER_UNIT);
}

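/* For example (a worked illustration, assuming BITS_PER_UNIT == 8):

     vect_element_precision (12) == 16   // rounded up to a full element
     vect_element_precision (3) == 8     // never smaller than a byte

   The exact results on a given target depend on BITS_PER_UNIT.  */
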
/* If OP is defined by a statement that's being considered for vectorization,
   return information about that statement, otherwise return NULL.  */

static stmt_vec_info
vect_get_internal_def (vec_info *vinfo, tree op)
{
  stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
  if (def_stmt_info
      && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
    return def_stmt_info;
  return NULL;
}

/* Check whether NAME, an ssa-name used in STMT_VINFO,
   is a result of a type promotion, such that:
     DEF_STMT: NAME = NOP (name0)
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.  */

static bool
type_conversion_p (tree name, stmt_vec_info stmt_vinfo, bool check_sign,
                   tree *orig_type, gimple **def_stmt, bool *promotion)
{
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;

  stmt_vec_info def_stmt_info;
  if (!vect_is_simple_use (name, stmt_vinfo->vinfo, &dt, &def_stmt_info,
                           def_stmt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (!is_gimple_assign (*def_stmt))
    return false;

  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *orig_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
    return false;

  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
    *promotion = true;
  else
    *promotion = false;

  if (!vect_is_simple_use (oprnd0, stmt_vinfo->vinfo, &dt))
    return false;

  return true;
}

/* Holds information about an input operand after some sign changes
   and type promotions have been peeled away.  */
struct vect_unpromoted_value {
  vect_unpromoted_value ();

  void set_op (tree, vect_def_type, stmt_vec_info = NULL);

  /* The value obtained after peeling away zero or more casts.  */
  tree op;

  /* The type of OP.  */
  tree type;

  /* The definition type of OP.  */
  vect_def_type dt;

  /* If OP is the result of peeling at least one cast, and if the cast
     of OP itself is a vectorizable statement, CASTER identifies that
     statement, otherwise it is null.  */
  stmt_vec_info caster;
};

inline vect_unpromoted_value::vect_unpromoted_value ()
  : op (NULL_TREE),
    type (NULL_TREE),
    dt (vect_uninitialized_def),
    caster (NULL)
{
}

/* Set the operand to OP_IN, its definition type to DT_IN, and the
   statement that casts it to CASTER_IN.  */

inline void
vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
                               stmt_vec_info caster_in)
{
  op = op_in;
  type = TREE_TYPE (op);
  dt = dt_in;
  caster = caster_in;
}

/* If OP is a vectorizable SSA name, strip a sequence of integer conversions
   to reach some vectorizable inner operand OP', continuing as long as it
   is possible to convert OP' back to OP using a possible sign change
   followed by a possible promotion P.  Return this OP', or null if OP is
   not a vectorizable SSA name.  If there is a promotion P, describe its
   input in UNPROM, otherwise describe OP' in UNPROM.  If SINGLE_USE_P
   is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
   have more than one user.

   A successful return means that it is possible to go from OP' to OP
   via UNPROM.  The cast from OP' to UNPROM is at most a sign change,
   whereas the cast from UNPROM to OP might be a promotion, a sign
   change, or a nop.

   E.g. say we have:

       signed short *ptr = ...;
       signed short C = *ptr;
       unsigned short B = (unsigned short) C;    // sign change
       signed int A = (signed int) B;            // unsigned promotion
       ...possible other uses of A...
       unsigned int OP = (unsigned int) A;       // sign change

   In this case it's possible to go directly from C to OP using:

       OP = (unsigned int) (unsigned short) C;
            +------------+ +--------------+
               promotion      sign change

   so OP' would be C.  The input to the promotion is B, so UNPROM
   would describe B.  */

static tree
vect_look_through_possible_promotion (vec_info *vinfo, tree op,
                                      vect_unpromoted_value *unprom,
                                      bool *single_use_p = NULL)
{
  tree res = NULL_TREE;
  tree op_type = TREE_TYPE (op);
  unsigned int orig_precision = TYPE_PRECISION (op_type);
  stmt_vec_info caster = NULL;
  while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
    {
      /* See whether OP is simple enough to vectorize.  */
      stmt_vec_info def_stmt_info;
      gimple *def_stmt;
      vect_def_type dt;
      if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
        break;

      /* If OP is the input of a demotion, skip over it to see whether
         OP is itself the result of a promotion.  If so, the combined
         effect of the promotion and the demotion might fit the required
         pattern, otherwise neither operation fits.

         This copes with cases such as the result of an arithmetic
         operation being truncated before being stored, and where that
         arithmetic operation has been recognized as an over-widened one.  */
      if (TYPE_PRECISION (op_type) <= orig_precision)
        {
          /* Use OP as the UNPROM described above if we haven't yet
             found a promotion, or if using the new input preserves the
             sign of the previous promotion.  */
          if (!res
              || TYPE_PRECISION (unprom->type) == orig_precision
              || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type))
            unprom->set_op (op, dt, caster);
          /* Stop if we've already seen a promotion and if this
             conversion does more than change the sign.  */
          else if (TYPE_PRECISION (op_type)
                   != TYPE_PRECISION (unprom->type))
            break;

          /* The sequence now extends to OP.  */
          res = op;
        }

      /* See whether OP is defined by a cast.  Record it as CASTER if
         the cast is potentially vectorizable.  */
      if (!def_stmt)
        break;
      caster = def_stmt_info;

      /* Ignore pattern statements, since we don't link uses for them.  */
      if (caster
          && single_use_p
          && !STMT_VINFO_RELATED_STMT (caster)
          && !has_single_use (res))
        *single_use_p = false;

      gassign *assign = dyn_cast <gassign *> (def_stmt);
      if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
        break;

      /* Continue with the input to the cast.  */
      op = gimple_assign_rhs1 (def_stmt);
      op_type = TREE_TYPE (op);
    }
  return res;
}

/* OP is an integer operand to an operation that returns TYPE, and we
   want to treat the operation as a widening one.  So far we can treat
   it as widening from *COMMON_TYPE.

   Return true if OP is suitable for such a widening operation,
   either widening from *COMMON_TYPE or from some supertype of it.
   Update *COMMON_TYPE to the supertype in the latter case.

   SHIFT_P is true if OP is a shift amount.  */

static bool
vect_joust_widened_integer (tree type, bool shift_p, tree op,
                            tree *common_type)
{
  /* Calculate the minimum precision required by OP, without changing
     the sign of either operand.  */
  unsigned int precision;
  if (shift_p)
    {
      if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
        return false;
      precision = TREE_INT_CST_LOW (op);
    }
  else
    {
      precision = wi::min_precision (wi::to_widest (op),
                                     TYPE_SIGN (*common_type));
      if (precision * 2 > TYPE_PRECISION (type))
        return false;
    }

  /* If OP requires a wider type, switch to that type.  The checks
     above ensure that this is still narrower than the result.  */
  precision = vect_element_precision (precision);
  if (TYPE_PRECISION (*common_type) < precision)
    *common_type = build_nonstandard_integer_type
      (precision, TYPE_UNSIGNED (*common_type));
  return true;
}

/* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
   is narrower than type, storing the supertype in *COMMON_TYPE if so.  */

static bool
vect_joust_widened_type (tree type, tree new_type, tree *common_type)
{
  if (types_compatible_p (*common_type, new_type))
    return true;

  /* See if *COMMON_TYPE can hold all values of NEW_TYPE.  */
  if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
      && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
    return true;

  /* See if NEW_TYPE can hold all values of *COMMON_TYPE.  */
  if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
      && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
    {
      *common_type = new_type;
      return true;
    }

  /* We have mismatched signs, with the signed type being
     no wider than the unsigned type.  In this case we need
     a wider signed type.  */
  unsigned int precision = MAX (TYPE_PRECISION (*common_type),
                                TYPE_PRECISION (new_type));
  precision *= 2;
  if (precision * 2 > TYPE_PRECISION (type))
    return false;

  *common_type = build_nonstandard_integer_type (precision, false);
  return true;
}

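/* Worked example (illustrative; exact behaviour depends on the target's
   type precisions): with TYPE a 32-bit int, *COMMON_TYPE unsigned char and
   NEW_TYPE signed char, neither input type can hold all values of the
   other, so the mismatched-signs case above computes precision = 8,
   doubles it to 16 and sets *COMMON_TYPE to a 16-bit signed type, which
   is still narrower than the 32-bit result as required.  */
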
/* Check whether STMT_INFO can be viewed as a tree of integer operations
   in which each node either performs CODE or WIDENED_CODE, and where
   each leaf operand is narrower than the result of STMT_INFO.  MAX_NOPS
   specifies the maximum number of leaf operands.  SHIFT_P says whether
   CODE and WIDENED_CODE are some sort of shift.

   If STMT_INFO is such a tree, return the number of leaf operands
   and describe them in UNPROM[0] onwards.  Also set *COMMON_TYPE
   to a type that (a) is narrower than the result of STMT_INFO and
   (b) can hold all leaf operand values.

   Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
   exists.  */

static unsigned int
vect_widened_op_tree (stmt_vec_info stmt_info, tree_code code,
                      tree_code widened_code, bool shift_p,
                      unsigned int max_nops,
                      vect_unpromoted_value *unprom, tree *common_type)
{
  /* Check for an integer operation with the right code.  */
  vec_info *vinfo = stmt_info->vinfo;
  gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
  if (!assign)
    return 0;

  tree_code rhs_code = gimple_assign_rhs_code (assign);
  if (rhs_code != code && rhs_code != widened_code)
    return 0;

  tree type = gimple_expr_type (assign);
  if (!INTEGRAL_TYPE_P (type))
    return 0;

  /* Assume that both operands will be leaf operands.  */
  max_nops -= 2;

  /* Check the operands.  */
  unsigned int next_op = 0;
  for (unsigned int i = 0; i < 2; ++i)
    {
      vect_unpromoted_value *this_unprom = &unprom[next_op];
      unsigned int nops = 1;
      tree op = gimple_op (assign, i + 1);
      if (i == 1 && TREE_CODE (op) == INTEGER_CST)
        {
          /* We already have a common type from earlier operands.
             Update it to account for OP.  */
          this_unprom->set_op (op, vect_constant_def);
          if (!vect_joust_widened_integer (type, shift_p, op, common_type))
            return 0;
        }
      else
        {
          /* Only allow shifts by constants.  */
          if (shift_p && i == 1)
            return 0;

          if (!vect_look_through_possible_promotion (stmt_info->vinfo, op,
                                                     this_unprom))
            return 0;

          if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
            {
              /* The operand isn't widened.  If STMT_INFO has the code
                 for an unwidened operation, recursively check whether
                 this operand is a node of the tree.  */
              if (rhs_code != code
                  || max_nops == 0
                  || this_unprom->dt != vect_internal_def)
                return 0;

              /* Give back the leaf slot allocated above now that we're
                 not treating this as a leaf operand.  */
              max_nops += 1;

              /* Recursively process the definition of the operand.  */
              stmt_vec_info def_stmt_info
                = vinfo->lookup_def (this_unprom->op);
              nops = vect_widened_op_tree (def_stmt_info, code, widened_code,
                                           shift_p, max_nops, this_unprom,
                                           common_type);
              if (nops == 0)
                return 0;

              max_nops -= nops;
            }
          else
            {
              /* Make sure that the operand is narrower than the result.  */
              if (TYPE_PRECISION (this_unprom->type) * 2
                  > TYPE_PRECISION (type))
                return 0;

              /* Update COMMON_TYPE for the new operand.  */
              if (i == 0)
                *common_type = this_unprom->type;
              else if (!vect_joust_widened_type (type, this_unprom->type,
                                                 common_type))
                return 0;
            }
        }
      next_op += nops;
    }
  return next_op;
}

/* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
   is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var.  */

static tree
vect_recog_temp_ssa_var (tree type, gimple *stmt)
{
  return make_temp_ssa_name (type, stmt, "patt");
}

/* STMT2_INFO describes a type conversion that could be split into STMT1
   followed by a version of STMT2_INFO that takes NEW_RHS as its first
   input.  Try to do this using pattern statements, returning true on
   success.  */

static bool
vect_split_statement (stmt_vec_info stmt2_info, tree new_rhs,
                      gimple *stmt1, tree vectype)
{
  if (is_pattern_stmt_p (stmt2_info))
    {
      /* STMT2_INFO is part of a pattern.  Get the statement to which
         the pattern is attached.  */
      stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
      vect_init_pattern_stmt (stmt1, orig_stmt2_info, vectype);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "Splitting pattern statement: %G", stmt2_info->stmt);

      /* Since STMT2_INFO is a pattern statement, we can change it
         in-situ without worrying about changing the code for the
         containing block.  */
      gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
          dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
                           stmt2_info->stmt);
        }

      gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
      if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
        /* STMT2_INFO is the actual pattern statement.  Add STMT1
           to the end of the definition sequence.  */
        gimple_seq_add_stmt_without_update (def_seq, stmt1);
      else
        {
          /* STMT2_INFO belongs to the definition sequence.  Insert STMT1
             before it.  */
          gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
          gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
        }

      return true;
    }

  /* STMT2_INFO doesn't yet have a pattern.  Try to create a
     two-statement pattern now.  */
  gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
  tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
  tree lhs_vectype = get_vectype_for_scalar_type (lhs_type);
  if (!lhs_vectype)
    return false;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "Splitting statement: %G", stmt2_info->stmt);

  /* Add STMT1 as a singleton pattern definition sequence.  */
  gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
  vect_init_pattern_stmt (stmt1, stmt2_info, vectype);
  gimple_seq_add_stmt_without_update (def_seq, stmt1);

  /* Build the second of the two pattern statements.  */
  tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
  gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
  vect_set_pattern_stmt (new_stmt2, stmt2_info, lhs_vectype);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "into pattern statements: %G", stmt1);
      dump_printf_loc (MSG_NOTE, vect_location, "and: %G", new_stmt2);
    }

  return true;
}

/* Convert UNPROM to TYPE and return the result, adding new statements
   to STMT_INFO's pattern definition statements if no better way is
   available.  VECTYPE is the vector form of TYPE.  */

static tree
vect_convert_input (stmt_vec_info stmt_info, tree type,
                    vect_unpromoted_value *unprom, tree vectype)
{
  /* Check for a no-op conversion.  */
  if (types_compatible_p (type, TREE_TYPE (unprom->op)))
    return unprom->op;

  /* Allow the caller to create constant vect_unpromoted_values.  */
  if (TREE_CODE (unprom->op) == INTEGER_CST)
    return wide_int_to_tree (type, wi::to_widest (unprom->op));

  /* See if we can reuse an existing result.  */
  if (unprom->caster)
    {
      tree lhs = gimple_get_lhs (unprom->caster->stmt);
      if (types_compatible_p (TREE_TYPE (lhs), type))
        return lhs;
    }

  /* We need a new conversion statement.  */
  tree new_op = vect_recog_temp_ssa_var (type, NULL);
  gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, unprom->op);

  /* If the operation is the input to a vectorizable cast, try splitting
     that cast into two, taking the required result as a mid-way point.  */
  if (unprom->caster)
    {
      tree lhs = gimple_get_lhs (unprom->caster->stmt);
      if (TYPE_PRECISION (TREE_TYPE (lhs)) > TYPE_PRECISION (type)
          && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type)
          && (TYPE_UNSIGNED (unprom->type) || !TYPE_UNSIGNED (type))
          && vect_split_statement (unprom->caster, new_op, new_stmt, vectype))
        return new_op;
    }

  /* If OP is an external value, see if we can insert the new statement
     on an incoming edge.  */
  if (unprom->dt == vect_external_def)
    if (edge e = vect_get_external_def_edge (stmt_info->vinfo, unprom->op))
      {
        basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
        gcc_assert (!new_bb);
        return new_op;
      }

  /* As a (common) last resort, add the statement to the pattern itself.  */
  append_pattern_def_seq (stmt_info, new_stmt, vectype);
  return new_op;
}

/* Invoke vect_convert_input for N elements of UNPROM and store the
   result in the corresponding elements of RESULT.  */

static void
vect_convert_inputs (stmt_vec_info stmt_info, unsigned int n,
                     tree *result, tree type, vect_unpromoted_value *unprom,
                     tree vectype)
{
  for (unsigned int i = 0; i < n; ++i)
    {
      unsigned int j;
      for (j = 0; j < i; ++j)
        if (unprom[j].op == unprom[i].op)
          break;
      if (j < i)
        result[i] = result[j];
      else
        result[i] = vect_convert_input (stmt_info, type, &unprom[i], vectype);
    }
}

/* The caller has created a (possibly empty) sequence of pattern definition
   statements followed by a single statement PATTERN_STMT.  Cast the result
   of this final statement to TYPE.  If a new statement is needed, add
   PATTERN_STMT to the end of STMT_INFO's pattern definition statements
   and return the new statement, otherwise return PATTERN_STMT as-is.
   VECITYPE is the vector form of PATTERN_STMT's result type.  */

static gimple *
vect_convert_output (stmt_vec_info stmt_info, tree type, gimple *pattern_stmt,
                     tree vecitype)
{
  tree lhs = gimple_get_lhs (pattern_stmt);
  if (!types_compatible_p (type, TREE_TYPE (lhs)))
    {
      append_pattern_def_seq (stmt_info, pattern_stmt, vecitype);
      tree cast_var = vect_recog_temp_ssa_var (type, NULL);
      pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
    }
  return pattern_stmt;
}

/* Return true if STMT_VINFO describes a reduction for which reassociation
   is allowed.  If STMT_INFO is part of a group, assume that it's part of
   a reduction chain and optimistically assume that all statements
   except the last allow reassociation.  */

static bool
vect_reassociating_reduction_p (stmt_vec_info stmt_vinfo)
{
  return (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
          ? STMT_VINFO_REDUC_TYPE (stmt_vinfo) != FOLD_LEFT_REDUCTION
          : REDUC_GROUP_FIRST_ELEMENT (stmt_vinfo) != NULL);
}

/* As above, but also require it to have code CODE and to be a reduction
   in the outermost loop.  When returning true, store the operands in
   *OP0_OUT and *OP1_OUT.  */

static bool
vect_reassociating_reduction_p (stmt_vec_info stmt_info, tree_code code,
                                tree *op0_out, tree *op1_out)
{
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
  if (!loop_info)
    return false;

  gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
  if (!assign || gimple_assign_rhs_code (assign) != code)
    return false;

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  struct loop *loop = LOOP_VINFO_LOOP (loop_info);
  if (loop && nested_in_vect_loop_p (loop, stmt_info))
    return false;

  if (!vect_reassociating_reduction_p (stmt_info))
    return false;

  *op0_out = gimple_assign_rhs1 (assign);
  *op1_out = gimple_assign_rhs2 (assign);
  return true;
}

/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     sum_0 = phi <init, sum_1>
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is
   the same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */

static gimple *
vect_recog_dot_prod_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
{
  tree oprnd0, oprnd1;
  gimple *last_stmt = stmt_vinfo->stmt;
  vec_info *vinfo = stmt_vinfo->vinfo;
  tree type, half_type;
  gimple *pattern_stmt;
  tree var;

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DPROD = DX * DY;
          DDPROD = (TYPE2) DPROD;
          sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type
     - sum is the same size as DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
                                       &oprnd0, &oprnd1))
    return NULL;

  type = gimple_expr_type (last_stmt);

  vect_unpromoted_value unprom_mult;
  oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr  */
  if (!oprnd0)
    return NULL;

  stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
  if (!mult_vinfo)
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  vect_unpromoted_value unprom0[2];
  if (!vect_widened_op_tree (mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
                             false, 2, unprom0, &half_type))
    return NULL;

  /* If there are two widening operations, make sure they agree on
     the sign of the extension.  */
  if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
      && TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type))
    return NULL;

  vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);

  tree half_vectype;
  if (!vect_supportable_direct_optab_p (type, DOT_PROD_EXPR, half_type,
                                        type_out, &half_vectype))
    return NULL;

  /* Get the inputs in the appropriate types.  */
  tree mult_oprnd[2];
  vect_convert_inputs (stmt_vinfo, 2, mult_oprnd, half_type,
                       unprom0, half_vectype);

  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
                                      mult_oprnd[0], mult_oprnd[1], oprnd1);
  return pattern_stmt;
}

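/* As an illustration (hypothetical source code, not taken from the GCC
   testsuite), the kind of scalar loop this pattern targets is:

     int
     dot_prod (const short *x, const short *y, int n)
     {
       int sum = 0;
       for (int i = 0; i < n; ++i)
         sum += x[i] * y[i];    // multiply done after promotion to int
       return sum;
     }

   Each multiplication is performed in a type twice as wide as the loads
   and accumulated in that wider type, so the whole reduction can become
   DOT_PROD_EXPR when the target provides a matching optab.  */
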
/* Function vect_recog_sad_pattern

   Try to find the following Sum of Absolute Difference (SAD) pattern:

     signed TYPE1 diff, abs_diff;
     sum_0 = phi <init, sum_1>
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  diff = x_T - y_T;
     S6  abs_diff = ABS_EXPR <diff>;
     [S7  abs_diff = (TYPE2) abs_diff;  #optional]
     S8  sum_1 = abs_diff + sum_0;

   where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is
   the same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.  In the
   example, when this function is called with S8, the pattern
   {S3,S4,S5,S6,S7,S8} will be detected.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        SAD_EXPR <x_t, y_t, sum_0>
  */

static gimple *
vect_recog_sad_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  vec_info *vinfo = stmt_vinfo->vinfo;
  tree half_type;

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DDIFF = DX - DY;
          DAD = ABS_EXPR <DDIFF>;
          DAD = (TYPE2) DAD;
          sum_1 = DAD + sum_0;
     In which
     - DX is at least double the size of X
     - DY is at least double the size of Y
     - DX, DY, DDIFF, DAD all have the same type
     - sum is the same size as DAD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD w+ sum_0;    #widen summation
     or
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD + sum_0;     #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  tree plus_oprnd0, plus_oprnd1;
  if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
                                       &plus_oprnd0, &plus_oprnd1))
    return NULL;

  tree sum_type = gimple_expr_type (last_stmt);

  /* Any non-truncating sequence of conversions is OK here, since
     with a successful match, the result of the ABS(U) is known to fit
     within the nonnegative range of the result type.  (It cannot be the
     negative of the minimum signed value due to the range of the widening
     MINUS_EXPR.)  */
  vect_unpromoted_value unprom_abs;
  plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
                                                      &unprom_abs);

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that plus_oprnd1 is the reduction variable (defined by a
     loop-header phi), and plus_oprnd0 is an ssa-name defined by a stmt in the
     loop body.  Then check that plus_oprnd0 is defined by an abs_expr.  */
  if (!plus_oprnd0)
    return NULL;

  stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
  if (!abs_stmt_vinfo)
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
  if (!abs_stmt
      || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
          && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
    return NULL;

  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
  tree abs_type = TREE_TYPE (abs_oprnd);
  if (TYPE_UNSIGNED (abs_type))
    return NULL;

  /* Peel off conversions from the ABS input.  This can involve sign
     changes (e.g. from an unsigned subtraction to a signed ABS input)
     or signed promotion, but it can't include unsigned promotion.
     (Note that ABS of an unsigned promotion should have been folded
     away before now anyway.)  */
  vect_unpromoted_value unprom_diff;
  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
                                                    &unprom_diff);
  if (!abs_oprnd)
    return NULL;
  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
      && TYPE_UNSIGNED (unprom_diff.type))
    return NULL;

  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
  if (!diff_stmt_vinfo)
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  vect_unpromoted_value unprom[2];
  if (!vect_widened_op_tree (diff_stmt_vinfo, MINUS_EXPR, MINUS_EXPR,
                             false, 2, unprom, &half_type))
    return NULL;

  vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);

  tree half_vectype;
  if (!vect_supportable_direct_optab_p (sum_type, SAD_EXPR, half_type,
                                        type_out, &half_vectype))
    return NULL;

  /* Get the inputs to the SAD_EXPR in the appropriate types.  */
  tree sad_oprnd[2];
  vect_convert_inputs (stmt_vinfo, 2, sad_oprnd, half_type,
                       unprom, half_vectype);

  tree var = vect_recog_temp_ssa_var (sum_type, NULL);
  gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
                                              sad_oprnd[1], plus_oprnd1);

  return pattern_stmt;
}

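/* As an illustration (hypothetical source code, not taken from the GCC
   testsuite), the classic scalar form of this reduction is:

     int
     sad (const unsigned char *x, const unsigned char *y, int n)
     {
       int sum = 0;
       for (int i = 0; i < n; ++i)
         sum += __builtin_abs (x[i] - y[i]);
       return sum;
     }

   The subtraction happens after promotion to int, so the ABS input is a
   widened MINUS_EXPR and the reduction can become SAD_EXPR (a psadbw-style
   instruction) where the target supports it.  */
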
/* Recognize an operation that performs ORIG_CODE on widened inputs,
   so that it can be treated as though it had the form:

      HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
      HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
    | RES_TYPE a_extend = (RES_TYPE) a_cast;  // promotion from HALF_TYPE
    | RES_TYPE b_extend = (RES_TYPE) b_cast;  // promotion from HALF_TYPE
    | RES_TYPE res = a_extend ORIG_CODE b_extend;

   Try to replace the pattern with:

      HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
      HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
    | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
    | RES_TYPE res = (EXT_TYPE) ext;  // possible no-op

   where EXT_TYPE is wider than HALF_TYPE but has the same signedness.

   SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts.  NAME is the
   name of the pattern being matched, for dump purposes.  */

static gimple *
vect_recog_widen_op_pattern (stmt_vec_info last_stmt_info, tree *type_out,
                             tree_code orig_code, tree_code wide_code,
                             bool shift_p, const char *name)
{
  gimple *last_stmt = last_stmt_info->stmt;

  vect_unpromoted_value unprom[2];
  tree half_type;
  if (!vect_widened_op_tree (last_stmt_info, orig_code, orig_code,
                             shift_p, 2, unprom, &half_type))
    return NULL;

  /* Pattern detected.  */
  vect_pattern_detected (name, last_stmt);

  tree type = gimple_expr_type (last_stmt);
  tree itype = type;
  if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
      || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
    itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
                                            TYPE_UNSIGNED (half_type));

  /* Check target support.  */
  tree vectype = get_vectype_for_scalar_type (half_type);
  tree vecitype = get_vectype_for_scalar_type (itype);
  enum tree_code dummy_code;
  int dummy_int;
  auto_vec<tree> dummy_vec;
  if (!vectype
      || !vecitype
      || !supportable_widening_operation (wide_code, last_stmt_info,
                                          vecitype, vectype,
                                          &dummy_code, &dummy_code,
                                          &dummy_int, &dummy_vec))
    return NULL;

  *type_out = get_vectype_for_scalar_type (type);
  if (!*type_out)
    return NULL;

  tree oprnd[2];
  vect_convert_inputs (last_stmt_info, 2, oprnd, half_type, unprom, vectype);

  tree var = vect_recog_temp_ssa_var (itype, NULL);
  gimple *pattern_stmt = gimple_build_assign (var, wide_code,
                                              oprnd[0], oprnd[1]);

  return vect_convert_output (last_stmt_info, type, pattern_stmt, vecitype);
}

/* Try to detect multiplication on widened inputs, converting MULT_EXPR
   to WIDEN_MULT_EXPR.  See vect_recog_widen_op_pattern for details.  */

static gimple *
vect_recog_widen_mult_pattern (stmt_vec_info last_stmt_info, tree *type_out)
{
  return vect_recog_widen_op_pattern (last_stmt_info, type_out, MULT_EXPR,
                                      WIDEN_MULT_EXPR, false,
                                      "vect_recog_widen_mult_pattern");
}

/* Function vect_recog_pow_pattern

   Try to find the following pattern:

     x = POW (y, N);

   with POW being one of pow, powf, powi, powif and N being
   either 2 or 0.5.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        x = x * x
   or
        x = sqrt (x)
*/

static gimple *
vect_recog_pow_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  tree base, exp;
  gimple *stmt;
  tree var;

  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  switch (gimple_call_combined_fn (last_stmt))
    {
    CASE_CFN_POW:
    CASE_CFN_POWI:
      break;

    default:
      return NULL;
    }

  base = gimple_call_arg (last_stmt, 0);
  exp = gimple_call_arg (last_stmt, 1);
  if (TREE_CODE (exp) != REAL_CST
      && TREE_CODE (exp) != INTEGER_CST)
    {
      if (flag_unsafe_math_optimizations
          && TREE_CODE (base) == REAL_CST
          && !gimple_call_internal_p (last_stmt))
        {
          combined_fn log_cfn;
          built_in_function exp_bfn;
          switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
            {
            case BUILT_IN_POW:
              log_cfn = CFN_BUILT_IN_LOG;
              exp_bfn = BUILT_IN_EXP;
              break;
            case BUILT_IN_POWF:
              log_cfn = CFN_BUILT_IN_LOGF;
              exp_bfn = BUILT_IN_EXPF;
              break;
            case BUILT_IN_POWL:
              log_cfn = CFN_BUILT_IN_LOGL;
              exp_bfn = BUILT_IN_EXPL;
              break;
            default:
              return NULL;
            }
          tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
          tree exp_decl = builtin_decl_implicit (exp_bfn);
          /* Optimize pow (C, x) as exp (log (C) * x).  Normally match.pd
             does that, but if C is a power of 2, we want to use
             exp2 (log2 (C) * x) in the non-vectorized version, but for
             vectorization we don't have vectorized exp2.  */
          if (logc
              && TREE_CODE (logc) == REAL_CST
              && exp_decl
              && lookup_attribute ("omp declare simd",
                                   DECL_ATTRIBUTES (exp_decl)))
            {
              cgraph_node *node = cgraph_node::get_create (exp_decl);
              if (node->simd_clones == NULL)
                {
                  if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
                      || node->definition)
                    return NULL;
                  expand_simd_clones (node);
                  if (node->simd_clones == NULL)
                    return NULL;
                }
              *type_out = get_vectype_for_scalar_type (TREE_TYPE (base));
              if (!*type_out)
                return NULL;
              tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
              gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
              append_pattern_def_seq (stmt_vinfo, g);
              tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
              g = gimple_build_call (exp_decl, 1, def);
              gimple_call_set_lhs (g, res);
              return g;
            }
        }
      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  /* Catch squaring.  */
  if ((tree_fits_shwi_p (exp)
       && tree_to_shwi (exp) == 2)
      || (TREE_CODE (exp) == REAL_CST
          && real_equal (&TREE_REAL_CST (exp), &dconst2)))
    {
      if (!vect_supportable_direct_optab_p (TREE_TYPE (base), MULT_EXPR,
                                            TREE_TYPE (base), type_out))
        return NULL;

      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign (var, MULT_EXPR, base, base);
      return stmt;
    }

  /* Catch square root.  */
  if (TREE_CODE (exp) == REAL_CST
      && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
    {
      *type_out = get_vectype_for_scalar_type (TREE_TYPE (base));
      if (*type_out
          && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
                                             OPTIMIZE_FOR_SPEED))
        {
          gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
          var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
          gimple_call_set_lhs (stmt, var);
          gimple_call_set_nothrow (stmt, true);
          return stmt;
        }
    }

  return NULL;
}

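/* For example (hypothetical user code):

     y1 = pow (x, 2.0);    // becomes x * x
     y2 = pow (x, 0.5);    // becomes an IFN_SQRT call if the target
                           // supports vector sqrt
     y3 = pow (2.5, x);    // constant base: rewritten as exp (log (2.5) * x)
                           // when a usable SIMD clone of exp exists

   The first two rewrites only need the constant exponent; the last one is
   guarded by the flag_unsafe_math_optimizations checks above.  */
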
/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

     TYPE x_T, sum = init;
     sum_0 = phi <init, sum_1>
     S2  x_T = (TYPE) x_t;
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e. we're
   summing elements of type 'type' into an accumulator of type 'TYPE'.  This is
   a special case of a reduction computation.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.  In the example,
   when this function is called with S3, the pattern {S2,S3} will be detected.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */

static gimple *
vect_recog_widen_sum_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  tree oprnd0, oprnd1;
  vec_info *vinfo = stmt_vinfo->vinfo;
  tree type;
  gimple *pattern_stmt;
  tree var;

  /* Look for the following pattern
          DX = (TYPE) X;
          sum_1 = DX + sum_0;
     In which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
                                       &oprnd0, &oprnd1))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a cast from type 'type' to type
     'TYPE'.  */

  vect_unpromoted_value unprom0;
  if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
      || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
    return NULL;

  vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);

  if (!vect_supportable_direct_optab_p (type, WIDEN_SUM_EXPR, unprom0.type,
                                        type_out))
    return NULL;

  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);

  return pattern_stmt;
}

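/* For example (hypothetical code), a reduction such as

     int
     widen_sum (const short *a, int n)
     {
       int sum = 0;
       for (int i = 0; i < n; ++i)
         sum += a[i];    // each element widened from short to int
       return sum;
     }

   sums narrow elements into a wider accumulator and can be rewritten to
   WIDEN_SUM_EXPR when the target provides a widening-sum optab.  */
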
/* Recognize cases in which an operation is performed in one type WTYPE
   but could be done more efficiently in a narrower type NTYPE.  For example,
   if we have:

     ATYPE a;  // narrower than NTYPE
     BTYPE b;  // narrower than NTYPE
     WTYPE aw = (WTYPE) a;
     WTYPE bw = (WTYPE) b;
     WTYPE res = aw + bw;  // only uses of aw and bw

   then it would be more efficient to do:

     NTYPE an = (NTYPE) a;
     NTYPE bn = (NTYPE) b;
     NTYPE resn = an + bn;
     WTYPE res = (WTYPE) resn;

   Other situations include things like:

     ATYPE a;  // NTYPE or narrower
     WTYPE aw = (WTYPE) a;
     WTYPE res = aw + b;

   when only "(NTYPE) res" is significant.  In that case it's more efficient
   to truncate "b" and do the operation on NTYPE instead:

     NTYPE an = (NTYPE) a;
     NTYPE bn = (NTYPE) b;  // truncation
     NTYPE resn = an + bn;
     WTYPE res = (WTYPE) resn;

   All users of "res" should then use "resn" instead, making the final
   statement dead (not marked as relevant).  The final statement is still
   needed to maintain the type correctness of the IR.

   vect_determine_precisions has already determined the minimum
   precision of the operation and the minimum precision required
   by users of the result.  */

static gimple *
vect_recog_over_widening_pattern (stmt_vec_info last_stmt_info, tree *type_out)
{
  gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
  if (!last_stmt)
    return NULL;

  /* See whether we have found that this operation can be done on a
     narrower type without changing its semantics.  */
  unsigned int new_precision = last_stmt_info->operation_precision;
  if (!new_precision)
    return NULL;

  vec_info *vinfo = last_stmt_info->vinfo;
  tree lhs = gimple_assign_lhs (last_stmt);
  tree type = TREE_TYPE (lhs);
  tree_code code = gimple_assign_rhs_code (last_stmt);

  /* Keep the first operand of a COND_EXPR as-is: only the other two
     operands are interesting.  */
  unsigned int first_op = (code == COND_EXPR ? 2 : 1);

  /* Check the operands.  */
  unsigned int nops = gimple_num_ops (last_stmt) - first_op;
  auto_vec<vect_unpromoted_value, 3> unprom (nops);
  unprom.quick_grow (nops);
  unsigned int min_precision = 0;
  bool single_use_p = false;
  for (unsigned int i = 0; i < nops; ++i)
    {
      tree op = gimple_op (last_stmt, first_op + i);
      if (TREE_CODE (op) == INTEGER_CST)
        unprom[i].set_op (op, vect_constant_def);
      else if (TREE_CODE (op) == SSA_NAME)
        {
          bool op_single_use_p = true;
          if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
                                                     &op_single_use_p))
            return NULL;
          /* If:

             (1) N bits of the result are needed;
             (2) all inputs are widened from M<N bits; and
             (3) one operand OP is a single-use SSA name

             we can shift the M->N widening from OP to the output
             without changing the number or type of extensions involved.
             This then reduces the number of copies of STMT_INFO.

             If instead of (3) more than one operand is a single-use SSA name,
             shifting the extension to the output is even more of a win.

             If instead:

             (1) N bits of the result are needed;
             (2) one operand OP2 is widened from M2<N bits;
             (3) another operand OP1 is widened from M1<M2 bits; and
             (4) both OP1 and OP2 are single-use

             the choice is between:

             (a) truncating OP2 to M1, doing the operation on M1,
                 and then widening the result to N

             (b) widening OP1 to M2, doing the operation on M2, and then
                 widening the result to N

             Both shift the M2->N widening of the inputs to the output.
             (a) additionally shifts the M1->M2 widening to the output;
             it requires fewer copies of STMT_INFO but requires an extra
             M1->M2 promotion.

             Which is better will depend on the complexity and cost of
             STMT_INFO, which is hard to predict at this stage.  However,
             a clear tie-breaker in favor of (b) is the fact that the
             truncation in (a) increases the length of the operation chain.

             If instead of (4) only one of OP1 or OP2 is single-use,
             (b) is still a win over doing the operation in N bits:
             it still shifts the M2->N widening on the single-use operand
             to the output and reduces the number of STMT_INFO copies.

             If neither operand is single-use then operating on fewer than
             N bits might lead to more extensions overall.  Whether it does
             or not depends on global information about the vectorization
             region, and whether that's a good trade-off would again
             depend on the complexity and cost of the statements involved,
             as well as things like register pressure that are not normally
             modelled at this stage.  We therefore ignore these cases
             and just optimize the clear single-use wins above.

             Thus we take the maximum precision of the unpromoted operands
             and record whether any operand is single-use.  */
          if (unprom[i].dt == vect_internal_def)
            {
              min_precision = MAX (min_precision,
                                   TYPE_PRECISION (unprom[i].type));
              single_use_p |= op_single_use_p;
            }
        }
    }

  /* Although the operation could be done in operation_precision, we have
     to balance that against introducing extra truncations or extensions.
     Calculate the minimum precision that can be handled efficiently.

     The loop above determined that the operation could be handled
     efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
     extension from the inputs to the output without introducing more
     instructions, and would reduce the number of instructions required
     for STMT_INFO itself.

     vect_determine_precisions has also determined that the result only
     needs min_output_precision bits.  Truncating by a factor of N times
     requires a tree of N - 1 instructions, so if TYPE is N times wider
     than min_output_precision, doing the operation in TYPE and truncating
     the result requires N + (N - 1) = 2N - 1 instructions per output vector.

     - truncating the input to a unary operation and doing the operation
       in the new type requires at most N - 1 + 1 = N instructions per
       output vector

     - doing the same for a binary operation requires at most
       (N - 1) * 2 + 1 = 2N - 1 instructions per output vector

     Both unary and binary operations require fewer instructions than
     this if the operands were extended from a suitable truncated form.
     Thus there is usually nothing to lose by doing operations in
     min_output_precision bits, but there can be something to gain.  */
  if (!single_use_p)
    min_precision = last_stmt_info->min_output_precision;
  else
    min_precision = MIN (min_precision, last_stmt_info->min_output_precision);

  /* Apply the minimum efficient precision we just calculated.  */
  if (new_precision < min_precision)
    new_precision = min_precision;
  if (new_precision >= TYPE_PRECISION (type))
    return NULL;

  vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);

  *type_out = get_vectype_for_scalar_type (type);
  if (!*type_out)
    return NULL;

  /* We've found a viable pattern.  Get the new type of the operation.  */
  bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
  tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);

  /* We specifically don't check here whether the target supports the
     new operation, since it might be something that a later pattern
     wants to rewrite anyway.  If targets have a minimum element size
     for some optabs, we should pattern-match smaller ops to larger ops
     where beneficial.  */
  tree new_vectype = get_vectype_for_scalar_type (new_type);
  if (!new_vectype)
    return NULL;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
                     type, new_type);

  /* Calculate the rhs operands for an operation on NEW_TYPE.  */
  tree ops[3] = {};
  for (unsigned int i = 1; i < first_op; ++i)
    ops[i - 1] = gimple_op (last_stmt, i);
  vect_convert_inputs (last_stmt_info, nops, &ops[first_op - 1],
                       new_type, &unprom[0], new_vectype);

  /* Use the operation to produce a result of type NEW_TYPE.  */
  tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
  gimple *pattern_stmt = gimple_build_assign (new_var, code,
                                              ops[0], ops[1], ops[2]);
  gimple_set_location (pattern_stmt, gimple_location (last_stmt));

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "created pattern stmt: %G", pattern_stmt);

  pattern_stmt = vect_convert_output (last_stmt_info, type,
                                      pattern_stmt, new_vectype);

  return pattern_stmt;
}

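/* For example (hypothetical code), in

     void
     f (unsigned char *restrict r, const unsigned char *a,
        const unsigned char *b, int n)
     {
       for (int i = 0; i < n; ++i)
         r[i] = a[i] + b[i];    // C promotes the operands to int
     }

   the addition is nominally done in int, but only the low 8 bits of the
   result are stored, so the precisions recorded by
   vect_determine_precisions allow this pattern to demote the operation to
   a narrower type and avoid unpacking and repacking the vectors.  */
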
/* Recognize the patterns:

            ATYPE a;  // narrower than TYPE
            BTYPE b;  // narrower than TYPE
        (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
     or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;

   where only the bottom half of avg is used.  Try to transform them into:

        (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
     or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);

   followed by:

            TYPE avg = (TYPE) avg';

   where NTYPE is no wider than half of TYPE.  Since only the bottom half
   of avg is used, all or part of the cast of avg' should become redundant.  */

static gimple *
vect_recog_average_pattern (stmt_vec_info last_stmt_info, tree *type_out)
{
  /* Check for a shift right by one bit.  */
  gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
  vec_info *vinfo = last_stmt_info->vinfo;
  if (!last_stmt
      || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
      || !integer_onep (gimple_assign_rhs2 (last_stmt)))
    return NULL;

  /* Check that the shift result is wider than the users of the
     result need (i.e. that narrowing would be a natural choice).  */
  tree lhs = gimple_assign_lhs (last_stmt);
  tree type = TREE_TYPE (lhs);
  unsigned int target_precision
    = vect_element_precision (last_stmt_info->min_output_precision);
  if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
    return NULL;

  /* Get the definition of the shift input.  */
  tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
  stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
  if (!plus_stmt_info)
    return NULL;

  /* Check whether the shift input can be seen as a tree of additions on
     2 or 3 widened inputs.

     Note that the pattern should be a win even if the result of one or
     more additions is reused elsewhere: if the pattern matches, we'd be
     replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s.  */
  internal_fn ifn = IFN_AVG_FLOOR;
  vect_unpromoted_value unprom[3];
  tree new_type;
  unsigned int nops = vect_widened_op_tree (plus_stmt_info, PLUS_EXPR,
                                            PLUS_EXPR, false, 3,
                                            unprom, &new_type);
  if (nops == 0)
    return NULL;
  if (nops == 3)
    {
      /* Check that one operand is 1.  */
      unsigned int i;
      for (i = 0; i < 3; ++i)
        if (integer_onep (unprom[i].op))
          break;
      if (i == 3)
        return NULL;
      /* Throw away the 1 operand and keep the other two.  */
      if (i < 2)
        unprom[i] = unprom[2];
      ifn = IFN_AVG_CEIL;
    }

  vect_pattern_detected ("vect_recog_average_pattern", last_stmt);

  /* We know that:

     (a) the operation can be viewed as:

           TYPE widened0 = (TYPE) UNPROM[0];
           TYPE widened1 = (TYPE) UNPROM[1];
           TYPE tmp1 = widened0 + widened1 {+ 1};
           TYPE tmp2 = tmp1 >> 1;   // LAST_STMT_INFO

     (b) the first two statements are equivalent to:

           TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
           TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];

     (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
         itself; and

     (d) all the operations can be performed correctly at twice the width of
         NEW_TYPE, due to the nature of the average operation; and

     (e) users of the result of the right shift need only TARGET_PRECISION
         bits, where TARGET_PRECISION is no more than half of TYPE's
         precision.

     Under these circumstances, the only situation in which NEW_TYPE
     could be narrower than TARGET_PRECISION is if widened0, widened1
     and an addition result are all used more than once.  Thus we can
     treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
     as "free", whereas widening the result of the average instruction
     from NEW_TYPE to TARGET_PRECISION would be a new operation.  It's
     therefore better not to go narrower than TARGET_PRECISION.  */
  if (TYPE_PRECISION (new_type) < target_precision)
    new_type = build_nonstandard_integer_type (target_precision,
                                               TYPE_UNSIGNED (new_type));

  /* Check for target support.  */
  tree new_vectype = get_vectype_for_scalar_type (new_type);
  if (!new_vectype
      || !direct_internal_fn_supported_p (ifn, new_vectype,
                                          OPTIMIZE_FOR_SPEED))
    return NULL;

  /* The IR requires a valid vector type for the cast result, even though
     it's likely to be discarded.  */
  *type_out = get_vectype_for_scalar_type (type);
  if (!*type_out)
    return NULL;

  /* Generate the IFN_AVG* call.  */
  tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
  tree new_ops[2];
  vect_convert_inputs (last_stmt_info, 2, new_ops, new_type,
                       unprom, new_vectype);
  gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
                                                    new_ops[1]);
  gimple_call_set_lhs (average_stmt, new_var);
  gimple_set_location (average_stmt, gimple_location (last_stmt));

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "created pattern stmt: %G", average_stmt);

  return vect_convert_output (last_stmt_info, type, average_stmt, new_vectype);
}

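/* For example (hypothetical code), the rounding-average idiom

     void
     avg_ceil (unsigned char *restrict r, const unsigned char *a,
               const unsigned char *b, int n)
     {
       for (int i = 0; i < n; ++i)
         r[i] = (a[i] + b[i] + 1) >> 1;    // operands promoted to int
     }

   matches form (2) above and becomes IFN_AVG_CEIL; form (1), without the
   "+ 1", becomes IFN_AVG_FLOOR.  Both require the target to support the
   corresponding vector instruction at the narrower width.  */
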
/* Recognize cases in which the input to a cast is wider than its
   output, and the input is fed by a widening operation.  Fold this
   by removing the unnecessary intermediate widening.  E.g.:

     unsigned char a;
     unsigned int b = (unsigned int) a;
     unsigned short c = (unsigned short) b;

   -->

     unsigned short c = (unsigned short) a;

   Although this is rare in input IR, it is an expected side-effect
   of the over-widening pattern above.

   This is beneficial also for integer-to-float conversions, if the
   widened integer has more bits than the float, and if the unwidened
   input doesn't.  */

static gimple *
vect_recog_cast_forwprop_pattern (stmt_vec_info last_stmt_info, tree *type_out)
{
  /* Check for a cast, including an integer-to-float conversion.  */
  gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
  if (!last_stmt)
    return NULL;
  tree_code code = gimple_assign_rhs_code (last_stmt);
  if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
    return NULL;

  /* Make sure that the rhs is a scalar with a natural bitsize.  */
  tree lhs = gimple_assign_lhs (last_stmt);
  tree lhs_type = TREE_TYPE (lhs);
  scalar_mode lhs_mode;
  if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
      || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
    return NULL;

  /* Check for a narrowing operation (from a vector point of view).  */
  tree rhs = gimple_assign_rhs1 (last_stmt);
  tree rhs_type = TREE_TYPE (rhs);
  if (!INTEGRAL_TYPE_P (rhs_type)
      || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
      || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
    return NULL;

  /* Try to find an unpromoted input.  */
  vec_info *vinfo = last_stmt_info->vinfo;
  vect_unpromoted_value unprom;
  if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
      || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
    return NULL;

  /* If the bits above RHS_TYPE matter, make sure that they're the
     same when extending from UNPROM as they are when extending from RHS.  */
  if (!INTEGRAL_TYPE_P (lhs_type)
      && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
    return NULL;

  /* We can get the same result by casting UNPROM directly, to avoid
     the unnecessary widening and narrowing.  */
  vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);

  *type_out = get_vectype_for_scalar_type (lhs_type);
  if (!*type_out)
    return NULL;

  tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
  gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
  gimple_set_location (pattern_stmt, gimple_location (last_stmt));

  return pattern_stmt;
}

/* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
   to WIDEN_LSHIFT_EXPR.  See vect_recog_widen_op_pattern for details.  */

static gimple *
vect_recog_widen_shift_pattern (stmt_vec_info last_stmt_info, tree *type_out)
{
  return vect_recog_widen_op_pattern (last_stmt_info, type_out, LSHIFT_EXPR,
				      WIDEN_LSHIFT_EXPR, true,
				      "vect_recog_widen_shift_pattern");
}
/* Detect a rotate pattern that wouldn't be otherwise vectorized:

     type a_t, b_t, c_t;

     S0 a_t = b_t r<< c_t;

   Input/Output:

   * STMT_VINFO: The stmt from which the pattern search begins,
     i.e. the shift/rotate stmt.  The original stmt (S0) is replaced
     with a sequence of stmts that compute the rotate with two shifts:

     S1 d_t = -c_t;
     S2 e_t = d_t & (B - 1);
     S3 f_t = b_t << c_t;
     S4 g_t = b_t >> e_t;
     S0' a_t = f_t | g_t;

   where B is element bitsize of type.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the rotate
     S0 stmt.  */
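/* For illustration only (this example is not part of the original sources):
   in scalar C the lowering built below computes a rotate-left using only
   shifts and a bitwise OR, e.g. for a 32-bit element:

       unsigned int
       rotl_lowered (unsigned int b, unsigned int c)
       {
	 unsigned int e = -c & 31;	  // S1/S2: negate and mask the count
	 return (b << c) | (b >> e);	  // S3, S4, S0': two shifts and an OR
       }

   so only vector shift and IOR support is needed from the target, not a
   vector rotate instruction.  */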
1935 vect_recog_rotate_pattern (stmt_vec_info stmt_vinfo
, tree
*type_out
)
1937 gimple
*last_stmt
= stmt_vinfo
->stmt
;
1938 tree oprnd0
, oprnd1
, lhs
, var
, var1
, var2
, vectype
, type
, stype
, def
, def2
;
1939 gimple
*pattern_stmt
, *def_stmt
;
1940 enum tree_code rhs_code
;
1941 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
1942 enum vect_def_type dt
;
1943 optab optab1
, optab2
;
1944 edge ext_def
= NULL
;
1946 if (!is_gimple_assign (last_stmt
))
1949 rhs_code
= gimple_assign_rhs_code (last_stmt
);
1959 lhs
= gimple_assign_lhs (last_stmt
);
1960 oprnd0
= gimple_assign_rhs1 (last_stmt
);
1961 type
= TREE_TYPE (oprnd0
);
1962 oprnd1
= gimple_assign_rhs2 (last_stmt
);
1963 if (TREE_CODE (oprnd0
) != SSA_NAME
1964 || TYPE_PRECISION (TREE_TYPE (lhs
)) != TYPE_PRECISION (type
)
1965 || !INTEGRAL_TYPE_P (type
)
1966 || !TYPE_UNSIGNED (type
))
1969 stmt_vec_info def_stmt_info
;
1970 if (!vect_is_simple_use (oprnd1
, vinfo
, &dt
, &def_stmt_info
, &def_stmt
))
1973 if (dt
!= vect_internal_def
1974 && dt
!= vect_constant_def
1975 && dt
!= vect_external_def
)
1978 vectype
= get_vectype_for_scalar_type (type
);
1979 if (vectype
== NULL_TREE
)
1982 /* If vector/vector or vector/scalar rotate is supported by the target,
1983 don't do anything here. */
1984 optab1
= optab_for_tree_code (rhs_code
, vectype
, optab_vector
);
1986 && optab_handler (optab1
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
1989 if (is_a
<bb_vec_info
> (vinfo
) || dt
!= vect_internal_def
)
1991 optab2
= optab_for_tree_code (rhs_code
, vectype
, optab_scalar
);
1993 && optab_handler (optab2
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
1997 /* If vector/vector or vector/scalar shifts aren't supported by the target,
1998 don't do anything here either. */
1999 optab1
= optab_for_tree_code (LSHIFT_EXPR
, vectype
, optab_vector
);
2000 optab2
= optab_for_tree_code (RSHIFT_EXPR
, vectype
, optab_vector
);
2002 || optab_handler (optab1
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
2004 || optab_handler (optab2
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
2006 if (! is_a
<bb_vec_info
> (vinfo
) && dt
== vect_internal_def
)
2008 optab1
= optab_for_tree_code (LSHIFT_EXPR
, vectype
, optab_scalar
);
2009 optab2
= optab_for_tree_code (RSHIFT_EXPR
, vectype
, optab_scalar
);
2011 || optab_handler (optab1
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
2013 || optab_handler (optab2
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
2017 *type_out
= vectype
;
2019 if (dt
== vect_external_def
2020 && TREE_CODE (oprnd1
) == SSA_NAME
)
2021 ext_def
= vect_get_external_def_edge (vinfo
, oprnd1
);
2024 scalar_int_mode mode
= SCALAR_INT_TYPE_MODE (type
);
2025 if (TREE_CODE (oprnd1
) == INTEGER_CST
2026 || TYPE_MODE (TREE_TYPE (oprnd1
)) == mode
)
2028 else if (def_stmt
&& gimple_assign_cast_p (def_stmt
))
2030 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
2031 if (TYPE_MODE (TREE_TYPE (rhs1
)) == mode
2032 && TYPE_PRECISION (TREE_TYPE (rhs1
))
2033 == TYPE_PRECISION (type
))
2037 if (def
== NULL_TREE
)
2039 def
= vect_recog_temp_ssa_var (type
, NULL
);
2040 def_stmt
= gimple_build_assign (def
, NOP_EXPR
, oprnd1
);
2044 = gsi_insert_on_edge_immediate (ext_def
, def_stmt
);
2045 gcc_assert (!new_bb
);
2048 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2050 stype
= TREE_TYPE (def
);
2051 scalar_int_mode smode
= SCALAR_INT_TYPE_MODE (stype
);
2053 if (TREE_CODE (def
) == INTEGER_CST
)
2055 if (!tree_fits_uhwi_p (def
)
2056 || tree_to_uhwi (def
) >= GET_MODE_PRECISION (mode
)
2057 || integer_zerop (def
))
2059 def2
= build_int_cst (stype
,
2060 GET_MODE_PRECISION (mode
) - tree_to_uhwi (def
));
2064 tree vecstype
= get_vectype_for_scalar_type (stype
);
2066 if (vecstype
== NULL_TREE
)
2068 def2
= vect_recog_temp_ssa_var (stype
, NULL
);
2069 def_stmt
= gimple_build_assign (def2
, NEGATE_EXPR
, def
);
2073 = gsi_insert_on_edge_immediate (ext_def
, def_stmt
);
2074 gcc_assert (!new_bb
);
2077 append_pattern_def_seq (stmt_vinfo
, def_stmt
, vecstype
);
2079 def2
= vect_recog_temp_ssa_var (stype
, NULL
);
2080 tree mask
= build_int_cst (stype
, GET_MODE_PRECISION (smode
) - 1);
2081 def_stmt
= gimple_build_assign (def2
, BIT_AND_EXPR
,
2082 gimple_assign_lhs (def_stmt
), mask
);
2086 = gsi_insert_on_edge_immediate (ext_def
, def_stmt
);
2087 gcc_assert (!new_bb
);
2090 append_pattern_def_seq (stmt_vinfo
, def_stmt
, vecstype
);
2093 var1
= vect_recog_temp_ssa_var (type
, NULL
);
2094 def_stmt
= gimple_build_assign (var1
, rhs_code
== LROTATE_EXPR
2095 ? LSHIFT_EXPR
: RSHIFT_EXPR
,
2097 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2099 var2
= vect_recog_temp_ssa_var (type
, NULL
);
2100 def_stmt
= gimple_build_assign (var2
, rhs_code
== LROTATE_EXPR
2101 ? RSHIFT_EXPR
: LSHIFT_EXPR
,
2103 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2105 /* Pattern detected. */
2106 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt
);
2108 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2109 var
= vect_recog_temp_ssa_var (type
, NULL
);
2110 pattern_stmt
= gimple_build_assign (var
, BIT_IOR_EXPR
, var1
, var2
);
2112 return pattern_stmt
;
/* Detect a vector by vector shift pattern that wouldn't be otherwise
   vectorized:

     type a_t;
     TYPE b_T, res_T;

     S3 res_T = b_T op a_t;

   where type 'TYPE' is a type with different size than 'type',
   and op is <<, >> or rotate.

   Also detect cases where the shift amount is defined by a cast:

     type a_t;
     TYPE b_T, c_T, res_T;

     S1 a_t = (type) c_T;
     S3 res_T = b_T op a_t;

   Input/Output:

   * STMT_VINFO: The stmt from which the pattern search begins,
     i.e. the shift/rotate stmt.  The original stmt (S3) is replaced
     with a shift/rotate which has same type on both operands, in the
     second case just b_T op c_T, in the first case with added cast
     from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the shift/rotate
     S3 stmt.  */
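/* For illustration only (this example is not part of the original sources):
   a scalar source of this shape, where the shift amount has a narrower
   type than the shifted value, is

       void
       shift_mixed (long long *x, const unsigned char *amount, int n)
       {
	 for (int i = 0; i < n; i++)
	   x[i] = x[i] << amount[i];
       }

   Vector shifts want both operands in the same vector type, so the pattern
   below makes the conversion of the shift amount explicit.  */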
2154 vect_recog_vector_vector_shift_pattern (stmt_vec_info stmt_vinfo
,
2157 gimple
*last_stmt
= stmt_vinfo
->stmt
;
2158 tree oprnd0
, oprnd1
, lhs
, var
;
2159 gimple
*pattern_stmt
;
2160 enum tree_code rhs_code
;
2161 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
2163 if (!is_gimple_assign (last_stmt
))
2166 rhs_code
= gimple_assign_rhs_code (last_stmt
);
2178 lhs
= gimple_assign_lhs (last_stmt
);
2179 oprnd0
= gimple_assign_rhs1 (last_stmt
);
2180 oprnd1
= gimple_assign_rhs2 (last_stmt
);
2181 if (TREE_CODE (oprnd0
) != SSA_NAME
2182 || TREE_CODE (oprnd1
) != SSA_NAME
2183 || TYPE_MODE (TREE_TYPE (oprnd0
)) == TYPE_MODE (TREE_TYPE (oprnd1
))
2184 || !type_has_mode_precision_p (TREE_TYPE (oprnd1
))
2185 || TYPE_PRECISION (TREE_TYPE (lhs
))
2186 != TYPE_PRECISION (TREE_TYPE (oprnd0
)))
2189 stmt_vec_info def_vinfo
= vect_get_internal_def (vinfo
, oprnd1
);
2193 *type_out
= get_vectype_for_scalar_type (TREE_TYPE (oprnd0
));
2194 if (*type_out
== NULL_TREE
)
2197 tree def
= NULL_TREE
;
2198 gassign
*def_stmt
= dyn_cast
<gassign
*> (def_vinfo
->stmt
);
2199 if (def_stmt
&& gimple_assign_cast_p (def_stmt
))
2201 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
2202 if (TYPE_MODE (TREE_TYPE (rhs1
)) == TYPE_MODE (TREE_TYPE (oprnd0
))
2203 && TYPE_PRECISION (TREE_TYPE (rhs1
))
2204 == TYPE_PRECISION (TREE_TYPE (oprnd0
)))
2206 if (TYPE_PRECISION (TREE_TYPE (oprnd1
))
2207 >= TYPE_PRECISION (TREE_TYPE (rhs1
)))
2212 = build_low_bits_mask (TREE_TYPE (rhs1
),
2213 TYPE_PRECISION (TREE_TYPE (oprnd1
)));
2214 def
= vect_recog_temp_ssa_var (TREE_TYPE (rhs1
), NULL
);
2215 def_stmt
= gimple_build_assign (def
, BIT_AND_EXPR
, rhs1
, mask
);
2216 tree vecstype
= get_vectype_for_scalar_type (TREE_TYPE (rhs1
));
2217 append_pattern_def_seq (stmt_vinfo
, def_stmt
, vecstype
);
2222 if (def
== NULL_TREE
)
2224 def
= vect_recog_temp_ssa_var (TREE_TYPE (oprnd0
), NULL
);
2225 def_stmt
= gimple_build_assign (def
, NOP_EXPR
, oprnd1
);
2226 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2229 /* Pattern detected. */
2230 vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt
);
2232 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2233 var
= vect_recog_temp_ssa_var (TREE_TYPE (oprnd0
), NULL
);
2234 pattern_stmt
= gimple_build_assign (var
, rhs_code
, oprnd0
, def
);
2236 return pattern_stmt
;
/* Return true iff the target has a vector optab implementing the operation
   CODE on type VECTYPE.  */

static bool
target_has_vecop_for_code (tree_code code, tree vectype)
{
  optab voptab = optab_for_tree_code (code, vectype, optab_vector);
  return voptab
	 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
}
/* Verify that the target has optabs of VECTYPE to perform all the steps
   needed by the multiplication-by-immediate synthesis algorithm described by
   ALG and VAR.  If SYNTH_SHIFT_P is true ensure that vector addition is
   present.  Return true iff the target supports all the steps.  */
2256 target_supports_mult_synth_alg (struct algorithm
*alg
, mult_variant var
,
2257 tree vectype
, bool synth_shift_p
)
2259 if (alg
->op
[0] != alg_zero
&& alg
->op
[0] != alg_m
)
2262 bool supports_vminus
= target_has_vecop_for_code (MINUS_EXPR
, vectype
);
2263 bool supports_vplus
= target_has_vecop_for_code (PLUS_EXPR
, vectype
);
2265 if (var
== negate_variant
2266 && !target_has_vecop_for_code (NEGATE_EXPR
, vectype
))
2269 /* If we must synthesize shifts with additions make sure that vector
2270 addition is available. */
2271 if ((var
== add_variant
|| synth_shift_p
) && !supports_vplus
)
2274 for (int i
= 1; i
< alg
->ops
; i
++)
2282 case alg_add_factor
:
2283 if (!supports_vplus
)
2288 case alg_sub_factor
:
2289 if (!supports_vminus
)
2295 case alg_impossible
:
/* Synthesize a left shift of OP by AMNT bits using a series of additions and
   putting the final result in DEST.  Append all statements but the last into
   VINFO.  Return the last statement.  */
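/* For illustration only (this example is not part of the original sources):
   the equivalence used here is that adding a value to itself doubles it, so
   a shift by AMNT is AMNT successive doublings, e.g.

       unsigned int
       lshift3_by_additions (unsigned int x)
       {
	 unsigned int t1 = x + x;	  // x << 1
	 unsigned int t2 = t1 + t1;	  // x << 2
	 return t2 + t2;		  // x << 3
       }

   This is what lets targets without vector shifts still use the
   multiplication synthesis below, at the cost of AMNT vector additions.  */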
2310 synth_lshift_by_additions (tree dest
, tree op
, HOST_WIDE_INT amnt
,
2311 stmt_vec_info vinfo
)
2314 tree itype
= TREE_TYPE (op
);
2316 gcc_assert (amnt
>= 0);
2317 for (i
= 0; i
< amnt
; i
++)
2319 tree tmp_var
= (i
< amnt
- 1) ? vect_recog_temp_ssa_var (itype
, NULL
)
2322 = gimple_build_assign (tmp_var
, PLUS_EXPR
, prev_res
, prev_res
);
2325 append_pattern_def_seq (vinfo
, stmt
);
/* Helper for vect_synth_mult_by_constant.  Apply a binary operation
   CODE to operands OP1 and OP2, creating a new temporary SSA var in
   the process if necessary.  Append the resulting assignment statements
   to the sequence in STMT_VINFO.  Return the SSA variable that holds the
   result of the binary operation.  If SYNTH_SHIFT_P is true synthesize
   left shifts using additions.  */
2341 apply_binop_and_append_stmt (tree_code code
, tree op1
, tree op2
,
2342 stmt_vec_info stmt_vinfo
, bool synth_shift_p
)
2344 if (integer_zerop (op2
)
2345 && (code
== LSHIFT_EXPR
2346 || code
== PLUS_EXPR
))
2348 gcc_assert (TREE_CODE (op1
) == SSA_NAME
);
2353 tree itype
= TREE_TYPE (op1
);
2354 tree tmp_var
= vect_recog_temp_ssa_var (itype
, NULL
);
2356 if (code
== LSHIFT_EXPR
2359 stmt
= synth_lshift_by_additions (tmp_var
, op1
, TREE_INT_CST_LOW (op2
),
2361 append_pattern_def_seq (stmt_vinfo
, stmt
);
2365 stmt
= gimple_build_assign (tmp_var
, code
, op1
, op2
);
2366 append_pattern_def_seq (stmt_vinfo
, stmt
);
/* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
   and simple arithmetic operations to be vectorized.  Record the statements
   produced in STMT_VINFO and return the last statement in the sequence or
   NULL if it's not possible to synthesize such a multiplication.
   This function mirrors the behavior of expand_mult_const in expmed.c but
   works on tree-ssa form.  */
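/* For illustration only (this example is not part of the original sources):
   the kind of decomposition chosen by choose_mult_variant is, for a
   multiplication by 9,

       unsigned int
       mult_by_9 (unsigned int x)
       {
	 unsigned int t = x << 3;	  // x * 8
	 return t + x;			  // x * 9
       }

   while a multiplication by 7 may instead become (x << 3) - x.  The code
   below emits the equivalent gimple statements so that each step can be
   vectorized with plain shifts, adds and subtracts.  */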
2378 vect_synth_mult_by_constant (tree op
, tree val
,
2379 stmt_vec_info stmt_vinfo
)
2381 tree itype
= TREE_TYPE (op
);
2382 machine_mode mode
= TYPE_MODE (itype
);
2383 struct algorithm alg
;
2384 mult_variant variant
;
2385 if (!tree_fits_shwi_p (val
))
2388 /* Multiplication synthesis by shifts, adds and subs can introduce
2389 signed overflow where the original operation didn't. Perform the
2390 operations on an unsigned type and cast back to avoid this.
2391 In the future we may want to relax this for synthesis algorithms
2392 that we can prove do not cause unexpected overflow. */
2393 bool cast_to_unsigned_p
= !TYPE_OVERFLOW_WRAPS (itype
);
2395 tree multtype
= cast_to_unsigned_p
? unsigned_type_for (itype
) : itype
;
2397 /* Targets that don't support vector shifts but support vector additions
2398 can synthesize shifts that way. */
2399 bool synth_shift_p
= !vect_supportable_shift (LSHIFT_EXPR
, multtype
);
2401 HOST_WIDE_INT hwval
= tree_to_shwi (val
);
2402 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
2403 The vectorizer's benefit analysis will decide whether it's beneficial
2405 bool possible
= choose_mult_variant (mode
, hwval
, &alg
,
2406 &variant
, MAX_COST
);
2410 tree vectype
= get_vectype_for_scalar_type (multtype
);
2413 || !target_supports_mult_synth_alg (&alg
, variant
,
2414 vectype
, synth_shift_p
))
2419 /* Clear out the sequence of statements so we can populate it below. */
2420 gimple
*stmt
= NULL
;
2422 if (cast_to_unsigned_p
)
2424 tree tmp_op
= vect_recog_temp_ssa_var (multtype
, NULL
);
2425 stmt
= gimple_build_assign (tmp_op
, CONVERT_EXPR
, op
);
2426 append_pattern_def_seq (stmt_vinfo
, stmt
);
2430 if (alg
.op
[0] == alg_zero
)
2431 accumulator
= build_int_cst (multtype
, 0);
2435 bool needs_fixup
= (variant
== negate_variant
)
2436 || (variant
== add_variant
);
2438 for (int i
= 1; i
< alg
.ops
; i
++)
2440 tree shft_log
= build_int_cst (multtype
, alg
.log
[i
]);
2441 tree accum_tmp
= vect_recog_temp_ssa_var (multtype
, NULL
);
2442 tree tmp_var
= NULL_TREE
;
2449 = synth_lshift_by_additions (accum_tmp
, accumulator
, alg
.log
[i
],
2452 stmt
= gimple_build_assign (accum_tmp
, LSHIFT_EXPR
, accumulator
,
2457 = apply_binop_and_append_stmt (LSHIFT_EXPR
, op
, shft_log
,
2458 stmt_vinfo
, synth_shift_p
);
2459 stmt
= gimple_build_assign (accum_tmp
, PLUS_EXPR
, accumulator
,
2463 tmp_var
= apply_binop_and_append_stmt (LSHIFT_EXPR
, op
,
2464 shft_log
, stmt_vinfo
,
2466 /* In some algorithms the first step involves zeroing the
2467 accumulator. If subtracting from such an accumulator
2468 just emit the negation directly. */
2469 if (integer_zerop (accumulator
))
2470 stmt
= gimple_build_assign (accum_tmp
, NEGATE_EXPR
, tmp_var
);
2472 stmt
= gimple_build_assign (accum_tmp
, MINUS_EXPR
, accumulator
,
2477 = apply_binop_and_append_stmt (LSHIFT_EXPR
, accumulator
, shft_log
,
2478 stmt_vinfo
, synth_shift_p
);
2479 stmt
= gimple_build_assign (accum_tmp
, PLUS_EXPR
, tmp_var
, op
);
2483 = apply_binop_and_append_stmt (LSHIFT_EXPR
, accumulator
, shft_log
,
2484 stmt_vinfo
, synth_shift_p
);
2485 stmt
= gimple_build_assign (accum_tmp
, MINUS_EXPR
, tmp_var
, op
);
2487 case alg_add_factor
:
2489 = apply_binop_and_append_stmt (LSHIFT_EXPR
, accumulator
, shft_log
,
2490 stmt_vinfo
, synth_shift_p
);
2491 stmt
= gimple_build_assign (accum_tmp
, PLUS_EXPR
, accumulator
,
2494 case alg_sub_factor
:
2496 = apply_binop_and_append_stmt (LSHIFT_EXPR
, accumulator
, shft_log
,
2497 stmt_vinfo
, synth_shift_p
);
2498 stmt
= gimple_build_assign (accum_tmp
, MINUS_EXPR
, tmp_var
,
2504 /* We don't want to append the last stmt in the sequence to stmt_vinfo
2505 but rather return it directly. */
2507 if ((i
< alg
.ops
- 1) || needs_fixup
|| cast_to_unsigned_p
)
2508 append_pattern_def_seq (stmt_vinfo
, stmt
);
2509 accumulator
= accum_tmp
;
2511 if (variant
== negate_variant
)
2513 tree accum_tmp
= vect_recog_temp_ssa_var (multtype
, NULL
);
2514 stmt
= gimple_build_assign (accum_tmp
, NEGATE_EXPR
, accumulator
);
2515 accumulator
= accum_tmp
;
2516 if (cast_to_unsigned_p
)
2517 append_pattern_def_seq (stmt_vinfo
, stmt
);
2519 else if (variant
== add_variant
)
2521 tree accum_tmp
= vect_recog_temp_ssa_var (multtype
, NULL
);
2522 stmt
= gimple_build_assign (accum_tmp
, PLUS_EXPR
, accumulator
, op
);
2523 accumulator
= accum_tmp
;
2524 if (cast_to_unsigned_p
)
2525 append_pattern_def_seq (stmt_vinfo
, stmt
);
2527 /* Move back to a signed if needed. */
2528 if (cast_to_unsigned_p
)
2530 tree accum_tmp
= vect_recog_temp_ssa_var (itype
, NULL
);
2531 stmt
= gimple_build_assign (accum_tmp
, CONVERT_EXPR
, accumulator
);
/* Detect multiplication by constant and convert it into a sequence of
   shifts and additions, subtractions, negations.  We reuse the
   choose_mult_variant algorithms from expmed.c.

   Input/Output:

   STMT_VINFO: The stmt from which the pattern search begins,
   i.e. the mult stmt.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace
     the multiplication.  */
2554 vect_recog_mult_pattern (stmt_vec_info stmt_vinfo
, tree
*type_out
)
2556 gimple
*last_stmt
= stmt_vinfo
->stmt
;
2557 tree oprnd0
, oprnd1
, vectype
, itype
;
2558 gimple
*pattern_stmt
;
2560 if (!is_gimple_assign (last_stmt
))
2563 if (gimple_assign_rhs_code (last_stmt
) != MULT_EXPR
)
2566 oprnd0
= gimple_assign_rhs1 (last_stmt
);
2567 oprnd1
= gimple_assign_rhs2 (last_stmt
);
2568 itype
= TREE_TYPE (oprnd0
);
2570 if (TREE_CODE (oprnd0
) != SSA_NAME
2571 || TREE_CODE (oprnd1
) != INTEGER_CST
2572 || !INTEGRAL_TYPE_P (itype
)
2573 || !type_has_mode_precision_p (itype
))
2576 vectype
= get_vectype_for_scalar_type (itype
);
2577 if (vectype
== NULL_TREE
)
2580 /* If the target can handle vectorized multiplication natively,
2581 don't attempt to optimize this. */
2582 optab mul_optab
= optab_for_tree_code (MULT_EXPR
, vectype
, optab_default
);
2583 if (mul_optab
!= unknown_optab
)
2585 machine_mode vec_mode
= TYPE_MODE (vectype
);
2586 int icode
= (int) optab_handler (mul_optab
, vec_mode
);
2587 if (icode
!= CODE_FOR_nothing
)
2591 pattern_stmt
= vect_synth_mult_by_constant (oprnd0
, oprnd1
, stmt_vinfo
);
2595 /* Pattern detected. */
2596 vect_pattern_detected ("vect_recog_mult_pattern", last_stmt
);
2598 *type_out
= vectype
;
2600 return pattern_stmt
;
/* Detect a signed division by a constant that wouldn't be
   otherwise vectorized:

     type a_t, b_t;

     S1 a_t = b_t / N;

   where type 'type' is an integral type and N is a constant.

   Similarly handle modulo by a constant:

     S4 a_t = b_t % N;

   Input/Output:

   * STMT_VINFO: The stmt from which the pattern search begins,
     i.e. the division stmt.  S1 is replaced by, if N is a power
     of two constant and type is signed:
     S3  y_t = b_t < 0 ? N - 1 : 0;
     S2  x_t = b_t + y_t;
     S1' a_t = x_t >> log2 (N);

     S4 is replaced, if N is a power of two constant and
     type is signed, by (where *_T temporaries have unsigned type):
     S9  y_T = b_t < 0 ? -1U : 0U;
     S8  z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
     S7  z_t = (type) z_T;
     S6  w_t = b_t + z_t;
     S5  x_t = w_t & (N - 1);
     S4' a_t = x_t - z_t;

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the division
     S1 or modulo S4 stmt.  */
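/* For illustration only (this example is not part of the original sources):
   for N == 8 the replacement of S1 above corresponds to the usual scalar
   strength reduction of a signed division by a power of two:

       int
       sdiv_by_8 (int b)
       {
	 int y = b < 0 ? 7 : 0;	  // S3: bias negative values
	 int x = b + y;		  // S2
	 return x >> 3;		  // S1': arithmetic shift
       }

   which rounds towards zero exactly like b / 8, but uses only a compare,
   an add and a shift that all vectorize easily.  */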
2642 vect_recog_divmod_pattern (stmt_vec_info stmt_vinfo
, tree
*type_out
)
2644 gimple
*last_stmt
= stmt_vinfo
->stmt
;
2645 tree oprnd0
, oprnd1
, vectype
, itype
, cond
;
2646 gimple
*pattern_stmt
, *def_stmt
;
2647 enum tree_code rhs_code
;
2650 int dummy_int
, prec
;
2652 if (!is_gimple_assign (last_stmt
))
2655 rhs_code
= gimple_assign_rhs_code (last_stmt
);
2658 case TRUNC_DIV_EXPR
:
2659 case EXACT_DIV_EXPR
:
2660 case TRUNC_MOD_EXPR
:
2666 oprnd0
= gimple_assign_rhs1 (last_stmt
);
2667 oprnd1
= gimple_assign_rhs2 (last_stmt
);
2668 itype
= TREE_TYPE (oprnd0
);
2669 if (TREE_CODE (oprnd0
) != SSA_NAME
2670 || TREE_CODE (oprnd1
) != INTEGER_CST
2671 || TREE_CODE (itype
) != INTEGER_TYPE
2672 || !type_has_mode_precision_p (itype
))
2675 scalar_int_mode itype_mode
= SCALAR_INT_TYPE_MODE (itype
);
2676 vectype
= get_vectype_for_scalar_type (itype
);
2677 if (vectype
== NULL_TREE
)
2680 if (optimize_bb_for_size_p (gimple_bb (last_stmt
)))
2682 /* If the target can handle vectorized division or modulo natively,
2683 don't attempt to optimize this, since native division is likely
2684 to give smaller code. */
2685 optab
= optab_for_tree_code (rhs_code
, vectype
, optab_default
);
2686 if (optab
!= unknown_optab
)
2688 machine_mode vec_mode
= TYPE_MODE (vectype
);
2689 int icode
= (int) optab_handler (optab
, vec_mode
);
2690 if (icode
!= CODE_FOR_nothing
)
2695 prec
= TYPE_PRECISION (itype
);
2696 if (integer_pow2p (oprnd1
))
2698 if (TYPE_UNSIGNED (itype
) || tree_int_cst_sgn (oprnd1
) != 1)
2701 /* Pattern detected. */
2702 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt
);
2704 cond
= build2 (LT_EXPR
, boolean_type_node
, oprnd0
,
2705 build_int_cst (itype
, 0));
2706 if (rhs_code
== TRUNC_DIV_EXPR
2707 || rhs_code
== EXACT_DIV_EXPR
)
2709 tree var
= vect_recog_temp_ssa_var (itype
, NULL
);
2712 = gimple_build_assign (var
, COND_EXPR
, cond
,
2713 fold_build2 (MINUS_EXPR
, itype
, oprnd1
,
2714 build_int_cst (itype
, 1)),
2715 build_int_cst (itype
, 0));
2716 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2717 var
= vect_recog_temp_ssa_var (itype
, NULL
);
2719 = gimple_build_assign (var
, PLUS_EXPR
, oprnd0
,
2720 gimple_assign_lhs (def_stmt
));
2721 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2723 shift
= build_int_cst (itype
, tree_log2 (oprnd1
));
2725 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2726 RSHIFT_EXPR
, var
, shift
);
2731 if (compare_tree_int (oprnd1
, 2) == 0)
2733 signmask
= vect_recog_temp_ssa_var (itype
, NULL
);
2734 def_stmt
= gimple_build_assign (signmask
, COND_EXPR
, cond
,
2735 build_int_cst (itype
, 1),
2736 build_int_cst (itype
, 0));
2737 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2742 = build_nonstandard_integer_type (prec
, 1);
2743 tree vecutype
= get_vectype_for_scalar_type (utype
);
2745 = build_int_cst (utype
, GET_MODE_BITSIZE (itype_mode
)
2746 - tree_log2 (oprnd1
));
2747 tree var
= vect_recog_temp_ssa_var (utype
, NULL
);
2749 def_stmt
= gimple_build_assign (var
, COND_EXPR
, cond
,
2750 build_int_cst (utype
, -1),
2751 build_int_cst (utype
, 0));
2752 append_pattern_def_seq (stmt_vinfo
, def_stmt
, vecutype
);
2753 var
= vect_recog_temp_ssa_var (utype
, NULL
);
2754 def_stmt
= gimple_build_assign (var
, RSHIFT_EXPR
,
2755 gimple_assign_lhs (def_stmt
),
2757 append_pattern_def_seq (stmt_vinfo
, def_stmt
, vecutype
);
2758 signmask
= vect_recog_temp_ssa_var (itype
, NULL
);
2760 = gimple_build_assign (signmask
, NOP_EXPR
, var
);
2761 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2764 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2765 PLUS_EXPR
, oprnd0
, signmask
);
2766 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2768 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2769 BIT_AND_EXPR
, gimple_assign_lhs (def_stmt
),
2770 fold_build2 (MINUS_EXPR
, itype
, oprnd1
,
2771 build_int_cst (itype
, 1)));
2772 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2775 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2776 MINUS_EXPR
, gimple_assign_lhs (def_stmt
),
2780 *type_out
= vectype
;
2781 return pattern_stmt
;
2784 if (prec
> HOST_BITS_PER_WIDE_INT
2785 || integer_zerop (oprnd1
))
2788 if (!can_mult_highpart_p (TYPE_MODE (vectype
), TYPE_UNSIGNED (itype
)))
2791 if (TYPE_UNSIGNED (itype
))
2793 unsigned HOST_WIDE_INT mh
, ml
;
2794 int pre_shift
, post_shift
;
2795 unsigned HOST_WIDE_INT d
= (TREE_INT_CST_LOW (oprnd1
)
2796 & GET_MODE_MASK (itype_mode
));
2797 tree t1
, t2
, t3
, t4
;
2799 if (d
>= (HOST_WIDE_INT_1U
<< (prec
- 1)))
2800 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2803 /* Find a suitable multiplier and right shift count
2804 instead of multiplying with D. */
2805 mh
= choose_multiplier (d
, prec
, prec
, &ml
, &post_shift
, &dummy_int
);
2807 /* If the suggested multiplier is more than SIZE bits, we can do better
2808 for even divisors, using an initial right shift. */
2809 if (mh
!= 0 && (d
& 1) == 0)
2811 pre_shift
= ctz_or_zero (d
);
2812 mh
= choose_multiplier (d
>> pre_shift
, prec
, prec
- pre_shift
,
2813 &ml
, &post_shift
, &dummy_int
);
2821 if (post_shift
- 1 >= prec
)
2824 /* t1 = oprnd0 h* ml;
2828 q = t4 >> (post_shift - 1); */
2829 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
2830 def_stmt
= gimple_build_assign (t1
, MULT_HIGHPART_EXPR
, oprnd0
,
2831 build_int_cst (itype
, ml
));
2832 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2834 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
2836 = gimple_build_assign (t2
, MINUS_EXPR
, oprnd0
, t1
);
2837 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2839 t3
= vect_recog_temp_ssa_var (itype
, NULL
);
2841 = gimple_build_assign (t3
, RSHIFT_EXPR
, t2
, integer_one_node
);
2842 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2844 t4
= vect_recog_temp_ssa_var (itype
, NULL
);
2846 = gimple_build_assign (t4
, PLUS_EXPR
, t1
, t3
);
2848 if (post_shift
!= 1)
2850 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2852 q
= vect_recog_temp_ssa_var (itype
, NULL
);
2854 = gimple_build_assign (q
, RSHIFT_EXPR
, t4
,
2855 build_int_cst (itype
, post_shift
- 1));
2860 pattern_stmt
= def_stmt
;
2865 if (pre_shift
>= prec
|| post_shift
>= prec
)
2868 /* t1 = oprnd0 >> pre_shift;
2870 q = t2 >> post_shift; */
2873 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
2875 = gimple_build_assign (t1
, RSHIFT_EXPR
, oprnd0
,
2876 build_int_cst (NULL
, pre_shift
));
2877 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2882 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
2883 def_stmt
= gimple_build_assign (t2
, MULT_HIGHPART_EXPR
, t1
,
2884 build_int_cst (itype
, ml
));
2888 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2890 q
= vect_recog_temp_ssa_var (itype
, NULL
);
2892 = gimple_build_assign (q
, RSHIFT_EXPR
, t2
,
2893 build_int_cst (itype
, post_shift
));
2898 pattern_stmt
= def_stmt
;
2903 unsigned HOST_WIDE_INT ml
;
2905 HOST_WIDE_INT d
= TREE_INT_CST_LOW (oprnd1
);
2906 unsigned HOST_WIDE_INT abs_d
;
2908 tree t1
, t2
, t3
, t4
;
2910 /* Give up for -1. */
2914 /* Since d might be INT_MIN, we have to cast to
2915 unsigned HOST_WIDE_INT before negating to avoid
2916 undefined signed overflow. */
2918 ? (unsigned HOST_WIDE_INT
) d
2919 : - (unsigned HOST_WIDE_INT
) d
);
2921 /* n rem d = n rem -d */
2922 if (rhs_code
== TRUNC_MOD_EXPR
&& d
< 0)
2925 oprnd1
= build_int_cst (itype
, abs_d
);
2927 else if (HOST_BITS_PER_WIDE_INT
>= prec
2928 && abs_d
== HOST_WIDE_INT_1U
<< (prec
- 1))
2929 /* This case is not handled correctly below. */
2932 choose_multiplier (abs_d
, prec
, prec
- 1, &ml
, &post_shift
, &dummy_int
);
2933 if (ml
>= HOST_WIDE_INT_1U
<< (prec
- 1))
2936 ml
|= HOST_WIDE_INT_M1U
<< (prec
- 1);
2938 if (post_shift
>= prec
)
2941 /* t1 = oprnd0 h* ml; */
2942 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
2943 def_stmt
= gimple_build_assign (t1
, MULT_HIGHPART_EXPR
, oprnd0
,
2944 build_int_cst (itype
, ml
));
2948 /* t2 = t1 + oprnd0; */
2949 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2950 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
2951 def_stmt
= gimple_build_assign (t2
, PLUS_EXPR
, t1
, oprnd0
);
2958 /* t3 = t2 >> post_shift; */
2959 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2960 t3
= vect_recog_temp_ssa_var (itype
, NULL
);
2961 def_stmt
= gimple_build_assign (t3
, RSHIFT_EXPR
, t2
,
2962 build_int_cst (itype
, post_shift
));
2967 wide_int oprnd0_min
, oprnd0_max
;
2969 if (get_range_info (oprnd0
, &oprnd0_min
, &oprnd0_max
) == VR_RANGE
)
2971 if (!wi::neg_p (oprnd0_min
, TYPE_SIGN (itype
)))
2973 else if (wi::neg_p (oprnd0_max
, TYPE_SIGN (itype
)))
2977 if (msb
== 0 && d
>= 0)
2981 pattern_stmt
= def_stmt
;
2985 /* t4 = oprnd0 >> (prec - 1);
2986 or if we know from VRP that oprnd0 >= 0
2988 or if we know from VRP that oprnd0 < 0
2990 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2991 t4
= vect_recog_temp_ssa_var (itype
, NULL
);
2993 def_stmt
= gimple_build_assign (t4
, INTEGER_CST
,
2994 build_int_cst (itype
, msb
));
2996 def_stmt
= gimple_build_assign (t4
, RSHIFT_EXPR
, oprnd0
,
2997 build_int_cst (itype
, prec
- 1));
2998 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
3000 /* q = t3 - t4; or q = t4 - t3; */
3001 q
= vect_recog_temp_ssa_var (itype
, NULL
);
3002 pattern_stmt
= gimple_build_assign (q
, MINUS_EXPR
, d
< 0 ? t4
: t3
,
3007 if (rhs_code
== TRUNC_MOD_EXPR
)
3011 /* We divided. Now finish by:
3014 append_pattern_def_seq (stmt_vinfo
, pattern_stmt
);
3016 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
3017 def_stmt
= gimple_build_assign (t1
, MULT_EXPR
, q
, oprnd1
);
3018 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
3020 r
= vect_recog_temp_ssa_var (itype
, NULL
);
3021 pattern_stmt
= gimple_build_assign (r
, MINUS_EXPR
, oprnd0
, t1
);
3024 /* Pattern detected. */
3025 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt
);
3027 *type_out
= vectype
;
3028 return pattern_stmt
;
/* Function vect_recog_mixed_size_cond_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE a_T, b_T, c_T;
   loop:
     S1  a_T = x_t CMP y_t ? b_T : c_T;

   where type 'TYPE' is an integral type which has different size
   from 'type'.  b_T and c_T are either constants (and if 'TYPE' is wider
   than 'type', the constants need to fit into an integer type
   with the same width as 'type') or results of conversion from 'type'.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the pattern.
	Additionally a def_stmt is added.

	a_it = x_t CMP y_t ? b_it : c_it;
	a_T = (TYPE) a_it;  */
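/* For illustration only (this example is not part of the original sources):
   a typical source for this pattern mixes a narrow comparison with a wider
   select result, e.g.

       void
       mixed_cond (const short *x, const short *y, int *out, int n)
       {
	 for (int i = 0; i < n; i++)
	   out[i] = x[i] < y[i] ? 1 : 2;
       }

   Doing the COND_EXPR in a type that matches the comparison (here short)
   and widening the result afterwards avoids unpacking the comparison
   mask.  */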
3060 vect_recog_mixed_size_cond_pattern (stmt_vec_info stmt_vinfo
, tree
*type_out
)
3062 gimple
*last_stmt
= stmt_vinfo
->stmt
;
3063 tree cond_expr
, then_clause
, else_clause
;
3064 tree type
, vectype
, comp_vectype
, itype
= NULL_TREE
, vecitype
;
3065 gimple
*pattern_stmt
, *def_stmt
;
3066 tree orig_type0
= NULL_TREE
, orig_type1
= NULL_TREE
;
3067 gimple
*def_stmt0
= NULL
, *def_stmt1
= NULL
;
3069 tree comp_scalar_type
;
3071 if (!is_gimple_assign (last_stmt
)
3072 || gimple_assign_rhs_code (last_stmt
) != COND_EXPR
3073 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_internal_def
)
3076 cond_expr
= gimple_assign_rhs1 (last_stmt
);
3077 then_clause
= gimple_assign_rhs2 (last_stmt
);
3078 else_clause
= gimple_assign_rhs3 (last_stmt
);
3080 if (!COMPARISON_CLASS_P (cond_expr
))
3083 comp_scalar_type
= TREE_TYPE (TREE_OPERAND (cond_expr
, 0));
3084 comp_vectype
= get_vectype_for_scalar_type (comp_scalar_type
);
3085 if (comp_vectype
== NULL_TREE
)
3088 type
= gimple_expr_type (last_stmt
);
3089 if (types_compatible_p (type
, comp_scalar_type
)
3090 || ((TREE_CODE (then_clause
) != INTEGER_CST
3091 || TREE_CODE (else_clause
) != INTEGER_CST
)
3092 && !INTEGRAL_TYPE_P (comp_scalar_type
))
3093 || !INTEGRAL_TYPE_P (type
))
3096 if ((TREE_CODE (then_clause
) != INTEGER_CST
3097 && !type_conversion_p (then_clause
, stmt_vinfo
, false, &orig_type0
,
3098 &def_stmt0
, &promotion
))
3099 || (TREE_CODE (else_clause
) != INTEGER_CST
3100 && !type_conversion_p (else_clause
, stmt_vinfo
, false, &orig_type1
,
3101 &def_stmt1
, &promotion
)))
3104 if (orig_type0
&& orig_type1
3105 && !types_compatible_p (orig_type0
, orig_type1
))
3110 if (!types_compatible_p (orig_type0
, comp_scalar_type
))
3112 then_clause
= gimple_assign_rhs1 (def_stmt0
);
3118 if (!types_compatible_p (orig_type1
, comp_scalar_type
))
3120 else_clause
= gimple_assign_rhs1 (def_stmt1
);
3125 HOST_WIDE_INT cmp_mode_size
3126 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype
));
3128 scalar_int_mode type_mode
= SCALAR_INT_TYPE_MODE (type
);
3129 if (GET_MODE_BITSIZE (type_mode
) == cmp_mode_size
)
3132 vectype
= get_vectype_for_scalar_type (type
);
3133 if (vectype
== NULL_TREE
)
3136 if (expand_vec_cond_expr_p (vectype
, comp_vectype
, TREE_CODE (cond_expr
)))
3139 if (itype
== NULL_TREE
)
3140 itype
= build_nonstandard_integer_type (cmp_mode_size
,
3141 TYPE_UNSIGNED (type
));
3143 if (itype
== NULL_TREE
3144 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype
)) != cmp_mode_size
)
3147 vecitype
= get_vectype_for_scalar_type (itype
);
3148 if (vecitype
== NULL_TREE
)
3151 if (!expand_vec_cond_expr_p (vecitype
, comp_vectype
, TREE_CODE (cond_expr
)))
3154 if (GET_MODE_BITSIZE (type_mode
) > cmp_mode_size
)
3156 if ((TREE_CODE (then_clause
) == INTEGER_CST
3157 && !int_fits_type_p (then_clause
, itype
))
3158 || (TREE_CODE (else_clause
) == INTEGER_CST
3159 && !int_fits_type_p (else_clause
, itype
)))
3163 def_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
3164 COND_EXPR
, unshare_expr (cond_expr
),
3165 fold_convert (itype
, then_clause
),
3166 fold_convert (itype
, else_clause
));
3167 pattern_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (type
, NULL
),
3168 NOP_EXPR
, gimple_assign_lhs (def_stmt
));
3170 append_pattern_def_seq (stmt_vinfo
, def_stmt
, vecitype
);
3171 *type_out
= vectype
;
3173 vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt
);
3175 return pattern_stmt
;
/* Helper function of vect_recog_bool_pattern.  Called recursively, return
   true if bool VAR can and should be optimized that way.  Assume it shouldn't
   in case it's a result of a comparison which can be directly vectorized into
   a vector comparison.  Fills in STMTS with all stmts visited during the
   analysis.  */
3186 check_bool_pattern (tree var
, vec_info
*vinfo
, hash_set
<gimple
*> &stmts
)
3189 enum tree_code rhs_code
;
3191 stmt_vec_info def_stmt_info
= vect_get_internal_def (vinfo
, var
);
3195 gassign
*def_stmt
= dyn_cast
<gassign
*> (def_stmt_info
->stmt
);
3199 if (stmts
.contains (def_stmt
))
3202 rhs1
= gimple_assign_rhs1 (def_stmt
);
3203 rhs_code
= gimple_assign_rhs_code (def_stmt
);
3207 if (! check_bool_pattern (rhs1
, vinfo
, stmts
))
3212 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1
)))
3214 if (! check_bool_pattern (rhs1
, vinfo
, stmts
))
3219 if (! check_bool_pattern (rhs1
, vinfo
, stmts
))
3226 if (! check_bool_pattern (rhs1
, vinfo
, stmts
)
3227 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt
), vinfo
, stmts
))
3232 if (TREE_CODE_CLASS (rhs_code
) == tcc_comparison
)
3234 tree vecitype
, comp_vectype
;
3236 /* If the comparison can throw, then is_gimple_condexpr will be
3237 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
3238 if (stmt_could_throw_p (def_stmt
))
3241 comp_vectype
= get_vectype_for_scalar_type (TREE_TYPE (rhs1
));
3242 if (comp_vectype
== NULL_TREE
)
3245 tree mask_type
= get_mask_type_for_scalar_type (TREE_TYPE (rhs1
));
3247 && expand_vec_cmp_expr_p (comp_vectype
, mask_type
, rhs_code
))
3250 if (TREE_CODE (TREE_TYPE (rhs1
)) != INTEGER_TYPE
)
3252 scalar_mode mode
= SCALAR_TYPE_MODE (TREE_TYPE (rhs1
));
3254 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode
), 1);
3255 vecitype
= get_vectype_for_scalar_type (itype
);
3256 if (vecitype
== NULL_TREE
)
3260 vecitype
= comp_vectype
;
3261 if (! expand_vec_cond_expr_p (vecitype
, comp_vectype
, rhs_code
))
3269 bool res
= stmts
.add (def_stmt
);
3270 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
3277 /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
3278 stmt (SSA_NAME_DEF_STMT of VAR) adding a cast to STMT_INFOs
3279 pattern sequence. */
3282 adjust_bool_pattern_cast (tree type
, tree var
, stmt_vec_info stmt_info
)
3284 gimple
*cast_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (type
, NULL
),
3286 append_pattern_def_seq (stmt_info
, cast_stmt
,
3287 get_vectype_for_scalar_type (type
));
3288 return gimple_assign_lhs (cast_stmt
);
/* Helper function of vect_recog_bool_pattern.  Do the actual transformations.
   VAR is an SSA_NAME that should be transformed from bool to a wider integer
   type, OUT_TYPE is the desired final integer type of the whole pattern.
   STMT_INFO is the info of the pattern root and is where pattern stmts should
   be associated with.  DEFS is a map of pattern defs.  */
3298 adjust_bool_pattern (tree var
, tree out_type
,
3299 stmt_vec_info stmt_info
, hash_map
<tree
, tree
> &defs
)
3301 gimple
*stmt
= SSA_NAME_DEF_STMT (var
);
3302 enum tree_code rhs_code
, def_rhs_code
;
3303 tree itype
, cond_expr
, rhs1
, rhs2
, irhs1
, irhs2
;
3305 gimple
*pattern_stmt
, *def_stmt
;
3306 tree trueval
= NULL_TREE
;
3308 rhs1
= gimple_assign_rhs1 (stmt
);
3309 rhs2
= gimple_assign_rhs2 (stmt
);
3310 rhs_code
= gimple_assign_rhs_code (stmt
);
3311 loc
= gimple_location (stmt
);
3316 irhs1
= *defs
.get (rhs1
);
3317 itype
= TREE_TYPE (irhs1
);
3319 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
3324 irhs1
= *defs
.get (rhs1
);
3325 itype
= TREE_TYPE (irhs1
);
3327 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
3328 BIT_XOR_EXPR
, irhs1
, build_int_cst (itype
, 1));
3332 /* Try to optimize x = y & (a < b ? 1 : 0); into
3333 x = (a < b ? y : 0);
3339 S1 a_b = x1 CMP1 y1;
3340 S2 b_b = x2 CMP2 y2;
3342 S4 d_T = (TYPE) c_b;
3344 we would normally emit:
3346 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3347 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3348 S3' c_T = a_T & b_T;
3351 but we can save one stmt by using the
3352 result of one of the COND_EXPRs in the other COND_EXPR and leave
3353 BIT_AND_EXPR stmt out:
3355 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3356 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3359 At least when VEC_COND_EXPR is implemented using masks
3360 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
3361 computes the comparison masks and ands it, in one case with
3362 all ones vector, in the other case with a vector register.
3363 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
3364 often more expensive. */
3365 def_stmt
= SSA_NAME_DEF_STMT (rhs2
);
3366 def_rhs_code
= gimple_assign_rhs_code (def_stmt
);
3367 if (TREE_CODE_CLASS (def_rhs_code
) == tcc_comparison
)
3369 irhs1
= *defs
.get (rhs1
);
3370 tree def_rhs1
= gimple_assign_rhs1 (def_stmt
);
3371 if (TYPE_PRECISION (TREE_TYPE (irhs1
))
3372 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1
))))
3374 rhs_code
= def_rhs_code
;
3376 rhs2
= gimple_assign_rhs2 (def_stmt
);
3381 irhs2
= *defs
.get (rhs2
);
3384 def_stmt
= SSA_NAME_DEF_STMT (rhs1
);
3385 def_rhs_code
= gimple_assign_rhs_code (def_stmt
);
3386 if (TREE_CODE_CLASS (def_rhs_code
) == tcc_comparison
)
3388 irhs2
= *defs
.get (rhs2
);
3389 tree def_rhs1
= gimple_assign_rhs1 (def_stmt
);
3390 if (TYPE_PRECISION (TREE_TYPE (irhs2
))
3391 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1
))))
3393 rhs_code
= def_rhs_code
;
3395 rhs2
= gimple_assign_rhs2 (def_stmt
);
3400 irhs1
= *defs
.get (rhs1
);
3406 irhs1
= *defs
.get (rhs1
);
3407 irhs2
= *defs
.get (rhs2
);
3409 if (TYPE_PRECISION (TREE_TYPE (irhs1
))
3410 != TYPE_PRECISION (TREE_TYPE (irhs2
)))
3412 int prec1
= TYPE_PRECISION (TREE_TYPE (irhs1
));
3413 int prec2
= TYPE_PRECISION (TREE_TYPE (irhs2
));
3414 int out_prec
= TYPE_PRECISION (out_type
);
3415 if (absu_hwi (out_prec
- prec1
) < absu_hwi (out_prec
- prec2
))
3416 irhs2
= adjust_bool_pattern_cast (TREE_TYPE (irhs1
), irhs2
,
3418 else if (absu_hwi (out_prec
- prec1
) > absu_hwi (out_prec
- prec2
))
3419 irhs1
= adjust_bool_pattern_cast (TREE_TYPE (irhs2
), irhs1
,
3423 irhs1
= adjust_bool_pattern_cast (out_type
, irhs1
, stmt_info
);
3424 irhs2
= adjust_bool_pattern_cast (out_type
, irhs2
, stmt_info
);
3427 itype
= TREE_TYPE (irhs1
);
3429 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
3430 rhs_code
, irhs1
, irhs2
);
3435 gcc_assert (TREE_CODE_CLASS (rhs_code
) == tcc_comparison
);
3436 if (TREE_CODE (TREE_TYPE (rhs1
)) != INTEGER_TYPE
3437 || !TYPE_UNSIGNED (TREE_TYPE (rhs1
))
3438 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1
)),
3439 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1
)))))
3441 scalar_mode mode
= SCALAR_TYPE_MODE (TREE_TYPE (rhs1
));
3443 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode
), 1);
3446 itype
= TREE_TYPE (rhs1
);
3447 cond_expr
= build2_loc (loc
, rhs_code
, itype
, rhs1
, rhs2
);
3448 if (trueval
== NULL_TREE
)
3449 trueval
= build_int_cst (itype
, 1);
3451 gcc_checking_assert (useless_type_conversion_p (itype
,
3452 TREE_TYPE (trueval
)));
3454 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
3455 COND_EXPR
, cond_expr
, trueval
,
3456 build_int_cst (itype
, 0));
3460 gimple_set_location (pattern_stmt
, loc
);
3461 append_pattern_def_seq (stmt_info
, pattern_stmt
,
3462 get_vectype_for_scalar_type (itype
));
3463 defs
.put (var
, gimple_assign_lhs (pattern_stmt
));
/* Comparison function to qsort a vector of gimple stmts by UID.  */
3469 sort_after_uid (const void *p1
, const void *p2
)
3471 const gimple
*stmt1
= *(const gimple
* const *)p1
;
3472 const gimple
*stmt2
= *(const gimple
* const *)p2
;
3473 return gimple_uid (stmt1
) - gimple_uid (stmt2
);
/* Create pattern stmts for all stmts participating in the bool pattern
   specified by BOOL_STMT_SET and its root STMT_INFO with the desired type
   OUT_TYPE.  Return the def of the pattern root.  */
3481 adjust_bool_stmts (hash_set
<gimple
*> &bool_stmt_set
,
3482 tree out_type
, stmt_vec_info stmt_info
)
3484 /* Gather original stmts in the bool pattern in their order of appearance
3486 auto_vec
<gimple
*> bool_stmts (bool_stmt_set
.elements ());
3487 for (hash_set
<gimple
*>::iterator i
= bool_stmt_set
.begin ();
3488 i
!= bool_stmt_set
.end (); ++i
)
3489 bool_stmts
.quick_push (*i
);
3490 bool_stmts
.qsort (sort_after_uid
);
3492 /* Now process them in that order, producing pattern stmts. */
3493 hash_map
<tree
, tree
> defs
;
3494 for (unsigned i
= 0; i
< bool_stmts
.length (); ++i
)
3495 adjust_bool_pattern (gimple_assign_lhs (bool_stmts
[i
]),
3496 out_type
, stmt_info
, defs
);
3498 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
3499 gimple
*pattern_stmt
3500 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
));
3501 return gimple_assign_lhs (pattern_stmt
);
3504 /* Helper for search_type_for_mask. */
3507 search_type_for_mask_1 (tree var
, vec_info
*vinfo
,
3508 hash_map
<gimple
*, tree
> &cache
)
3511 enum tree_code rhs_code
;
3512 tree res
= NULL_TREE
, res2
;
3514 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var
)))
3517 stmt_vec_info def_stmt_info
= vect_get_internal_def (vinfo
, var
);
3521 gassign
*def_stmt
= dyn_cast
<gassign
*> (def_stmt_info
->stmt
);
3525 tree
*c
= cache
.get (def_stmt
);
3529 rhs_code
= gimple_assign_rhs_code (def_stmt
);
3530 rhs1
= gimple_assign_rhs1 (def_stmt
);
3537 res
= search_type_for_mask_1 (rhs1
, vinfo
, cache
);
3543 res
= search_type_for_mask_1 (rhs1
, vinfo
, cache
);
3544 res2
= search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt
), vinfo
,
3546 if (!res
|| (res2
&& TYPE_PRECISION (res
) > TYPE_PRECISION (res2
)))
3551 if (TREE_CODE_CLASS (rhs_code
) == tcc_comparison
)
3553 tree comp_vectype
, mask_type
;
3555 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1
)))
3557 res
= search_type_for_mask_1 (rhs1
, vinfo
, cache
);
3558 res2
= search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt
),
3560 if (!res
|| (res2
&& TYPE_PRECISION (res
) > TYPE_PRECISION (res2
)))
3565 comp_vectype
= get_vectype_for_scalar_type (TREE_TYPE (rhs1
));
3566 if (comp_vectype
== NULL_TREE
)
3572 mask_type
= get_mask_type_for_scalar_type (TREE_TYPE (rhs1
));
3574 || !expand_vec_cmp_expr_p (comp_vectype
, mask_type
, rhs_code
))
3580 if (TREE_CODE (TREE_TYPE (rhs1
)) != INTEGER_TYPE
3581 || !TYPE_UNSIGNED (TREE_TYPE (rhs1
)))
3583 scalar_mode mode
= SCALAR_TYPE_MODE (TREE_TYPE (rhs1
));
3584 res
= build_nonstandard_integer_type (GET_MODE_BITSIZE (mode
), 1);
3587 res
= TREE_TYPE (rhs1
);
3591 cache
.put (def_stmt
, res
);
/* Return the proper type for converting bool VAR into
   an integer value or NULL_TREE if no such type exists.
   The type is chosen so that the converted value has the
   same number of elements as VAR's vector type.  */
3601 search_type_for_mask (tree var
, vec_info
*vinfo
)
3603 hash_map
<gimple
*, tree
> cache
;
3604 return search_type_for_mask_1 (var
, vinfo
, cache
);
/* Function vect_recog_bool_pattern

   Try to find pattern like following:

     bool a_b, b_b, c_b, d_b, e_b;
     TYPE f_T;
   loop:
     S1  a_b = x1 CMP1 y1;
     S2  b_b = x2 CMP2 y2;
     S3  c_b = a_b & b_b;
     S4  d_b = x3 CMP3 y3;
     S5  e_b = c_b | d_b;
     S6  f_T = (TYPE) e_b;

   where type 'TYPE' is an integral type.  Or a similar pattern
   ending in

     S6  f_Y = e_b ? r_Y : s_Y;

   as results from if-conversion of a complex condition.

   Input:

   * STMT_VINFO: The stmt at the end from which the pattern
		 search begins, i.e. cast of a bool to
		 an integer type.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the pattern.

	Assuming size of TYPE is the same as size of all comparisons
	(otherwise some casts would be added where needed), for the above
	sequence we create related pattern stmts:
	S1'  a_T = x1 CMP1 y1 ? 1 : 0;
	S3'  c_T = x2 CMP2 y2 ? a_T : 0;
	S4'  d_T = x3 CMP3 y3 ? 1 : 0;
	S5'  e_T = c_T | d_T;
	S6'  f_T = e_T;

	Instead of the above S3' we could emit:
	S2'  b_T = x2 CMP2 y2 ? 1 : 0;
	S3'  c_T = a_T | b_T;
	but the above is more efficient.  */
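/* For illustration only (this example is not part of the original sources):
   the kind of if-converted source that produces such bool chains is

       void
       bool_chain (const int *x, const int *y, const int *z, int *out, int n)
       {
	 for (int i = 0; i < n; i++)
	   out[i] = (x[i] < y[i] && y[i] < z[i]) || z[i] == 0;
       }

   After if-conversion the condition becomes AND/OR of boolean SSA names fed
   by comparisons, which is exactly the shape rewritten into COND_EXPRs on
   integer 0/1 values by the code below.  */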
3655 vect_recog_bool_pattern (stmt_vec_info stmt_vinfo
, tree
*type_out
)
3657 gimple
*last_stmt
= stmt_vinfo
->stmt
;
3658 enum tree_code rhs_code
;
3659 tree var
, lhs
, rhs
, vectype
;
3660 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
3661 gimple
*pattern_stmt
;
3663 if (!is_gimple_assign (last_stmt
))
3666 var
= gimple_assign_rhs1 (last_stmt
);
3667 lhs
= gimple_assign_lhs (last_stmt
);
3669 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var
)))
3672 hash_set
<gimple
*> bool_stmts
;
3674 rhs_code
= gimple_assign_rhs_code (last_stmt
);
3675 if (CONVERT_EXPR_CODE_P (rhs_code
))
3677 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
3678 || TYPE_PRECISION (TREE_TYPE (lhs
)) == 1)
3680 vectype
= get_vectype_for_scalar_type (TREE_TYPE (lhs
));
3681 if (vectype
== NULL_TREE
)
3684 if (check_bool_pattern (var
, vinfo
, bool_stmts
))
3686 rhs
= adjust_bool_stmts (bool_stmts
, TREE_TYPE (lhs
), stmt_vinfo
);
3687 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
3688 if (useless_type_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (rhs
)))
3689 pattern_stmt
= gimple_build_assign (lhs
, SSA_NAME
, rhs
);
3692 = gimple_build_assign (lhs
, NOP_EXPR
, rhs
);
3696 tree type
= search_type_for_mask (var
, vinfo
);
3697 tree cst0
, cst1
, tmp
;
3702 /* We may directly use cond with narrowed type to avoid
3703 multiple cond exprs with following result packing and
3704 perform single cond with packed mask instead. In case
3705 of widening we better make cond first and then extract
3707 if (TYPE_MODE (type
) == TYPE_MODE (TREE_TYPE (lhs
)))
3708 type
= TREE_TYPE (lhs
);
3710 cst0
= build_int_cst (type
, 0);
3711 cst1
= build_int_cst (type
, 1);
3712 tmp
= vect_recog_temp_ssa_var (type
, NULL
);
3713 pattern_stmt
= gimple_build_assign (tmp
, COND_EXPR
, var
, cst1
, cst0
);
3715 if (!useless_type_conversion_p (type
, TREE_TYPE (lhs
)))
3717 tree new_vectype
= get_vectype_for_scalar_type (type
);
3718 append_pattern_def_seq (stmt_vinfo
, pattern_stmt
, new_vectype
);
3720 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
3721 pattern_stmt
= gimple_build_assign (lhs
, CONVERT_EXPR
, tmp
);
3725 *type_out
= vectype
;
3726 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt
);
3728 return pattern_stmt
;
3730 else if (rhs_code
== COND_EXPR
3731 && TREE_CODE (var
) == SSA_NAME
)
3733 vectype
= get_vectype_for_scalar_type (TREE_TYPE (lhs
));
3734 if (vectype
== NULL_TREE
)
3737 /* Build a scalar type for the boolean result that when
3738 vectorized matches the vector type of the result in
3739 size and number of elements. */
3741 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype
)),
3742 TYPE_VECTOR_SUBPARTS (vectype
));
3745 = build_nonstandard_integer_type (prec
,
3746 TYPE_UNSIGNED (TREE_TYPE (var
)));
3747 if (get_vectype_for_scalar_type (type
) == NULL_TREE
)
3750 if (!check_bool_pattern (var
, vinfo
, bool_stmts
))
3753 rhs
= adjust_bool_stmts (bool_stmts
, type
, stmt_vinfo
);
3755 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
3757 = gimple_build_assign (lhs
, COND_EXPR
,
3758 build2 (NE_EXPR
, boolean_type_node
,
3759 rhs
, build_int_cst (type
, 0)),
3760 gimple_assign_rhs2 (last_stmt
),
3761 gimple_assign_rhs3 (last_stmt
));
3762 *type_out
= vectype
;
3763 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt
);
3765 return pattern_stmt
;
3767 else if (rhs_code
== SSA_NAME
3768 && STMT_VINFO_DATA_REF (stmt_vinfo
))
3770 stmt_vec_info pattern_stmt_info
;
3771 vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
3772 gcc_assert (vectype
!= NULL_TREE
);
3773 if (!VECTOR_MODE_P (TYPE_MODE (vectype
)))
3776 if (check_bool_pattern (var
, vinfo
, bool_stmts
))
3777 rhs
= adjust_bool_stmts (bool_stmts
, TREE_TYPE (vectype
), stmt_vinfo
);
3780 tree type
= search_type_for_mask (var
, vinfo
);
3781 tree cst0
, cst1
, new_vectype
;
3786 if (TYPE_MODE (type
) == TYPE_MODE (TREE_TYPE (vectype
)))
3787 type
= TREE_TYPE (vectype
);
3789 cst0
= build_int_cst (type
, 0);
3790 cst1
= build_int_cst (type
, 1);
3791 new_vectype
= get_vectype_for_scalar_type (type
);
3793 rhs
= vect_recog_temp_ssa_var (type
, NULL
);
3794 pattern_stmt
= gimple_build_assign (rhs
, COND_EXPR
, var
, cst1
, cst0
);
3795 append_pattern_def_seq (stmt_vinfo
, pattern_stmt
, new_vectype
);
3798 lhs
= build1 (VIEW_CONVERT_EXPR
, TREE_TYPE (vectype
), lhs
);
3799 if (!useless_type_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (rhs
)))
3801 tree rhs2
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
3802 gimple
*cast_stmt
= gimple_build_assign (rhs2
, NOP_EXPR
, rhs
);
3803 append_pattern_def_seq (stmt_vinfo
, cast_stmt
);
3806 pattern_stmt
= gimple_build_assign (lhs
, SSA_NAME
, rhs
);
3807 pattern_stmt_info
= vinfo
->add_stmt (pattern_stmt
);
3808 vinfo
->move_dr (pattern_stmt_info
, stmt_vinfo
);
3809 *type_out
= vectype
;
3810 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt
);
3812 return pattern_stmt
;
/* A helper for vect_recog_mask_conversion_pattern.  Build
   conversion of MASK to a type suitable for masking VECTYPE.
   The built statement gets the required vectype and is appended
   to STMT_VINFO's pattern sequence.

   Return the converted mask.  */
3827 build_mask_conversion (tree mask
, tree vectype
, stmt_vec_info stmt_vinfo
)
3832 masktype
= build_same_sized_truth_vector_type (vectype
);
3833 tmp
= vect_recog_temp_ssa_var (TREE_TYPE (masktype
), NULL
);
3834 stmt
= gimple_build_assign (tmp
, CONVERT_EXPR
, mask
);
3835 append_pattern_def_seq (stmt_vinfo
, stmt
, masktype
);
/* Function vect_recog_mask_conversion_pattern

   Try to find statements which require boolean type
   conversion.  Additional conversion statements are
   added to handle such cases.  For example:

   S3   m_3 = m_1 & m_2;
   S4   c_1 = m_3 ? c_2 : c_3;

   where m_1 and m_2 are masks produced by comparisons of differently
   sized types, will be transformed into:

   S3'' m_2' = (_Bool[bitsize=32])m_2
   S3'  m_3' = m_1 & m_2';
   S4'' m_3'' = (_Bool[bitsize=8])m_3'
   S4'  c_1' = m_3'' ? c_2 : c_3;  */
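/* For illustration only (this example is not part of the original sources):
   a source that needs such mask conversions mixes comparison widths, e.g.

       void
       mask_convert (const int *a, const double *b, const char *c,
		     const char *d, char *out, int n)
       {
	 for (int i = 0; i < n; i++)
	   out[i] = (a[i] > 0 && b[i] < 1.0) ? c[i] : d[i];
       }

   The int and double comparisons produce masks with different element
   sizes, and the selected values are chars, so explicit mask conversions
   have to be inserted before the AND and the VEC_COND_EXPR.  */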
3867 vect_recog_mask_conversion_pattern (stmt_vec_info stmt_vinfo
, tree
*type_out
)
3869 gimple
*last_stmt
= stmt_vinfo
->stmt
;
3870 enum tree_code rhs_code
;
3871 tree lhs
= NULL_TREE
, rhs1
, rhs2
, tmp
, rhs1_type
, rhs2_type
;
3872 tree vectype1
, vectype2
;
3873 stmt_vec_info pattern_stmt_info
;
3874 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
  /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion.  */
3877 if (is_gimple_call (last_stmt
)
3878 && gimple_call_internal_p (last_stmt
))
3880 gcall
*pattern_stmt
;
3882 internal_fn ifn
= gimple_call_internal_fn (last_stmt
);
3883 int mask_argno
= internal_fn_mask_index (ifn
);
3887 bool store_p
= internal_store_fn_p (ifn
);
3890 int rhs_index
= internal_fn_stored_value_index (ifn
);
3891 tree rhs
= gimple_call_arg (last_stmt
, rhs_index
);
3892 vectype1
= get_vectype_for_scalar_type (TREE_TYPE (rhs
));
3896 lhs
= gimple_call_lhs (last_stmt
);
3897 vectype1
= get_vectype_for_scalar_type (TREE_TYPE (lhs
));
3900 tree mask_arg
= gimple_call_arg (last_stmt
, mask_argno
);
3901 tree mask_arg_type
= search_type_for_mask (mask_arg
, vinfo
);
3904 vectype2
= get_mask_type_for_scalar_type (mask_arg_type
);
3906 if (!vectype1
|| !vectype2
3907 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1
),
3908 TYPE_VECTOR_SUBPARTS (vectype2
)))
3911 tmp
= build_mask_conversion (mask_arg
, vectype1
, stmt_vinfo
);
3913 auto_vec
<tree
, 8> args
;
3914 unsigned int nargs
= gimple_call_num_args (last_stmt
);
3915 args
.safe_grow (nargs
);
3916 for (unsigned int i
= 0; i
< nargs
; ++i
)
3917 args
[i
] = ((int) i
== mask_argno
3919 : gimple_call_arg (last_stmt
, i
));
3920 pattern_stmt
= gimple_build_call_internal_vec (ifn
, args
);
3924 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
3925 gimple_call_set_lhs (pattern_stmt
, lhs
);
3927 gimple_call_set_nothrow (pattern_stmt
, true);
3929 pattern_stmt_info
= vinfo
->add_stmt (pattern_stmt
);
3930 if (STMT_VINFO_DATA_REF (stmt_vinfo
))
3931 vinfo
->move_dr (pattern_stmt_info
, stmt_vinfo
);
3933 *type_out
= vectype1
;
3934 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt
);
3936 return pattern_stmt
;
3939 if (!is_gimple_assign (last_stmt
))
3942 gimple
*pattern_stmt
;
3943 lhs
= gimple_assign_lhs (last_stmt
);
3944 rhs1
= gimple_assign_rhs1 (last_stmt
);
3945 rhs_code
= gimple_assign_rhs_code (last_stmt
);
3947 /* Check for cond expression requiring mask conversion. */
3948 if (rhs_code
== COND_EXPR
)
3950 vectype1
= get_vectype_for_scalar_type (TREE_TYPE (lhs
));
3952 if (TREE_CODE (rhs1
) == SSA_NAME
)
3954 rhs1_type
= search_type_for_mask (rhs1
, vinfo
);
3958 else if (COMPARISON_CLASS_P (rhs1
))
	  /* Check whether we're comparing scalar booleans and (if so)
	     whether a better mask type exists than the mask associated
	     with boolean-sized elements.  This avoids unnecessary packs
	     and unpacks if the booleans are set from comparisons of
	     wider types.  E.g. in:

	       int x1, x2, x3, x4, y1, y2;

	       bool b1 = (x1 == x2);
	       bool b2 = (x3 == x4);
	       ... = b1 == b2 ? y1 : y2;

	     it is better for b1 and b2 to use the mask type associated
	     with int elements rather than bool (byte) elements.  */
3974 rhs1_type
= search_type_for_mask (TREE_OPERAND (rhs1
, 0), vinfo
);
3976 rhs1_type
= TREE_TYPE (TREE_OPERAND (rhs1
, 0));
3981 vectype2
= get_mask_type_for_scalar_type (rhs1_type
);
3983 if (!vectype1
|| !vectype2
)
3986 /* Continue if a conversion is needed. Also continue if we have
3987 a comparison whose vector type would normally be different from
3988 VECTYPE2 when considered in isolation. In that case we'll
3989 replace the comparison with an SSA name (so that we can record
3990 its vector type) and behave as though the comparison was an SSA
3991 name from the outset. */
3992 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1
),
3993 TYPE_VECTOR_SUBPARTS (vectype2
))
3994 && (TREE_CODE (rhs1
) == SSA_NAME
3995 || rhs1_type
== TREE_TYPE (TREE_OPERAND (rhs1
, 0))))
3998 /* If rhs1 is invariant and we can promote it leave the COND_EXPR
3999 in place, we can handle it in vectorizable_condition. This avoids
4000 unnecessary promotion stmts and increased vectorization factor. */
4001 if (COMPARISON_CLASS_P (rhs1
)
4002 && INTEGRAL_TYPE_P (rhs1_type
)
4003 && known_le (TYPE_VECTOR_SUBPARTS (vectype1
),
4004 TYPE_VECTOR_SUBPARTS (vectype2
)))
4006 enum vect_def_type dt
;
4007 if (vect_is_simple_use (TREE_OPERAND (rhs1
, 0), vinfo
, &dt
)
4008 && dt
== vect_external_def
4009 && vect_is_simple_use (TREE_OPERAND (rhs1
, 1), vinfo
, &dt
)
4010 && (dt
== vect_external_def
4011 || dt
== vect_constant_def
))
4013 tree wide_scalar_type
= build_nonstandard_integer_type
4014 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype1
))),
4015 TYPE_UNSIGNED (rhs1_type
));
4016 tree vectype3
= get_vectype_for_scalar_type (wide_scalar_type
);
4017 if (expand_vec_cond_expr_p (vectype1
, vectype3
, TREE_CODE (rhs1
)))
4022 /* If rhs1 is a comparison we need to move it into a
4023 separate statement. */
4024 if (TREE_CODE (rhs1
) != SSA_NAME
)
4026 tmp
= vect_recog_temp_ssa_var (TREE_TYPE (rhs1
), NULL
);
4027 pattern_stmt
= gimple_build_assign (tmp
, rhs1
);
4029 append_pattern_def_seq (stmt_vinfo
, pattern_stmt
, vectype2
);
4032 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
4033 TYPE_VECTOR_SUBPARTS (vectype2
)))
4034 tmp
= build_mask_conversion (rhs1
, vectype1
, stmt_vinfo
);
4038 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
4039 pattern_stmt
= gimple_build_assign (lhs
, COND_EXPR
, tmp
,
4040 gimple_assign_rhs2 (last_stmt
),
4041 gimple_assign_rhs3 (last_stmt
));
4043 *type_out
= vectype1
;
4044 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt
);
4046 return pattern_stmt
;
4049 /* Now check for binary boolean operations requiring conversion for
4051 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs
)))
4054 if (rhs_code
!= BIT_IOR_EXPR
4055 && rhs_code
!= BIT_XOR_EXPR
4056 && rhs_code
!= BIT_AND_EXPR
4057 && TREE_CODE_CLASS (rhs_code
) != tcc_comparison
)
4060 rhs2
= gimple_assign_rhs2 (last_stmt
);
4062 rhs1_type
= search_type_for_mask (rhs1
, vinfo
);
4063 rhs2_type
= search_type_for_mask (rhs2
, vinfo
);
4065 if (!rhs1_type
|| !rhs2_type
4066 || TYPE_PRECISION (rhs1_type
) == TYPE_PRECISION (rhs2_type
))
4069 if (TYPE_PRECISION (rhs1_type
) < TYPE_PRECISION (rhs2_type
))
4071 vectype1
= get_mask_type_for_scalar_type (rhs1_type
);
4074 rhs2
= build_mask_conversion (rhs2
, vectype1
, stmt_vinfo
);
4078 vectype1
= get_mask_type_for_scalar_type (rhs2_type
);
4081 rhs1
= build_mask_conversion (rhs1
, vectype1
, stmt_vinfo
);
4084 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
4085 pattern_stmt
= gimple_build_assign (lhs
, rhs_code
, rhs1
, rhs2
);
4087 *type_out
= vectype1
;
4088 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt
);
4090 return pattern_stmt
;
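
/* As an illustration of the COND_EXPR case above (hypothetical SSA names):
   in

       double r_1 = a_2 < b_3 ? x_4 : y_5;   // a_2, b_3 are ints

   the comparison produces a mask with int-sized elements while the
   COND_EXPR selects between double-sized elements, so the number of
   subparts differs.  The code above then moves the comparison into its
   own pattern statement and inserts a conversion of the resulting mask
   to VECTYPE1's mask form before rebuilding the COND_EXPR.  */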
/* STMT_INFO is a load or store.  If the load or store is conditional, return
   the boolean condition under which it occurs, otherwise return null.  */

static tree
vect_get_load_store_mask (stmt_vec_info stmt_info)
{
  if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt))
    {
      gcc_assert (gimple_assign_single_p (def_assign));
      return NULL_TREE;
    }

  if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt))
    {
      internal_fn ifn = gimple_call_internal_fn (def_call);
      int mask_index = internal_fn_mask_index (ifn);
      return gimple_call_arg (def_call, mask_index);
    }

  gcc_unreachable ();
}
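
/* For a plain (unconditional) load or store the statement above is a simple
   gassign and there is no mask.  For a masked access it is a call to an
   internal function such as .MASK_LOAD or .MASK_STORE, and the controlling
   mask is the call argument selected by internal_fn_mask_index.  */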
/* Return the scalar offset type that an internal gather/scatter function
   should use.  GS_INFO describes the gather/scatter operation.  */

static tree
vect_get_gather_scatter_offset_type (gather_scatter_info *gs_info)
{
  tree offset_type = TREE_TYPE (gs_info->offset);
  unsigned int element_bits = tree_to_uhwi (TYPE_SIZE (gs_info->element_type));

  /* Enforced by vect_check_gather_scatter.  */
  unsigned int offset_bits = TYPE_PRECISION (offset_type);
  gcc_assert (element_bits >= offset_bits);

  /* If the offset is narrower than the elements, extend it according
     to its sign.  */
  if (element_bits > offset_bits)
    return build_nonstandard_integer_type (element_bits,
					   TYPE_UNSIGNED (offset_type));

  return offset_type;
}
/* Return MASK if MASK is suitable for masking an operation on vectors
   of type VECTYPE, otherwise convert it into such a form and return
   the result.  Associate any conversion statements with STMT_INFO's
   pattern.  */

static tree
vect_convert_mask_for_vectype (tree mask, tree vectype,
			       stmt_vec_info stmt_info, vec_info *vinfo)
{
  tree mask_type = search_type_for_mask (mask, vinfo);
  if (mask_type)
    {
      tree mask_vectype = get_mask_type_for_scalar_type (mask_type);
      if (mask_vectype
	  && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
		       TYPE_VECTOR_SUBPARTS (mask_vectype)))
	mask = build_mask_conversion (mask, vectype, stmt_info);
    }
  return mask;
}
/* Return the equivalent of:

     fold_convert (TYPE, VALUE)

   with the expectation that the operation will be vectorized.
   If new statements are needed, add them as pattern statements
   to STMT_INFO.  */

static tree
vect_add_conversion_to_pattern (tree type, tree value, stmt_vec_info stmt_info)
{
  if (useless_type_conversion_p (type, TREE_TYPE (value)))
    return value;

  tree new_value = vect_recog_temp_ssa_var (type, NULL);
  gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
  append_pattern_def_seq (stmt_info, conversion,
			  get_vectype_for_scalar_type (type));
  return new_value;
}
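
/* E.g. widening a 32-bit gather offset to a 64-bit offset type adds a
   pattern statement along the lines of

       offset_6 = (long int) offset_5;

   (hypothetical names and types) to STMT_INFO's definition sequence and
   returns the new SSA name.  */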
/* Try to convert STMT_INFO into a call to a gather load or scatter store
   internal function.  Return the final statement on success and set
   *TYPE_OUT to the vector type being loaded or stored.

   This function only handles gathers and scatters that were recognized
   as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P).  */

static gimple *
vect_recog_gather_scatter_pattern (stmt_vec_info stmt_info, tree *type_out)
{
  /* Currently we only support this for loop vectorization.  */
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (stmt_info->vinfo);
  if (!loop_vinfo)
    return NULL;

  /* Make sure that we're looking at a gather load or scatter store.  */
  data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    return NULL;

  /* Get the boolean that controls whether the load or store happens.
     This is null if the operation is unconditional.  */
  tree mask = vect_get_load_store_mask (stmt_info);

  /* Make sure that the target supports an appropriate internal
     function for the gather/scatter operation.  */
  gather_scatter_info gs_info;
  if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info)
      || gs_info.decl)
    return NULL;

  /* Convert the mask to the right form.  */
  tree gs_vectype = get_vectype_for_scalar_type (gs_info.element_type);
  if (mask)
    mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
					  loop_vinfo);

  /* Get the invariant base and non-invariant offset, converting the
     latter to the same width as the vector elements.  */
  tree base = gs_info.base;
  tree offset_type = vect_get_gather_scatter_offset_type (&gs_info);
  tree offset = vect_add_conversion_to_pattern (offset_type, gs_info.offset,
						stmt_info);

  /* Build the new pattern statement.  */
  tree scale = size_int (gs_info.scale);
  gcall *pattern_stmt;
  if (DR_IS_READ (dr))
    {
      if (mask != NULL)
	pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
						   offset, scale, mask);
      else
	pattern_stmt = gimple_build_call_internal (gs_info.ifn, 3, base,
						   offset, scale);
      tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
      gimple_call_set_lhs (pattern_stmt, load_lhs);
    }
  else
    {
      tree rhs = vect_get_store_rhs (stmt_info);
      if (mask != NULL)
	pattern_stmt = gimple_build_call_internal (IFN_MASK_SCATTER_STORE, 5,
						   base, offset, scale, rhs,
						   mask);
      else
	pattern_stmt = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
						   base, offset, scale, rhs);
    }
  gimple_call_set_nothrow (pattern_stmt, true);

  /* Copy across relevant vectorization info and associate DR with the
     new pattern statement instead of the original statement.  */
  stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
  loop_vinfo->move_dr (pattern_stmt_info, stmt_info);

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  *type_out = vectype;
  vect_pattern_detected ("gather/scatter pattern", stmt_info->stmt);

  return pattern_stmt;
}
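
/* For example, a conditional gather load that vect_check_gather_scatter has
   classified as such, say

       x_1 = mask_8 ? base_6[idx_7] : 0.0;

   ends up as an internal call of roughly the form

       x_9 = .MASK_GATHER_LOAD (base_6, idx_7, scale, mask_8);

   with the exact internal function taken from gs_info.ifn (hypothetical
   SSA names; the unmasked and scatter-store cases are built analogously
   above).  */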
/* Return true if TYPE is a non-boolean integer type.  These are the types
   that we want to consider for narrowing.  */

static bool
vect_narrowable_type_p (tree type)
{
  return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
}

/* Return true if the operation given by CODE can be truncated to N bits
   when only N bits of the output are needed.  This is only true if bit N+1
   of the inputs has no effect on the low N bits of the result.  */

static bool
vect_truncatable_operation_p (tree_code code)
{
  switch (code)
    {
    case PLUS_EXPR: case MINUS_EXPR: case MULT_EXPR:
    case BIT_AND_EXPR: case BIT_IOR_EXPR: case BIT_XOR_EXPR:
    case COND_EXPR:
      return true;
    default:
      return false;
    }
}
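
/* For instance, addition and bitwise AND are truncatable: the low N bits
   of the result depend only on the low N bits of the inputs.  A right
   shift is not, since output bit N - 1 of (a >> 1) comes from input bit N;
   shifts therefore get their own handling in
   vect_determine_precisions_from_users below.  */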
/* Record that STMT_INFO could be changed from operating on TYPE to
   operating on a type with the precision and sign given by PRECISION
   and SIGN respectively.  PRECISION is an arbitrary bit precision;
   it might not be a whole number of bytes.  */

static void
vect_set_operation_type (stmt_vec_info stmt_info, tree type,
			 unsigned int precision, signop sign)
{
  /* Round the precision up to a whole number of bytes.  */
  precision = vect_element_precision (precision);
  if (precision < TYPE_PRECISION (type)
      && (!stmt_info->operation_precision
	  || stmt_info->operation_precision > precision))
    {
      stmt_info->operation_precision = precision;
      stmt_info->operation_sign = sign;
    }
}
/* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
   non-boolean inputs, all of which have type TYPE.  MIN_INPUT_PRECISION
   is an arbitrary bit precision; it might not be a whole number of bytes.  */

static void
vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
			      unsigned int min_input_precision)
{
  /* This operation in isolation only requires the inputs to have
     MIN_INPUT_PRECISION of precision.  However, that doesn't mean
     that MIN_INPUT_PRECISION is a natural precision for the chain
     as a whole.  E.g. consider something like:

	 unsigned short *x, *y;
	 *y = ((*x & 0xf0) >> 4) | (*y << 4);

     The right shift can be done on unsigned chars, and only requires the
     result of "*x & 0xf0" to be done on unsigned chars.  But taking that
     approach would mean turning a natural chain of single-vector unsigned
     short operations into one that truncates "*x" and then extends
     "(*x & 0xf0) >> 4", with two vectors for each unsigned short
     operation and one vector for each unsigned char operation.
     This would be a significant pessimization.

     Instead only propagate the maximum of this precision and the precision
     required by the users of the result.  This means that we don't pessimize
     the case above but continue to optimize things like:

	 unsigned char *y;
	 unsigned short *x;
	 *y = ((*x & 0xf0) >> 4) | (*y << 4);

     Here we would truncate two vectors of *x to a single vector of
     unsigned chars and use single-vector unsigned char operations for
     everything else, rather than doing two unsigned short copies of
     "(*x & 0xf0) >> 4" and then truncating the result.  */
  min_input_precision = MAX (min_input_precision,
			     stmt_info->min_output_precision);
  if (min_input_precision < TYPE_PRECISION (type)
      && (!stmt_info->min_input_precision
	  || stmt_info->min_input_precision > min_input_precision))
    stmt_info->min_input_precision = min_input_precision;
}
/* Subroutine of vect_determine_min_output_precision.  Return true if
   we can calculate a reduced number of output bits for STMT_INFO,
   whose result is LHS.  */

static bool
vect_determine_min_output_precision_1 (stmt_vec_info stmt_info, tree lhs)
{
  /* Take the maximum precision required by users of the result.  */
  vec_info *vinfo = stmt_info->vinfo;
  unsigned int precision = 0;
  imm_use_iterator iter;
  use_operand_p use;
  FOR_EACH_IMM_USE_FAST (use, iter, lhs)
    {
      gimple *use_stmt = USE_STMT (use);
      if (is_gimple_debug (use_stmt))
	continue;
      stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
      if (!use_stmt_info || !use_stmt_info->min_input_precision)
	return false;
      /* The input precision recorded for COND_EXPRs applies only to the
	 "then" and "else" values.  */
      gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
      if (assign
	  && gimple_assign_rhs_code (assign) == COND_EXPR
	  && use->use != gimple_assign_rhs2_ptr (assign)
	  && use->use != gimple_assign_rhs3_ptr (assign))
	return false;
      precision = MAX (precision, use_stmt_info->min_input_precision);
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "only the low %d bits of %T are significant\n",
		     precision, lhs);
  stmt_info->min_output_precision = precision;
  return true;
}
/* Calculate min_output_precision for STMT_INFO.  */

static void
vect_determine_min_output_precision (stmt_vec_info stmt_info)
{
  /* We're only interested in statements with a narrowable result.  */
  tree lhs = gimple_get_lhs (stmt_info->stmt);
  if (!lhs
      || TREE_CODE (lhs) != SSA_NAME
      || !vect_narrowable_type_p (TREE_TYPE (lhs)))
    return;

  if (!vect_determine_min_output_precision_1 (stmt_info, lhs))
    stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
}
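
/* E.g. if every user of a 32-bit result has recorded a min_input_precision
   of 8, the result's min_output_precision becomes 8; if any user has no
   recorded requirement, vect_determine_min_output_precision_1 fails and
   the fallback above keeps the full 32-bit TYPE_PRECISION.  */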
/* Use range information to decide whether STMT (described by STMT_INFO)
   could be done in a narrower type.  This is effectively a forward
   propagation, since it uses context-independent information that applies
   to all users of an SSA name.  */

static void
vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
{
  tree lhs = gimple_assign_lhs (stmt);
  if (!lhs || TREE_CODE (lhs) != SSA_NAME)
    return;

  tree type = TREE_TYPE (lhs);
  if (!vect_narrowable_type_p (type))
    return;

  /* First see whether we have any useful range information for the result.  */
  unsigned int precision = TYPE_PRECISION (type);
  signop sign = TYPE_SIGN (type);
  wide_int min_value, max_value;
  if (!vect_get_range_info (lhs, &min_value, &max_value))
    return;

  tree_code code = gimple_assign_rhs_code (stmt);
  unsigned int nops = gimple_num_ops (stmt);

  if (!vect_truncatable_operation_p (code))
    /* Check that all relevant input operands are compatible, and update
       [MIN_VALUE, MAX_VALUE] to include their ranges.  */
    for (unsigned int i = 1; i < nops; ++i)
      {
	tree op = gimple_op (stmt, i);
	if (TREE_CODE (op) == INTEGER_CST)
	  {
	    /* Don't require the integer to have RHS_TYPE (which it might
	       not for things like shift amounts, etc.), but do require it
	       to fit the type.  */
	    if (!int_fits_type_p (op, type))
	      return;

	    min_value = wi::min (min_value, wi::to_wide (op, precision), sign);
	    max_value = wi::max (max_value, wi::to_wide (op, precision), sign);
	  }
	else if (TREE_CODE (op) == SSA_NAME)
	  {
	    /* Ignore codes that don't take uniform arguments.  */
	    if (!types_compatible_p (TREE_TYPE (op), type))
	      return;

	    wide_int op_min_value, op_max_value;
	    if (!vect_get_range_info (op, &op_min_value, &op_max_value))
	      return;

	    min_value = wi::min (min_value, op_min_value, sign);
	    max_value = wi::max (max_value, op_max_value, sign);
	  }
	else
	  return;
      }

  /* Try to switch signed types for unsigned types if we can.
     This is better for two reasons.  First, unsigned ops tend
     to be cheaper than signed ops.  Second, it means that we can
     handle things like:

	 signed char c;
	 int res = (int) c & 0xff00; // range [0x0000, 0xff00]

     as:

	 signed char c;
	 unsigned short res_1 = (unsigned short) c & 0xff00;
	 int res = (int) res_1;

     where the intermediate result res_1 has unsigned rather than
     signed type.  */
  if (sign == SIGNED && !wi::neg_p (min_value))
    sign = UNSIGNED;

  /* See what precision is required for MIN_VALUE and MAX_VALUE.  */
  unsigned int precision1 = wi::min_precision (min_value, sign);
  unsigned int precision2 = wi::min_precision (max_value, sign);
  unsigned int value_precision = MAX (precision1, precision2);
  if (value_precision >= precision)
    return;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
		     " without loss of precision: %G",
		     sign == SIGNED ? "signed" : "unsigned",
		     value_precision, stmt);

  vect_set_operation_type (stmt_info, type, value_precision, sign);
  vect_set_min_input_precision (stmt_info, type, value_precision);
}
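
/* Working through the example in the comment above: for

       signed char c;
       int res = (int) c & 0xff00;

   the recorded range of res is [0x0000, 0xff00], whose minimum is
   non-negative, so SIGN is switched to UNSIGNED; wi::min_precision of
   0xff00 as an unsigned value is 16, so the statement is recorded as
   doable with 16-bit unsigned precision instead of 32 bits.  */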
/* Use information about the users of STMT's result to decide whether
   STMT (described by STMT_INFO) could be done in a narrower type.
   This is effectively a backward propagation.  */

static void
vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
{
  tree_code code = gimple_assign_rhs_code (stmt);
  unsigned int opno = (code == COND_EXPR ? 2 : 1);
  tree type = TREE_TYPE (gimple_op (stmt, opno));
  if (!vect_narrowable_type_p (type))
    return;

  unsigned int precision = TYPE_PRECISION (type);
  unsigned int operation_precision, min_input_precision;
  switch (code)
    {
    CASE_CONVERT:
      /* Only the bits that contribute to the output matter.  Don't change
	 the precision of the operation itself.  */
      operation_precision = precision;
      min_input_precision = stmt_info->min_output_precision;
      break;

    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
      {
	tree shift = gimple_assign_rhs2 (stmt);
	if (TREE_CODE (shift) != INTEGER_CST
	    || !wi::ltu_p (wi::to_widest (shift), precision))
	  return;
	unsigned int const_shift = TREE_INT_CST_LOW (shift);
	if (code == LSHIFT_EXPR)
	  {
	    /* We need CONST_SHIFT fewer bits of the input.  */
	    operation_precision = stmt_info->min_output_precision;
	    min_input_precision = (MAX (operation_precision, const_shift)
				   - const_shift);
	  }
	else
	  {
	    /* We need CONST_SHIFT extra bits to do the operation.  */
	    operation_precision = (stmt_info->min_output_precision
				   + const_shift);
	    min_input_precision = operation_precision;
	  }
	break;
      }

    default:
      if (vect_truncatable_operation_p (code))
	{
	  /* Input bit N has no effect on output bits N-1 and lower.  */
	  operation_precision = stmt_info->min_output_precision;
	  min_input_precision = operation_precision;
	  break;
	}
      return;
    }

  if (operation_precision < precision)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
			 " without affecting users: %G",
			 TYPE_UNSIGNED (type) ? "unsigned" : "signed",
			 operation_precision, stmt);
      vect_set_operation_type (stmt_info, type, operation_precision,
			       TYPE_SIGN (type));
    }
  vect_set_min_input_precision (stmt_info, type, min_input_precision);
}
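
/* For example, if only the low 16 bits of (x << 10) are needed, the
   LSHIFT_EXPR case above records that only the low MAX (16, 10) - 10 == 6
   bits of x matter, while the operation itself can still be done in 16
   bits.  For x >> 10 with 16 significant output bits, the operation and
   its input instead need at least 16 + 10 == 26 bits.  */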
/* Handle vect_determine_precisions for STMT_INFO, given that we
   have already done so for the users of its result.  */

static void
vect_determine_stmt_precisions (stmt_vec_info stmt_info)
{
  vect_determine_min_output_precision (stmt_info);
  if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
    {
      vect_determine_precisions_from_range (stmt_info, stmt);
      vect_determine_precisions_from_users (stmt_info, stmt);
    }
}
/* Walk backwards through the vectorizable region to determine the
   values of these fields:

   - min_output_precision
   - min_input_precision
   - operation_precision
   - operation_sign.  */

void
vect_determine_precisions (vec_info *vinfo)
{
  DUMP_VECT_SCOPE ("vect_determine_precisions");

  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    {
      struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
      unsigned int nbbs = loop->num_nodes;
      for (unsigned int i = 0; i < nbbs; i++)
	{
	  basic_block bb = bbs[nbbs - i - 1];
	  for (gimple_stmt_iterator si = gsi_last_bb (bb);
	       !gsi_end_p (si); gsi_prev (&si))
	    vect_determine_stmt_precisions
	      (vinfo->lookup_stmt (gsi_stmt (si)));
	}
    }
  else
    {
      bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
      gimple_stmt_iterator si = bb_vinfo->region_end;
      gimple *stmt;
      do
	{
	  if (!gsi_stmt (si))
	    si = gsi_last_bb (bb_vinfo->bb);
	  else
	    gsi_prev (&si);
	  stmt = gsi_stmt (si);
	  stmt_vec_info stmt_info = vinfo->lookup_stmt (stmt);
	  if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
	    vect_determine_stmt_precisions (stmt_info);
	}
      while (stmt != gsi_stmt (bb_vinfo->region_begin));
    }
}
typedef gimple *(*vect_recog_func_ptr) (stmt_vec_info, tree *);

struct vect_recog_func
{
  vect_recog_func_ptr fn;
  const char *name;
};

/* Note that ordering matters - the first pattern matching on a stmt is
   taken which means usually the more complex one needs to precede the
   less complex ones (widen_sum only after dot_prod or sad for example).  */
static vect_recog_func vect_vect_recog_func_ptrs[] = {
  { vect_recog_over_widening_pattern, "over_widening" },
  /* Must come after over_widening, which narrows the shift as much as
     possible beforehand.  */
  { vect_recog_average_pattern, "average" },
  { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
  { vect_recog_widen_mult_pattern, "widen_mult" },
  { vect_recog_dot_prod_pattern, "dot_prod" },
  { vect_recog_sad_pattern, "sad" },
  { vect_recog_widen_sum_pattern, "widen_sum" },
  { vect_recog_pow_pattern, "pow" },
  { vect_recog_widen_shift_pattern, "widen_shift" },
  { vect_recog_rotate_pattern, "rotate" },
  { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
  { vect_recog_divmod_pattern, "divmod" },
  { vect_recog_mult_pattern, "mult" },
  { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
  { vect_recog_bool_pattern, "bool" },
  /* This must come before mask conversion, and includes the parts
     of mask conversion that are needed for gather and scatter
     internal functions.  */
  { vect_recog_gather_scatter_pattern, "gather_scatter" },
  { vect_recog_mask_conversion_pattern, "mask_conversion" }
};

const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);
/* Mark statements that are involved in a pattern.  */

static inline void
vect_mark_pattern_stmts (stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
			 tree pattern_vectype)
{
  gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);

  gimple *orig_pattern_stmt = NULL;
  if (is_pattern_stmt_p (orig_stmt_info))
    {
      /* We're replacing a statement in an existing pattern definition
	 sequence.  */
      orig_pattern_stmt = orig_stmt_info->stmt;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "replacing earlier pattern %G", orig_pattern_stmt);

      /* To keep the book-keeping simple, just swap the lhs of the
	 old and new statements, so that the old one has a valid but
	 unused lhs.  */
      tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
      gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
      gimple_set_lhs (pattern_stmt, old_lhs);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "with %G", pattern_stmt);

      /* Switch to the statement that ORIG replaces.  */
      orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);

      /* We shouldn't be replacing the main pattern statement.  */
      gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
		  != orig_pattern_stmt);
    }

  if (def_seq)
    for (gimple_stmt_iterator si = gsi_start (def_seq);
	 !gsi_end_p (si); gsi_next (&si))
      vect_init_pattern_stmt (gsi_stmt (si), orig_stmt_info, pattern_vectype);

  if (orig_pattern_stmt)
    {
      vect_init_pattern_stmt (pattern_stmt, orig_stmt_info, pattern_vectype);

      /* Insert all the new pattern statements before the original one.  */
      gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
      gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
					       orig_def_seq);
      gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
      gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);

      /* Remove the pattern statement that this new pattern replaces.  */
      gsi_remove (&gsi, false);
    }
  else
    vect_set_pattern_stmt (pattern_stmt, orig_stmt_info, pattern_vectype);
}
/* Function vect_pattern_recog_1

   Input:
   PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
	computation pattern.
   STMT_INFO: A stmt from which the pattern search should start.

   If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
   a sequence of statements that has the same functionality and can be
   used to replace STMT_INFO.  It returns the last statement in the sequence
   and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
   PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
   statement, having first checked that the target supports the new operation
   in that type.

   This function also does some bookkeeping, as explained in the documentation
   for vect_pattern_recog.  */

static void
vect_pattern_recog_1 (vect_recog_func *recog_func, stmt_vec_info stmt_info)
{
  vec_info *vinfo = stmt_info->vinfo;
  gimple *pattern_stmt;
  loop_vec_info loop_vinfo;
  tree pattern_vectype;

  /* If this statement has already been replaced with pattern statements,
     leave the original statement alone, since the first match wins.
     Instead try to match against the definition statements that feed
     the main pattern statement.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
	   !gsi_end_p (gsi); gsi_next (&gsi))
	vect_pattern_recog_1 (recog_func, vinfo->lookup_stmt (gsi_stmt (gsi)));
      return;
    }

  gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
  pattern_stmt = recog_func->fn (stmt_info, &pattern_vectype);
  if (!pattern_stmt)
    {
      /* Clear any half-formed pattern definition sequence.  */
      STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
      return;
    }

  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  gcc_assert (pattern_vectype);

  /* Found a vectorizable pattern.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "%s pattern recognized: %G",
		     recog_func->name, pattern_stmt);

  /* Mark the stmts that are involved in the pattern.  */
  vect_mark_pattern_stmts (stmt_info, pattern_stmt, pattern_vectype);

  /* Patterns cannot be vectorized using SLP, because they change the order of
     computation.  */
  if (loop_vinfo)
    {
      unsigned ix, ix2;
      stmt_vec_info *elem_ptr;
      VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
			     elem_ptr, *elem_ptr == stmt_info);
    }
}
/* Function vect_pattern_recog

   Input:
   LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
	computation idioms.

   Output - for each computation idiom that is detected we create a new stmt
	that provides the same functionality and that can be vectorized.  We
	also record some information in the struct_stmt_info of the relevant
	stmts, as explained below:

   At the entry to this function we have the following stmts, with the
   following initial value in the STMT_VINFO fields:

	 stmt                     in_pattern_p  related_stmt    vec_stmt
	 S1: a_i = ....                 -       -               -
	 S2: a_2 = ..use(a_i)..         -       -               -
	 S3: a_1 = ..use(a_2)..         -       -               -
	 S4: a_0 = ..use(a_1)..         -       -               -
	 S5: ... = ..use(a_0)..         -       -               -

   Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
   represented by a single stmt.  We then:
   - create a new stmt S6 equivalent to the pattern (the stmt is not
     inserted into the code)
   - fill in the STMT_VINFO fields as follows:

				  in_pattern_p  related_stmt    vec_stmt
	 S1: a_i = ....                 -       -               -
	 S2: a_2 = ..use(a_i)..         -       -               -
	 S3: a_1 = ..use(a_2)..         -       -               -
	 S4: a_0 = ..use(a_1)..         true    S6              -
	  '---> S6: a_new = ....        -       S4              -
	 S5: ... = ..use(a_0)..         -       -               -

   (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
   to each other through the RELATED_STMT field).

   S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
   of S4 because it will replace all its uses.  Stmts {S1,S2,S3} will
   remain irrelevant unless used by stmts other than S4.

   If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
   (because they are marked as irrelevant).  It will vectorize S6, and record
   a pointer to the new vector stmt VS6 from S6 (as usual).
   S4 will be skipped, and S5 will be vectorized as usual:

				  in_pattern_p  related_stmt    vec_stmt
	 S1: a_i = ....                 -       -               -
	 S2: a_2 = ..use(a_i)..         -       -               -
	 S3: a_1 = ..use(a_2)..         -       -               -
       > VS6: va_new = ....             -       -               -
	 S4: a_0 = ..use(a_1)..         true    S6              VS6
	  '---> S6: a_new = ....        -       S4              VS6
       > VS5: ... = ..vuse(va_new)..    -       -               -
	 S5: ... = ..use(a_0)..         -       -               -

   DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
   elsewhere), and we'll end up with:

	VS6: va_new = ....
	VS5: ... = ..vuse(va_new)..

   In case of more than one pattern statements, e.g., widen-mult with
   intermediate type:

     S1  a_t = ;
     S2  a_T = (TYPE) a_t;
	   '--> S3: a_it = (interm_type) a_t;
     S4  prod_T = a_T * CONST;
	   '--> S5: prod_T' = a_it w* CONST;

   there may be other users of a_T outside the pattern.  In that case S2 will
   be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
   and vectorized.  The vector stmt VS2 will be recorded in S2, and VS3 will
   be recorded in S3.  */

void
vect_pattern_recog (vec_info *vinfo)
{
  struct loop *loop;
  basic_block *bbs;
  unsigned int nbbs;
  gimple_stmt_iterator si;
  unsigned int i, j;

  vect_determine_precisions (vinfo);

  DUMP_VECT_SCOPE ("vect_pattern_recog");

  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      bbs = LOOP_VINFO_BBS (loop_vinfo);
      nbbs = loop->num_nodes;

      /* Scan through the loop stmts, applying the pattern recognition
	 functions starting at each stmt visited.  */
      for (i = 0; i < nbbs; i++)
	{
	  basic_block bb = bbs[i];
	  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	    {
	      stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
	      /* Scan over all generic vect_recog_xxx_pattern functions.  */
	      for (j = 0; j < NUM_PATTERNS; j++)
		vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j],
				      stmt_info);
	    }
	}
    }
  else
    {
      bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
      for (si = bb_vinfo->region_begin;
	   gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si))
	{
	  gimple *stmt = gsi_stmt (si);
	  stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (stmt);
	  if (stmt_info && !STMT_VINFO_VECTORIZABLE (stmt_info))
	    continue;

	  /* Scan over all generic vect_recog_xxx_pattern functions.  */
	  for (j = 0; j < NUM_PATTERNS; j++)
	    vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], stmt_info);
	}
    }
}