1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2018 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
30 #include "optabs-tree.h"
31 #include "insn-config.h"
32 #include "recog.h" /* FIXME: for insn_data */
33 #include "fold-const.h"
34 #include "stor-layout.h"
37 #include "gimple-iterator.h"
39 #include "tree-vectorizer.h"
42 #include "internal-fn.h"
43 #include "case-cfn-macros.h"
44 #include "fold-const-call.h"
47 #include "omp-simd-clone.h"
50 /* Pattern recognition functions */
51 static gimple
*vect_recog_widen_sum_pattern (vec
<gimple
*> *, tree
*,
53 static gimple
*vect_recog_widen_mult_pattern (vec
<gimple
*> *, tree
*,
55 static gimple
*vect_recog_dot_prod_pattern (vec
<gimple
*> *, tree
*,
57 static gimple
*vect_recog_sad_pattern (vec
<gimple
*> *, tree
*,
59 static gimple
*vect_recog_pow_pattern (vec
<gimple
*> *, tree
*, tree
*);
60 static gimple
*vect_recog_over_widening_pattern (vec
<gimple
*> *, tree
*,
62 static gimple
*vect_recog_widen_shift_pattern (vec
<gimple
*> *,
64 static gimple
*vect_recog_rotate_pattern (vec
<gimple
*> *, tree
*, tree
*);
65 static gimple
*vect_recog_vector_vector_shift_pattern (vec
<gimple
*> *,
67 static gimple
*vect_recog_divmod_pattern (vec
<gimple
*> *,
70 static gimple
*vect_recog_mult_pattern (vec
<gimple
*> *,
73 static gimple
*vect_recog_mixed_size_cond_pattern (vec
<gimple
*> *,
75 static gimple
*vect_recog_bool_pattern (vec
<gimple
*> *, tree
*, tree
*);
76 static gimple
*vect_recog_mask_conversion_pattern (vec
<gimple
*> *, tree
*, tree
*);
77 static gimple
*vect_recog_gather_scatter_pattern (vec
<gimple
*> *, tree
*,
80 struct vect_recog_func
82 vect_recog_func_ptr fn
;
86 /* Note that ordering matters - the first pattern matching on a stmt
87 is taken which means usually the more complex one needs to precede
88 the less complex ones (widen_sum only after dot_prod or sad for example). */
89 static vect_recog_func vect_vect_recog_func_ptrs
[NUM_PATTERNS
] = {
90 { vect_recog_widen_mult_pattern
, "widen_mult" },
91 { vect_recog_dot_prod_pattern
, "dot_prod" },
92 { vect_recog_sad_pattern
, "sad" },
93 { vect_recog_widen_sum_pattern
, "widen_sum" },
94 { vect_recog_pow_pattern
, "pow" },
95 { vect_recog_widen_shift_pattern
, "widen_shift" },
96 { vect_recog_over_widening_pattern
, "over_widening" },
97 { vect_recog_rotate_pattern
, "rotate" },
98 { vect_recog_vector_vector_shift_pattern
, "vector_vector_shift" },
99 { vect_recog_divmod_pattern
, "divmod" },
100 { vect_recog_mult_pattern
, "mult" },
101 { vect_recog_mixed_size_cond_pattern
, "mixed_size_cond" },
102 { vect_recog_bool_pattern
, "bool" },
103 /* This must come before mask conversion, and includes the parts
104 of mask conversion that are needed for gather and scatter
105 internal functions. */
106 { vect_recog_gather_scatter_pattern
, "gather_scatter" },
107 { vect_recog_mask_conversion_pattern
, "mask_conversion" }
110 /* Report that we've found an instance of pattern PATTERN in
114 vect_pattern_detected (const char *name
, gimple
*stmt
)
116 if (dump_enabled_p ())
118 dump_printf_loc (MSG_NOTE
, vect_location
, "%s: detected: ", name
);
119 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
124 append_pattern_def_seq (stmt_vec_info stmt_info
, gimple
*stmt
)
126 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
),
131 new_pattern_def_seq (stmt_vec_info stmt_info
, gimple
*stmt
)
133 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
) = NULL
;
134 append_pattern_def_seq (stmt_info
, stmt
);
137 /* Check whether STMT2 is in the same loop or basic block as STMT1.
138 Which of the two applies depends on whether we're currently doing
139 loop-based or basic-block-based vectorization, as determined by
140 the vinfo_for_stmt for STMT1 (which must be defined).
142 If this returns true, vinfo_for_stmt for STMT2 is guaranteed
143 to be defined as well. */
146 vect_same_loop_or_bb_p (gimple
*stmt1
, gimple
*stmt2
)
148 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt1
);
149 return vect_stmt_in_region_p (stmt_vinfo
->vinfo
, stmt2
);
152 /* If the LHS of DEF_STMT has a single use, and that statement is
153 in the same loop or basic block, return it. */
156 vect_single_imm_use (gimple
*def_stmt
)
158 tree lhs
= gimple_assign_lhs (def_stmt
);
162 if (!single_imm_use (lhs
, &use_p
, &use_stmt
))
165 if (!vect_same_loop_or_bb_p (def_stmt
, use_stmt
))
171 /* If OP is defined by a statement that's being considered for vectorization,
172 return information about that statement, otherwise return NULL. */
175 vect_get_internal_def (vec_info
*vinfo
, tree op
)
179 if (TREE_CODE (op
) != SSA_NAME
180 || !vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
)
181 || dt
!= vect_internal_def
)
184 return vinfo_for_stmt (def_stmt
);
187 /* Check whether NAME, an ssa-name used in USE_STMT,
188 is a result of a type promotion, such that:
189 DEF_STMT: NAME = NOP (name0)
190 If CHECK_SIGN is TRUE, check that either both types are signed or both are
194 type_conversion_p (tree name
, gimple
*use_stmt
, bool check_sign
,
195 tree
*orig_type
, gimple
**def_stmt
, bool *promotion
)
197 gimple
*dummy_gimple
;
198 stmt_vec_info stmt_vinfo
;
199 tree type
= TREE_TYPE (name
);
201 enum vect_def_type dt
;
203 stmt_vinfo
= vinfo_for_stmt (use_stmt
);
204 if (!vect_is_simple_use (name
, stmt_vinfo
->vinfo
, def_stmt
, &dt
))
207 if (dt
!= vect_internal_def
208 && dt
!= vect_external_def
&& dt
!= vect_constant_def
)
214 if (dt
== vect_internal_def
)
216 stmt_vec_info def_vinfo
= vinfo_for_stmt (*def_stmt
);
217 if (STMT_VINFO_IN_PATTERN_P (def_vinfo
))
221 if (!is_gimple_assign (*def_stmt
))
224 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt
)))
227 oprnd0
= gimple_assign_rhs1 (*def_stmt
);
229 *orig_type
= TREE_TYPE (oprnd0
);
230 if (!INTEGRAL_TYPE_P (type
) || !INTEGRAL_TYPE_P (*orig_type
)
231 || ((TYPE_UNSIGNED (type
) != TYPE_UNSIGNED (*orig_type
)) && check_sign
))
234 if (TYPE_PRECISION (type
) >= (TYPE_PRECISION (*orig_type
) * 2))
239 if (!vect_is_simple_use (oprnd0
, stmt_vinfo
->vinfo
, &dummy_gimple
, &dt
))
245 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
246 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
249 vect_recog_temp_ssa_var (tree type
, gimple
*stmt
)
251 return make_temp_ssa_name (type
, stmt
, "patt");
254 /* Return true if STMT_VINFO describes a reduction for which reassociation
255 is allowed. If STMT_INFO is part of a group, assume that it's part of
256 a reduction chain and optimistically assume that all statements
257 except the last allow reassociation. */
260 vect_reassociating_reduction_p (stmt_vec_info stmt_vinfo
)
262 return (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
263 ? STMT_VINFO_REDUC_TYPE (stmt_vinfo
) != FOLD_LEFT_REDUCTION
264 : REDUC_GROUP_FIRST_ELEMENT (stmt_vinfo
) != NULL
);
267 /* Function vect_recog_dot_prod_pattern
269 Try to find the following pattern:
275 sum_0 = phi <init, sum_1>
278 S3 x_T = (TYPE1) x_t;
279 S4 y_T = (TYPE1) y_t;
281 [S6 prod = (TYPE2) prod; #optional]
282 S7 sum_1 = prod + sum_0;
284 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
285 same size of 'TYPE1' or bigger. This is a special case of a reduction
290 * STMTS: Contains a stmt from which the pattern search begins. In the
291 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
296 * TYPE_IN: The type of the input arguments to the pattern.
298 * TYPE_OUT: The type of the output of this pattern.
300 * Return value: A new stmt that will be used to replace the sequence of
301 stmts that constitute the pattern. In this case it will be:
302 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
304 Note: The dot-prod idiom is a widening reduction pattern that is
305 vectorized without preserving all the intermediate results. It
306 produces only N/2 (widened) results (by summing up pairs of
307 intermediate results) rather than all N results. Therefore, we
308 cannot allow this pattern when we want to get all the results and in
309 the correct order (as is the case when this computation is in an
310 inner-loop nested in an outer-loop that is being vectorized). */
313 vect_recog_dot_prod_pattern (vec
<gimple
*> *stmts
, tree
*type_in
,
316 gimple
*stmt
, *last_stmt
= (*stmts
)[0];
318 tree oprnd00
, oprnd01
;
319 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
320 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
321 tree type
, half_type
;
322 gimple
*pattern_stmt
;
324 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
332 loop
= LOOP_VINFO_LOOP (loop_info
);
334 /* We don't allow changing the order of the computation in the inner-loop
335 when doing outer-loop vectorization. */
336 if (loop
&& nested_in_vect_loop_p (loop
, last_stmt
))
339 if (!is_gimple_assign (last_stmt
))
342 type
= gimple_expr_type (last_stmt
);
344 /* Look for the following pattern
348 DDPROD = (TYPE2) DPROD;
349 sum_1 = DDPROD + sum_0;
351 - DX is double the size of X
352 - DY is double the size of Y
353 - DX, DY, DPROD all have the same type
354 - sum is the same size of DPROD or bigger
355 - sum has been recognized as a reduction variable.
357 This is equivalent to:
358 DPROD = X w* Y; #widen mult
359 sum_1 = DPROD w+ sum_0; #widen summation
361 DPROD = X w* Y; #widen mult
362 sum_1 = DPROD + sum_0; #summation
365 /* Starting from LAST_STMT, follow the defs of its uses in search
366 of the above pattern. */
368 if (gimple_assign_rhs_code (last_stmt
) != PLUS_EXPR
)
371 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
374 if (!vect_reassociating_reduction_p (stmt_vinfo
))
377 oprnd0
= gimple_assign_rhs1 (last_stmt
);
378 oprnd1
= gimple_assign_rhs2 (last_stmt
);
382 if (type_conversion_p (oprnd0
, stmt
, true, &half_type
, &def_stmt
,
387 oprnd0
= gimple_assign_rhs1 (stmt
);
392 /* So far so good. Since last_stmt was detected as a (summation) reduction,
393 we know that oprnd1 is the reduction variable (defined by a loop-header
394 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
395 Left to check that oprnd0 is defined by a (widen_)mult_expr */
396 if (TREE_CODE (oprnd0
) != SSA_NAME
)
399 prod_type
= half_type
;
400 stmt_vec_info mult_vinfo
= vect_get_internal_def (vinfo
, oprnd0
);
404 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
405 inside the loop (in case we are analyzing an outer-loop). */
406 gassign
*mult
= dyn_cast
<gassign
*> (mult_vinfo
->stmt
);
407 if (!mult
|| gimple_assign_rhs_code (mult
) != MULT_EXPR
)
409 if (STMT_VINFO_IN_PATTERN_P (mult_vinfo
))
411 /* Has been detected as a widening multiplication? */
413 mult
= dyn_cast
<gassign
*> (STMT_VINFO_RELATED_STMT (mult_vinfo
));
414 if (!mult
|| gimple_assign_rhs_code (mult
) != WIDEN_MULT_EXPR
)
416 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
)
417 = STMT_VINFO_PATTERN_DEF_SEQ (mult_vinfo
);
418 mult_vinfo
= vinfo_for_stmt (mult
);
419 gcc_assert (mult_vinfo
);
420 gcc_assert (STMT_VINFO_DEF_TYPE (mult_vinfo
) == vect_internal_def
);
421 oprnd00
= gimple_assign_rhs1 (mult
);
422 oprnd01
= gimple_assign_rhs2 (mult
);
426 tree half_type0
, half_type1
;
430 oprnd0
= gimple_assign_rhs1 (mult
);
431 oprnd1
= gimple_assign_rhs2 (mult
);
432 if (!type_conversion_p (oprnd0
, mult
, true, &half_type0
, &def_stmt
,
436 oprnd00
= gimple_assign_rhs1 (def_stmt
);
437 if (!type_conversion_p (oprnd1
, mult
, true, &half_type1
, &def_stmt
,
441 oprnd01
= gimple_assign_rhs1 (def_stmt
);
442 if (!types_compatible_p (half_type0
, half_type1
))
444 if (TYPE_PRECISION (prod_type
) != TYPE_PRECISION (half_type0
) * 2)
448 vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt
);
450 half_type
= TREE_TYPE (oprnd00
);
451 *type_in
= half_type
;
454 var
= vect_recog_temp_ssa_var (type
, NULL
);
455 pattern_stmt
= gimple_build_assign (var
, DOT_PROD_EXPR
,
456 oprnd00
, oprnd01
, oprnd1
);
462 /* Function vect_recog_sad_pattern
464 Try to find the following Sum of Absolute Difference (SAD) pattern:
467 signed TYPE1 diff, abs_diff;
470 sum_0 = phi <init, sum_1>
473 S3 x_T = (TYPE1) x_t;
474 S4 y_T = (TYPE1) y_t;
476 S6 abs_diff = ABS_EXPR <diff>;
477 [S7 abs_diff = (TYPE2) abs_diff; #optional]
478 S8 sum_1 = abs_diff + sum_0;
480 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
481 same size of 'TYPE1' or bigger. This is a special case of a reduction
486 * STMTS: Contains a stmt from which the pattern search begins. In the
487 example, when this function is called with S8, the pattern
488 {S3,S4,S5,S6,S7,S8} will be detected.
492 * TYPE_IN: The type of the input arguments to the pattern.
494 * TYPE_OUT: The type of the output of this pattern.
496 * Return value: A new stmt that will be used to replace the sequence of
497 stmts that constitute the pattern. In this case it will be:
498 SAD_EXPR <x_t, y_t, sum_0>
502 vect_recog_sad_pattern (vec
<gimple
*> *stmts
, tree
*type_in
,
505 gimple
*last_stmt
= (*stmts
)[0];
506 tree sad_oprnd0
, sad_oprnd1
;
507 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
508 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
510 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
517 loop
= LOOP_VINFO_LOOP (loop_info
);
519 /* We don't allow changing the order of the computation in the inner-loop
520 when doing outer-loop vectorization. */
521 if (loop
&& nested_in_vect_loop_p (loop
, last_stmt
))
524 if (!is_gimple_assign (last_stmt
))
527 tree sum_type
= gimple_expr_type (last_stmt
);
529 /* Look for the following pattern
533 DAD = ABS_EXPR <DDIFF>;
534 DDPROD = (TYPE2) DPROD;
537 - DX is at least double the size of X
538 - DY is at least double the size of Y
539 - DX, DY, DDIFF, DAD all have the same type
540 - sum is the same size of DAD or bigger
541 - sum has been recognized as a reduction variable.
543 This is equivalent to:
544 DDIFF = X w- Y; #widen sub
545 DAD = ABS_EXPR <DDIFF>;
546 sum_1 = DAD w+ sum_0; #widen summation
548 DDIFF = X w- Y; #widen sub
549 DAD = ABS_EXPR <DDIFF>;
550 sum_1 = DAD + sum_0; #summation
553 /* Starting from LAST_STMT, follow the defs of its uses in search
554 of the above pattern. */
556 if (gimple_assign_rhs_code (last_stmt
) != PLUS_EXPR
)
559 tree plus_oprnd0
, plus_oprnd1
;
561 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
564 if (!vect_reassociating_reduction_p (stmt_vinfo
))
567 plus_oprnd0
= gimple_assign_rhs1 (last_stmt
);
568 plus_oprnd1
= gimple_assign_rhs2 (last_stmt
);
570 /* The type conversion could be promotion, demotion,
571 or just signed -> unsigned. */
573 if (type_conversion_p (plus_oprnd0
, last_stmt
, false,
574 &half_type
, &def_stmt
, &promotion
))
575 plus_oprnd0
= gimple_assign_rhs1 (def_stmt
);
577 half_type
= sum_type
;
579 /* So far so good. Since last_stmt was detected as a (summation) reduction,
580 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
581 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
582 Then check that plus_oprnd0 is defined by an abs_expr. */
584 if (TREE_CODE (plus_oprnd0
) != SSA_NAME
)
587 tree abs_type
= half_type
;
588 stmt_vec_info abs_stmt_vinfo
= vect_get_internal_def (vinfo
, plus_oprnd0
);
592 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
593 inside the loop (in case we are analyzing an outer-loop). */
594 gassign
*abs_stmt
= dyn_cast
<gassign
*> (abs_stmt_vinfo
->stmt
);
596 || (gimple_assign_rhs_code (abs_stmt
) != ABS_EXPR
597 && gimple_assign_rhs_code (abs_stmt
) != ABSU_EXPR
))
600 tree abs_oprnd
= gimple_assign_rhs1 (abs_stmt
);
601 if (TYPE_UNSIGNED (abs_type
))
604 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
606 if (TREE_CODE (abs_oprnd
) != SSA_NAME
)
609 stmt_vec_info diff_stmt_vinfo
= vect_get_internal_def (vinfo
, abs_oprnd
);
610 if (!diff_stmt_vinfo
)
613 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
614 inside the loop (in case we are analyzing an outer-loop). */
615 gassign
*diff_stmt
= dyn_cast
<gassign
*> (diff_stmt_vinfo
->stmt
);
616 if (!diff_stmt
|| gimple_assign_rhs_code (diff_stmt
) != MINUS_EXPR
)
619 tree half_type0
, half_type1
;
621 tree minus_oprnd0
= gimple_assign_rhs1 (diff_stmt
);
622 tree minus_oprnd1
= gimple_assign_rhs2 (diff_stmt
);
624 if (!type_conversion_p (minus_oprnd0
, diff_stmt
, false,
625 &half_type0
, &def_stmt
, &promotion
)
628 sad_oprnd0
= gimple_assign_rhs1 (def_stmt
);
630 if (!type_conversion_p (minus_oprnd1
, diff_stmt
, false,
631 &half_type1
, &def_stmt
, &promotion
)
634 sad_oprnd1
= gimple_assign_rhs1 (def_stmt
);
636 if (!types_compatible_p (half_type0
, half_type1
))
638 if (TYPE_PRECISION (abs_type
) < TYPE_PRECISION (half_type0
) * 2
639 || TYPE_PRECISION (sum_type
) < TYPE_PRECISION (half_type0
) * 2)
642 vect_pattern_detected ("vect_recog_sad_pattern", last_stmt
);
644 *type_in
= TREE_TYPE (sad_oprnd0
);
645 *type_out
= sum_type
;
647 tree var
= vect_recog_temp_ssa_var (sum_type
, NULL
);
648 gimple
*pattern_stmt
= gimple_build_assign (var
, SAD_EXPR
, sad_oprnd0
,
649 sad_oprnd1
, plus_oprnd1
);
655 /* Handle widening operation by a constant. At the moment we support MULT_EXPR
658 For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
659 we check that CONST_OPRND is less or equal to the size of HALF_TYPE.
661 Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
662 HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
663 that satisfies the above restrictions, we can perform a widening operation
664 from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
665 with a_it = (interm_type) a_t; Store such operation in *WSTMT. */
668 vect_handle_widen_op_by_const (gimple
*stmt
, enum tree_code code
,
669 tree const_oprnd
, tree
*oprnd
,
670 gimple
**wstmt
, tree type
,
671 tree
*half_type
, gimple
*def_stmt
)
673 tree new_type
, new_oprnd
;
675 if (code
!= MULT_EXPR
&& code
!= LSHIFT_EXPR
)
678 if (((code
== MULT_EXPR
&& int_fits_type_p (const_oprnd
, *half_type
))
679 || (code
== LSHIFT_EXPR
680 && compare_tree_int (const_oprnd
, TYPE_PRECISION (*half_type
))
682 && TYPE_PRECISION (type
) == (TYPE_PRECISION (*half_type
) * 2))
684 /* CONST_OPRND is a constant of HALF_TYPE. */
685 *oprnd
= gimple_assign_rhs1 (def_stmt
);
689 if (TYPE_PRECISION (type
) < (TYPE_PRECISION (*half_type
) * 4))
692 if (!vect_same_loop_or_bb_p (stmt
, def_stmt
))
695 /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
696 a type 2 times bigger than HALF_TYPE. */
697 new_type
= build_nonstandard_integer_type (TYPE_PRECISION (type
) / 2,
698 TYPE_UNSIGNED (type
));
699 if ((code
== MULT_EXPR
&& !int_fits_type_p (const_oprnd
, new_type
))
700 || (code
== LSHIFT_EXPR
701 && compare_tree_int (const_oprnd
, TYPE_PRECISION (new_type
)) == 1))
704 /* Use NEW_TYPE for widening operation and create a_T = (NEW_TYPE) a_t; */
705 *oprnd
= gimple_assign_rhs1 (def_stmt
);
706 new_oprnd
= make_ssa_name (new_type
);
707 *wstmt
= gimple_build_assign (new_oprnd
, NOP_EXPR
, *oprnd
);
710 *half_type
= new_type
;
715 /* Function vect_recog_widen_mult_pattern
717 Try to find the following pattern:
721 TYPE a_T, b_T, prod_T;
727 S5 prod_T = a_T * b_T;
729 where type 'TYPE' is at least double the size of type 'type1' and 'type2'.
731 Also detect unsigned cases:
735 unsigned TYPE u_prod_T;
736 TYPE a_T, b_T, prod_T;
742 S5 prod_T = a_T * b_T;
743 S6 u_prod_T = (unsigned TYPE) prod_T;
745 and multiplication by constants:
752 S5 prod_T = a_T * CONST;
754 A special case of multiplication by constants is when 'TYPE' is 4 times
755 bigger than 'type', but CONST fits an intermediate type 2 times smaller
756 than 'TYPE'. In that case we create an additional pattern stmt for S3
757 to create a variable of the intermediate type, and perform widen-mult
758 on the intermediate type as well:
762 TYPE a_T, prod_T, prod_T';
766 '--> a_it = (interm_type) a_t;
767 S5 prod_T = a_T * CONST;
768 '--> prod_T' = a_it w* CONST;
772 * STMTS: Contains a stmt from which the pattern search begins. In the
773 example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
774 is detected. In case of unsigned widen-mult, the original stmt (S5) is
775 replaced with S6 in STMTS. In case of multiplication by a constant
776 of an intermediate type (the last case above), STMTS also contains S3
777 (inserted before S5).
781 * TYPE_IN: The type of the input arguments to the pattern.
783 * TYPE_OUT: The type of the output of this pattern.
785 * Return value: A new stmt that will be used to replace the sequence of
786 stmts that constitute the pattern. In this case it will be:
787 WIDEN_MULT <a_t, b_t>
788 If the result of WIDEN_MULT needs to be converted to a larger type, the
789 returned stmt will be this type conversion stmt.
793 vect_recog_widen_mult_pattern (vec
<gimple
*> *stmts
,
794 tree
*type_in
, tree
*type_out
)
796 gimple
*last_stmt
= stmts
->pop ();
797 gimple
*def_stmt0
, *def_stmt1
;
799 tree type
, half_type0
, half_type1
;
800 gimple
*new_stmt
= NULL
, *pattern_stmt
= NULL
;
801 tree vectype
, vecitype
;
803 enum tree_code dummy_code
;
809 if (!is_gimple_assign (last_stmt
))
812 type
= gimple_expr_type (last_stmt
);
814 /* Starting from LAST_STMT, follow the defs of its uses in search
815 of the above pattern. */
817 if (gimple_assign_rhs_code (last_stmt
) != MULT_EXPR
)
820 oprnd0
= gimple_assign_rhs1 (last_stmt
);
821 oprnd1
= gimple_assign_rhs2 (last_stmt
);
823 /* Check argument 0. */
824 if (!type_conversion_p (oprnd0
, last_stmt
, false, &half_type0
, &def_stmt0
,
828 /* Check argument 1. */
829 op1_ok
= type_conversion_p (oprnd1
, last_stmt
, false, &half_type1
,
830 &def_stmt1
, &promotion
);
832 if (op1_ok
&& promotion
)
834 oprnd0
= gimple_assign_rhs1 (def_stmt0
);
835 oprnd1
= gimple_assign_rhs1 (def_stmt1
);
839 if (TREE_CODE (oprnd1
) == INTEGER_CST
840 && TREE_CODE (half_type0
) == INTEGER_TYPE
841 && vect_handle_widen_op_by_const (last_stmt
, MULT_EXPR
, oprnd1
,
842 &oprnd0
, &new_stmt
, type
,
843 &half_type0
, def_stmt0
))
845 half_type1
= half_type0
;
846 oprnd1
= fold_convert (half_type1
, oprnd1
);
852 /* If the two arguments have different sizes, convert the one with
853 the smaller type into the larger type. */
854 if (TYPE_PRECISION (half_type0
) != TYPE_PRECISION (half_type1
))
856 /* If we already used up the single-stmt slot give up. */
861 gimple
*def_stmt
= NULL
;
863 if (TYPE_PRECISION (half_type0
) < TYPE_PRECISION (half_type1
))
865 def_stmt
= def_stmt0
;
866 half_type0
= half_type1
;
871 def_stmt
= def_stmt1
;
872 half_type1
= half_type0
;
876 tree old_oprnd
= gimple_assign_rhs1 (def_stmt
);
877 tree new_oprnd
= make_ssa_name (half_type0
);
878 new_stmt
= gimple_build_assign (new_oprnd
, NOP_EXPR
, old_oprnd
);
882 /* Handle unsigned case. Look for
883 S6 u_prod_T = (unsigned TYPE) prod_T;
884 Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */
885 if (TYPE_UNSIGNED (type
) != TYPE_UNSIGNED (half_type0
))
891 if (TYPE_UNSIGNED (type
) == TYPE_UNSIGNED (half_type1
))
894 use_stmt
= vect_single_imm_use (last_stmt
);
895 if (!use_stmt
|| !is_gimple_assign (use_stmt
)
896 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt
)))
899 use_lhs
= gimple_assign_lhs (use_stmt
);
900 use_type
= TREE_TYPE (use_lhs
);
901 if (!INTEGRAL_TYPE_P (use_type
)
902 || (TYPE_UNSIGNED (type
) == TYPE_UNSIGNED (use_type
))
903 || (TYPE_PRECISION (type
) != TYPE_PRECISION (use_type
)))
907 last_stmt
= use_stmt
;
910 if (!types_compatible_p (half_type0
, half_type1
))
913 /* If TYPE is more than twice larger than HALF_TYPE, we use WIDEN_MULT
914 to get an intermediate result of type ITYPE. In this case we need
915 to build a statement to convert this intermediate result to type TYPE. */
917 if (TYPE_PRECISION (type
) > TYPE_PRECISION (half_type0
) * 2)
918 itype
= build_nonstandard_integer_type
919 (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (half_type0
)) * 2,
920 TYPE_UNSIGNED (type
));
922 /* Pattern detected. */
923 vect_pattern_detected ("vect_recog_widen_mult_pattern", last_stmt
);
925 /* Check target support */
926 vectype
= get_vectype_for_scalar_type (half_type0
);
927 vecitype
= get_vectype_for_scalar_type (itype
);
930 || !supportable_widening_operation (WIDEN_MULT_EXPR
, last_stmt
,
932 &dummy_code
, &dummy_code
,
933 &dummy_int
, &dummy_vec
))
937 *type_out
= get_vectype_for_scalar_type (type
);
939 /* Pattern supported. Create a stmt to be used to replace the pattern: */
940 var
= vect_recog_temp_ssa_var (itype
, NULL
);
941 pattern_stmt
= gimple_build_assign (var
, WIDEN_MULT_EXPR
, oprnd0
, oprnd1
);
943 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
944 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
) = NULL
;
946 /* If the original two operands have different sizes, we may need to convert
947 the smaller one into the larger type. If this is the case, at this point
948 the new stmt is already built. */
951 append_pattern_def_seq (stmt_vinfo
, new_stmt
);
952 stmt_vec_info new_stmt_info
953 = new_stmt_vec_info (new_stmt
, stmt_vinfo
->vinfo
);
954 set_vinfo_for_stmt (new_stmt
, new_stmt_info
);
955 STMT_VINFO_VECTYPE (new_stmt_info
) = vectype
;
958 /* If ITYPE is not TYPE, we need to build a type conversion stmt to convert
959 the result of the widen-mult operation into type TYPE. */
962 append_pattern_def_seq (stmt_vinfo
, pattern_stmt
);
963 stmt_vec_info pattern_stmt_info
964 = new_stmt_vec_info (pattern_stmt
, stmt_vinfo
->vinfo
);
965 set_vinfo_for_stmt (pattern_stmt
, pattern_stmt_info
);
966 STMT_VINFO_VECTYPE (pattern_stmt_info
) = vecitype
;
967 pattern_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (type
, NULL
),
969 gimple_assign_lhs (pattern_stmt
));
972 stmts
->safe_push (last_stmt
);
977 /* Function vect_recog_pow_pattern
979 Try to find the following pattern:
983 with POW being one of pow, powf, powi, powif and N being
988 * LAST_STMT: A stmt from which the pattern search begins.
992 * TYPE_IN: The type of the input arguments to the pattern.
994 * TYPE_OUT: The type of the output of this pattern.
996 * Return value: A new stmt that will be used to replace the sequence of
997 stmts that constitute the pattern. In this case it will be:
1004 vect_recog_pow_pattern (vec
<gimple
*> *stmts
, tree
*type_in
,
1007 gimple
*last_stmt
= (*stmts
)[0];
1012 if (!is_gimple_call (last_stmt
) || gimple_call_lhs (last_stmt
) == NULL
)
1015 switch (gimple_call_combined_fn (last_stmt
))
1025 base
= gimple_call_arg (last_stmt
, 0);
1026 exp
= gimple_call_arg (last_stmt
, 1);
1027 if (TREE_CODE (exp
) != REAL_CST
1028 && TREE_CODE (exp
) != INTEGER_CST
)
1030 if (flag_unsafe_math_optimizations
1031 && TREE_CODE (base
) == REAL_CST
1032 && !gimple_call_internal_p (last_stmt
))
1034 combined_fn log_cfn
;
1035 built_in_function exp_bfn
;
1036 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt
)))
1039 log_cfn
= CFN_BUILT_IN_LOG
;
1040 exp_bfn
= BUILT_IN_EXP
;
1043 log_cfn
= CFN_BUILT_IN_LOGF
;
1044 exp_bfn
= BUILT_IN_EXPF
;
1047 log_cfn
= CFN_BUILT_IN_LOGL
;
1048 exp_bfn
= BUILT_IN_EXPL
;
1053 tree logc
= fold_const_call (log_cfn
, TREE_TYPE (base
), base
);
1054 tree exp_decl
= builtin_decl_implicit (exp_bfn
);
1055 /* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd
1056 does that, but if C is a power of 2, we want to use
1057 exp2 (log2 (C) * x) in the non-vectorized version, but for
1058 vectorization we don't have vectorized exp2. */
1060 && TREE_CODE (logc
) == REAL_CST
1062 && lookup_attribute ("omp declare simd",
1063 DECL_ATTRIBUTES (exp_decl
)))
1065 cgraph_node
*node
= cgraph_node::get_create (exp_decl
);
1066 if (node
->simd_clones
== NULL
)
1068 if (targetm
.simd_clone
.compute_vecsize_and_simdlen
== NULL
1069 || node
->definition
)
1071 expand_simd_clones (node
);
1072 if (node
->simd_clones
== NULL
)
1075 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
1076 tree def
= vect_recog_temp_ssa_var (TREE_TYPE (base
), NULL
);
1077 gimple
*g
= gimple_build_assign (def
, MULT_EXPR
, exp
, logc
);
1078 new_pattern_def_seq (stmt_vinfo
, g
);
1079 *type_in
= TREE_TYPE (base
);
1080 *type_out
= NULL_TREE
;
1081 tree res
= vect_recog_temp_ssa_var (TREE_TYPE (base
), NULL
);
1082 g
= gimple_build_call (exp_decl
, 1, def
);
1083 gimple_call_set_lhs (g
, res
);
1091 /* We now have a pow or powi builtin function call with a constant
1094 *type_out
= NULL_TREE
;
1096 /* Catch squaring. */
1097 if ((tree_fits_shwi_p (exp
)
1098 && tree_to_shwi (exp
) == 2)
1099 || (TREE_CODE (exp
) == REAL_CST
1100 && real_equal (&TREE_REAL_CST (exp
), &dconst2
)))
1102 *type_in
= TREE_TYPE (base
);
1104 var
= vect_recog_temp_ssa_var (TREE_TYPE (base
), NULL
);
1105 stmt
= gimple_build_assign (var
, MULT_EXPR
, base
, base
);
1109 /* Catch square root. */
1110 if (TREE_CODE (exp
) == REAL_CST
1111 && real_equal (&TREE_REAL_CST (exp
), &dconsthalf
))
1113 *type_in
= get_vectype_for_scalar_type (TREE_TYPE (base
));
1115 && direct_internal_fn_supported_p (IFN_SQRT
, *type_in
,
1116 OPTIMIZE_FOR_SPEED
))
1118 gcall
*stmt
= gimple_build_call_internal (IFN_SQRT
, 1, base
);
1119 var
= vect_recog_temp_ssa_var (TREE_TYPE (base
), stmt
);
1120 gimple_call_set_lhs (stmt
, var
);
1121 gimple_call_set_nothrow (stmt
, true);
1130 /* Function vect_recog_widen_sum_pattern
1132 Try to find the following pattern:
1135 TYPE x_T, sum = init;
1137 sum_0 = phi <init, sum_1>
1139 S2 x_T = (TYPE) x_t;
1140 S3 sum_1 = x_T + sum_0;
1142 where type 'TYPE' is at least double the size of type 'type', i.e - we're
1143 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1144 a special case of a reduction computation.
1148 * LAST_STMT: A stmt from which the pattern search begins. In the example,
1149 when this function is called with S3, the pattern {S2,S3} will be detected.
1153 * TYPE_IN: The type of the input arguments to the pattern.
1155 * TYPE_OUT: The type of the output of this pattern.
1157 * Return value: A new stmt that will be used to replace the sequence of
1158 stmts that constitute the pattern. In this case it will be:
1159 WIDEN_SUM <x_t, sum_0>
1161 Note: The widening-sum idiom is a widening reduction pattern that is
1162 vectorized without preserving all the intermediate results. It
1163 produces only N/2 (widened) results (by summing up pairs of
1164 intermediate results) rather than all N results. Therefore, we
1165 cannot allow this pattern when we want to get all the results and in
1166 the correct order (as is the case when this computation is in an
1167 inner-loop nested in an outer-loop that is being vectorized). */
1170 vect_recog_widen_sum_pattern (vec
<gimple
*> *stmts
, tree
*type_in
,
1173 gimple
*stmt
, *last_stmt
= (*stmts
)[0];
1174 tree oprnd0
, oprnd1
;
1175 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
1176 tree type
, half_type
;
1177 gimple
*pattern_stmt
;
1178 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1186 loop
= LOOP_VINFO_LOOP (loop_info
);
1188 /* We don't allow changing the order of the computation in the inner-loop
1189 when doing outer-loop vectorization. */
1190 if (loop
&& nested_in_vect_loop_p (loop
, last_stmt
))
1193 if (!is_gimple_assign (last_stmt
))
1196 type
= gimple_expr_type (last_stmt
);
1198 /* Look for the following pattern
1201 In which DX is at least double the size of X, and sum_1 has been
1202 recognized as a reduction variable.
1205 /* Starting from LAST_STMT, follow the defs of its uses in search
1206 of the above pattern. */
1208 if (gimple_assign_rhs_code (last_stmt
) != PLUS_EXPR
)
1211 if (!vect_reassociating_reduction_p (stmt_vinfo
))
1214 oprnd0
= gimple_assign_rhs1 (last_stmt
);
1215 oprnd1
= gimple_assign_rhs2 (last_stmt
);
1217 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1218 we know that oprnd1 is the reduction variable (defined by a loop-header
1219 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1220 Left to check that oprnd0 is defined by a cast from type 'type' to type
1223 if (!type_conversion_p (oprnd0
, last_stmt
, true, &half_type
, &stmt
,
1228 oprnd0
= gimple_assign_rhs1 (stmt
);
1230 vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt
);
1232 *type_in
= half_type
;
1235 var
= vect_recog_temp_ssa_var (type
, NULL
);
1236 pattern_stmt
= gimple_build_assign (var
, WIDEN_SUM_EXPR
, oprnd0
, oprnd1
);
1238 return pattern_stmt
;
1242 /* Return TRUE if the operation in STMT can be performed on a smaller type.
1245 STMT - a statement to check.
1246 DEF - we support operations with two operands, one of which is constant.
1247 The other operand can be defined by a demotion operation, or by a
1248 previous statement in a sequence of over-promoted operations. In the
1249 later case DEF is used to replace that operand. (It is defined by a
1250 pattern statement we created for the previous statement in the
1254 NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not
1255 NULL, it's the type of DEF.
1256 STMTS - additional pattern statements. If a pattern statement (type
1257 conversion) is created in this function, its original statement is
1261 OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
1262 operands to use in the new pattern statement for STMT (will be created
1263 in vect_recog_over_widening_pattern ()).
1264 NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
1265 statements for STMT: the first one is a type promotion and the second
1266 one is the operation itself. We return the type promotion statement
1267 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
1268 the second pattern statement. */
1271 vect_operation_fits_smaller_type (gimple
*stmt
, tree def
, tree
*new_type
,
1272 tree
*op0
, tree
*op1
, gimple
**new_def_stmt
,
1273 vec
<gimple
*> *stmts
)
1275 enum tree_code code
;
1276 tree const_oprnd
, oprnd
;
1277 tree interm_type
= NULL_TREE
, half_type
, new_oprnd
, type
;
1278 gimple
*def_stmt
, *new_stmt
;
1284 *new_def_stmt
= NULL
;
1286 if (!is_gimple_assign (stmt
))
1289 code
= gimple_assign_rhs_code (stmt
);
1290 if (code
!= LSHIFT_EXPR
&& code
!= RSHIFT_EXPR
1291 && code
!= BIT_IOR_EXPR
&& code
!= BIT_XOR_EXPR
&& code
!= BIT_AND_EXPR
)
1294 oprnd
= gimple_assign_rhs1 (stmt
);
1295 const_oprnd
= gimple_assign_rhs2 (stmt
);
1296 type
= gimple_expr_type (stmt
);
1298 if (TREE_CODE (oprnd
) != SSA_NAME
1299 || TREE_CODE (const_oprnd
) != INTEGER_CST
)
1302 /* If oprnd has other uses besides that in stmt we cannot mark it
1303 as being part of a pattern only. */
1304 if (!has_single_use (oprnd
))
1307 /* If we are in the middle of a sequence, we use DEF from a previous
1308 statement. Otherwise, OPRND has to be a result of type promotion. */
1311 half_type
= *new_type
;
1317 if (!type_conversion_p (oprnd
, stmt
, false, &half_type
, &def_stmt
,
1320 || !vect_same_loop_or_bb_p (stmt
, def_stmt
))
1324 /* Can we perform the operation on a smaller type? */
1330 if (!int_fits_type_p (const_oprnd
, half_type
))
1332 /* HALF_TYPE is not enough. Try a bigger type if possible. */
1333 if (TYPE_PRECISION (type
) < (TYPE_PRECISION (half_type
) * 4))
1336 interm_type
= build_nonstandard_integer_type (
1337 TYPE_PRECISION (half_type
) * 2, TYPE_UNSIGNED (type
));
1338 if (!int_fits_type_p (const_oprnd
, interm_type
))
1345 /* Try intermediate type - HALF_TYPE is not enough for sure. */
1346 if (TYPE_PRECISION (type
) < (TYPE_PRECISION (half_type
) * 4))
1349 /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
1350 (e.g., if the original value was char, the shift amount is at most 8
1351 if we want to use short). */
1352 if (compare_tree_int (const_oprnd
, TYPE_PRECISION (half_type
)) == 1)
1355 interm_type
= build_nonstandard_integer_type (
1356 TYPE_PRECISION (half_type
) * 2, TYPE_UNSIGNED (type
));
1358 if (!vect_supportable_shift (code
, interm_type
))
1364 if (vect_supportable_shift (code
, half_type
))
1367 /* Try intermediate type - HALF_TYPE is not supported. */
1368 if (TYPE_PRECISION (type
) < (TYPE_PRECISION (half_type
) * 4))
1371 interm_type
= build_nonstandard_integer_type (
1372 TYPE_PRECISION (half_type
) * 2, TYPE_UNSIGNED (type
));
1374 if (!vect_supportable_shift (code
, interm_type
))
1383 /* There are four possible cases:
1384 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
1385 the first statement in the sequence)
1386 a. The original, HALF_TYPE, is not enough - we replace the promotion
1387 from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
1388 b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
1390 2. OPRND is defined by a pattern statement we created.
1391 a. Its type is not sufficient for the operation, we create a new stmt:
1392 a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store
1393 this statement in NEW_DEF_STMT, and it is later put in
1394 STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
1395 b. OPRND is good to use in the new statement. */
1400 /* Replace the original type conversion HALF_TYPE->TYPE with
1401 HALF_TYPE->INTERM_TYPE. */
1402 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
)))
1404 new_stmt
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
));
1405 /* Check if the already created pattern stmt is what we need. */
1406 if (!is_gimple_assign (new_stmt
)
1407 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt
))
1408 || TREE_TYPE (gimple_assign_lhs (new_stmt
)) != interm_type
)
1411 stmts
->safe_push (def_stmt
);
1412 oprnd
= gimple_assign_lhs (new_stmt
);
1416 /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */
1417 oprnd
= gimple_assign_rhs1 (def_stmt
);
1418 new_oprnd
= make_ssa_name (interm_type
);
1419 new_stmt
= gimple_build_assign (new_oprnd
, NOP_EXPR
, oprnd
);
1420 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
)) = new_stmt
;
1421 stmts
->safe_push (def_stmt
);
1427 /* Retrieve the operand before the type promotion. */
1428 oprnd
= gimple_assign_rhs1 (def_stmt
);
1435 /* Create a type conversion HALF_TYPE->INTERM_TYPE. */
1436 new_oprnd
= make_ssa_name (interm_type
);
1437 new_stmt
= gimple_build_assign (new_oprnd
, NOP_EXPR
, oprnd
);
1439 *new_def_stmt
= new_stmt
;
1442 /* Otherwise, OPRND is already set. */
1446 *new_type
= interm_type
;
1448 *new_type
= half_type
;
1451 *op1
= fold_convert (*new_type
, const_oprnd
);
1457 /* Try to find a statement or a sequence of statements that can be performed
1461 TYPE x_T, res0_T, res1_T;
1464 S2 x_T = (TYPE) x_t;
1465 S3 res0_T = op (x_T, C0);
1466 S4 res1_T = op (res0_T, C1);
1467 S5 ... = () res1_T; - type demotion
1469 where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
1471 Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
1472 be 'type' or some intermediate type. For now, we expect S5 to be a type
1473 demotion operation. We also check that S3 and S4 have only one use. */
1476 vect_recog_over_widening_pattern (vec
<gimple
*> *stmts
,
1477 tree
*type_in
, tree
*type_out
)
1479 gimple
*stmt
= stmts
->pop ();
1480 gimple
*pattern_stmt
= NULL
, *new_def_stmt
, *prev_stmt
= NULL
,
1482 tree op0
, op1
, vectype
= NULL_TREE
, use_lhs
, use_type
;
1483 tree var
= NULL_TREE
, new_type
= NULL_TREE
, new_oprnd
;
1490 if (!vinfo_for_stmt (stmt
)
1491 || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt
)))
1494 new_def_stmt
= NULL
;
1495 if (!vect_operation_fits_smaller_type (stmt
, var
, &new_type
,
1496 &op0
, &op1
, &new_def_stmt
,
1505 /* STMT can be performed on a smaller type. Check its uses. */
1506 use_stmt
= vect_single_imm_use (stmt
);
1507 if (!use_stmt
|| !is_gimple_assign (use_stmt
))
1510 /* Create pattern statement for STMT. */
1511 vectype
= get_vectype_for_scalar_type (new_type
);
1515 /* We want to collect all the statements for which we create pattern
1516 statetments, except for the case when the last statement in the
1517 sequence doesn't have a corresponding pattern statement. In such
1518 case we associate the last pattern statement with the last statement
1519 in the sequence. Therefore, we only add the original statement to
1520 the list if we know that it is not the last. */
1522 stmts
->safe_push (prev_stmt
);
1524 var
= vect_recog_temp_ssa_var (new_type
, NULL
);
1526 = gimple_build_assign (var
, gimple_assign_rhs_code (stmt
), op0
, op1
);
1527 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt
)) = pattern_stmt
;
1528 new_pattern_def_seq (vinfo_for_stmt (stmt
), new_def_stmt
);
1530 if (dump_enabled_p ())
1532 dump_printf_loc (MSG_NOTE
, vect_location
,
1533 "created pattern stmt: ");
1534 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
1537 type
= gimple_expr_type (stmt
);
1544 /* We got a sequence. We expect it to end with a type demotion operation.
1545 Otherwise, we quit (for now). There are three possible cases: the
1546 conversion is to NEW_TYPE (we don't do anything), the conversion is to
1547 a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
1548 NEW_TYPE differs (we create a new conversion statement). */
1549 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt
)))
1551 use_lhs
= gimple_assign_lhs (use_stmt
);
1552 use_type
= TREE_TYPE (use_lhs
);
1553 /* Support only type demotion or signedess change. */
1554 if (!INTEGRAL_TYPE_P (use_type
)
1555 || TYPE_PRECISION (type
) <= TYPE_PRECISION (use_type
))
1558 /* Check that NEW_TYPE is not bigger than the conversion result. */
1559 if (TYPE_PRECISION (new_type
) > TYPE_PRECISION (use_type
))
1562 if (TYPE_UNSIGNED (new_type
) != TYPE_UNSIGNED (use_type
)
1563 || TYPE_PRECISION (new_type
) != TYPE_PRECISION (use_type
))
1565 /* Create NEW_TYPE->USE_TYPE conversion. */
1566 new_oprnd
= make_ssa_name (use_type
);
1567 pattern_stmt
= gimple_build_assign (new_oprnd
, NOP_EXPR
, var
);
1568 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt
)) = pattern_stmt
;
1570 *type_in
= get_vectype_for_scalar_type (new_type
);
1571 *type_out
= get_vectype_for_scalar_type (use_type
);
1573 /* We created a pattern statement for the last statement in the
1574 sequence, so we don't need to associate it with the pattern
1575 statement created for PREV_STMT. Therefore, we add PREV_STMT
1576 to the list in order to mark it later in vect_pattern_recog_1. */
1578 stmts
->safe_push (prev_stmt
);
1583 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt
))
1584 = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt
));
1587 *type_out
= NULL_TREE
;
1590 stmts
->safe_push (use_stmt
);
1593 /* TODO: support general case, create a conversion to the correct type. */
1596 /* Pattern detected. */
1597 vect_pattern_detected ("vect_recog_over_widening_pattern", stmts
->last ());
1599 return pattern_stmt
;
1602 /* Detect widening shift pattern:
1608 S2 a_T = (TYPE) a_t;
1609 S3 res_T = a_T << CONST;
1611 where type 'TYPE' is at least double the size of type 'type'.
1613 Also detect cases where the shift result is immediately converted
1614 to another type 'result_type' that is no larger in size than 'TYPE'.
1615 In those cases we perform a widen-shift that directly results in
1616 'result_type', to avoid a possible over-widening situation:
1620 result_type res_result;
1623 S2 a_T = (TYPE) a_t;
1624 S3 res_T = a_T << CONST;
1625 S4 res_result = (result_type) res_T;
1626 '--> res_result' = a_t w<< CONST;
1628 And a case when 'TYPE' is 4 times bigger than 'type'. In that case we
1629 create an additional pattern stmt for S2 to create a variable of an
1630 intermediate type, and perform widen-shift on the intermediate type:
1634 TYPE a_T, res_T, res_T';
1637 S2 a_T = (TYPE) a_t;
1638 '--> a_it = (interm_type) a_t;
1639 S3 res_T = a_T << CONST;
1640 '--> res_T' = a_it <<* CONST;
1644 * STMTS: Contains a stmt from which the pattern search begins.
1645 In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
1646 in STMTS. When an intermediate type is used and a pattern statement is
1647 created for S2, we also put S2 here (before S3).
1651 * TYPE_IN: The type of the input arguments to the pattern.
1653 * TYPE_OUT: The type of the output of this pattern.
1655 * Return value: A new stmt that will be used to replace the sequence of
1656 stmts that constitute the pattern. In this case it will be:
1657 WIDEN_LSHIFT_EXPR <a_t, CONST>. */
1660 vect_recog_widen_shift_pattern (vec
<gimple
*> *stmts
,
1661 tree
*type_in
, tree
*type_out
)
1663 gimple
*last_stmt
= stmts
->pop ();
1665 tree oprnd0
, oprnd1
;
1666 tree type
, half_type0
;
1667 gimple
*pattern_stmt
;
1668 tree vectype
, vectype_out
= NULL_TREE
;
1670 enum tree_code dummy_code
;
1672 vec
<tree
> dummy_vec
;
1676 if (!is_gimple_assign (last_stmt
) || !vinfo_for_stmt (last_stmt
))
1679 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt
)))
1682 if (gimple_assign_rhs_code (last_stmt
) != LSHIFT_EXPR
)
1685 oprnd0
= gimple_assign_rhs1 (last_stmt
);
1686 oprnd1
= gimple_assign_rhs2 (last_stmt
);
1687 if (TREE_CODE (oprnd0
) != SSA_NAME
|| TREE_CODE (oprnd1
) != INTEGER_CST
)
1690 /* Check operand 0: it has to be defined by a type promotion. */
1691 if (!type_conversion_p (oprnd0
, last_stmt
, false, &half_type0
, &def_stmt0
,
1696 /* Check operand 1: has to be positive. We check that it fits the type
1697 in vect_handle_widen_op_by_const (). */
1698 if (tree_int_cst_compare (oprnd1
, size_zero_node
) <= 0)
1701 oprnd0
= gimple_assign_rhs1 (def_stmt0
);
1702 type
= gimple_expr_type (last_stmt
);
1704 /* Check for subsequent conversion to another type. */
1705 use_stmt
= vect_single_imm_use (last_stmt
);
1706 if (use_stmt
&& is_gimple_assign (use_stmt
)
1707 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt
))
1708 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt
)))
1710 tree use_lhs
= gimple_assign_lhs (use_stmt
);
1711 tree use_type
= TREE_TYPE (use_lhs
);
1713 if (INTEGRAL_TYPE_P (use_type
)
1714 && TYPE_PRECISION (use_type
) <= TYPE_PRECISION (type
))
1716 last_stmt
= use_stmt
;
1721 /* Check if this a widening operation. */
1722 gimple
*wstmt
= NULL
;
1723 if (!vect_handle_widen_op_by_const (last_stmt
, LSHIFT_EXPR
, oprnd1
,
1725 type
, &half_type0
, def_stmt0
))
1728 /* Pattern detected. */
1729 vect_pattern_detected ("vect_recog_widen_shift_pattern", last_stmt
);
1731 /* Check target support. */
1732 vectype
= get_vectype_for_scalar_type (half_type0
);
1733 vectype_out
= get_vectype_for_scalar_type (type
);
1737 || !supportable_widening_operation (WIDEN_LSHIFT_EXPR
, last_stmt
,
1738 vectype_out
, vectype
,
1739 &dummy_code
, &dummy_code
,
1740 &dummy_int
, &dummy_vec
))
1744 *type_out
= vectype_out
;
1746 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1747 var
= vect_recog_temp_ssa_var (type
, NULL
);
1749 = gimple_build_assign (var
, WIDEN_LSHIFT_EXPR
, oprnd0
, oprnd1
);
1752 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
1753 new_pattern_def_seq (stmt_vinfo
, wstmt
);
1754 stmt_vec_info new_stmt_info
1755 = new_stmt_vec_info (wstmt
, stmt_vinfo
->vinfo
);
1756 set_vinfo_for_stmt (wstmt
, new_stmt_info
);
1757 STMT_VINFO_VECTYPE (new_stmt_info
) = vectype
;
1760 stmts
->safe_push (last_stmt
);
1761 return pattern_stmt
;
1764 /* Detect a rotate pattern wouldn't be otherwise vectorized:
1768 S0 a_t = b_t r<< c_t;
1772 * STMTS: Contains a stmt from which the pattern search begins,
1773 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1777 S2 e_t = d_t & (B - 1);
1778 S3 f_t = b_t << c_t;
1779 S4 g_t = b_t >> e_t;
1782 where B is element bitsize of type.
1786 * TYPE_IN: The type of the input arguments to the pattern.
1788 * TYPE_OUT: The type of the output of this pattern.
1790 * Return value: A new stmt that will be used to replace the rotate
1794 vect_recog_rotate_pattern (vec
<gimple
*> *stmts
, tree
*type_in
, tree
*type_out
)
1796 gimple
*last_stmt
= stmts
->pop ();
1797 tree oprnd0
, oprnd1
, lhs
, var
, var1
, var2
, vectype
, type
, stype
, def
, def2
;
1798 gimple
*pattern_stmt
, *def_stmt
;
1799 enum tree_code rhs_code
;
1800 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
1801 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
1802 enum vect_def_type dt
;
1803 optab optab1
, optab2
;
1804 edge ext_def
= NULL
;
1806 if (!is_gimple_assign (last_stmt
))
1809 rhs_code
= gimple_assign_rhs_code (last_stmt
);
1819 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
1822 lhs
= gimple_assign_lhs (last_stmt
);
1823 oprnd0
= gimple_assign_rhs1 (last_stmt
);
1824 type
= TREE_TYPE (oprnd0
);
1825 oprnd1
= gimple_assign_rhs2 (last_stmt
);
1826 if (TREE_CODE (oprnd0
) != SSA_NAME
1827 || TYPE_PRECISION (TREE_TYPE (lhs
)) != TYPE_PRECISION (type
)
1828 || !INTEGRAL_TYPE_P (type
)
1829 || !TYPE_UNSIGNED (type
))
1832 if (!vect_is_simple_use (oprnd1
, vinfo
, &def_stmt
, &dt
))
1835 if (dt
!= vect_internal_def
1836 && dt
!= vect_constant_def
1837 && dt
!= vect_external_def
)
1840 vectype
= get_vectype_for_scalar_type (type
);
1841 if (vectype
== NULL_TREE
)
1844 /* If vector/vector or vector/scalar rotate is supported by the target,
1845 don't do anything here. */
1846 optab1
= optab_for_tree_code (rhs_code
, vectype
, optab_vector
);
1848 && optab_handler (optab1
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
1851 if (is_a
<bb_vec_info
> (vinfo
) || dt
!= vect_internal_def
)
1853 optab2
= optab_for_tree_code (rhs_code
, vectype
, optab_scalar
);
1855 && optab_handler (optab2
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
1859 /* If vector/vector or vector/scalar shifts aren't supported by the target,
1860 don't do anything here either. */
1861 optab1
= optab_for_tree_code (LSHIFT_EXPR
, vectype
, optab_vector
);
1862 optab2
= optab_for_tree_code (RSHIFT_EXPR
, vectype
, optab_vector
);
1864 || optab_handler (optab1
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
1866 || optab_handler (optab2
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
1868 if (! is_a
<bb_vec_info
> (vinfo
) && dt
== vect_internal_def
)
1870 optab1
= optab_for_tree_code (LSHIFT_EXPR
, vectype
, optab_scalar
);
1871 optab2
= optab_for_tree_code (RSHIFT_EXPR
, vectype
, optab_scalar
);
1873 || optab_handler (optab1
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
1875 || optab_handler (optab2
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
1880 *type_out
= vectype
;
1881 if (*type_in
== NULL_TREE
)
1884 if (dt
== vect_external_def
1885 && TREE_CODE (oprnd1
) == SSA_NAME
1886 && is_a
<loop_vec_info
> (vinfo
))
1888 struct loop
*loop
= as_a
<loop_vec_info
> (vinfo
)->loop
;
1889 ext_def
= loop_preheader_edge (loop
);
1890 if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1
))
1892 basic_block bb
= gimple_bb (SSA_NAME_DEF_STMT (oprnd1
));
1894 || !dominated_by_p (CDI_DOMINATORS
, ext_def
->dest
, bb
))
1900 scalar_int_mode mode
= SCALAR_INT_TYPE_MODE (type
);
1901 if (TREE_CODE (oprnd1
) == INTEGER_CST
1902 || TYPE_MODE (TREE_TYPE (oprnd1
)) == mode
)
1904 else if (def_stmt
&& gimple_assign_cast_p (def_stmt
))
1906 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
1907 if (TYPE_MODE (TREE_TYPE (rhs1
)) == mode
1908 && TYPE_PRECISION (TREE_TYPE (rhs1
))
1909 == TYPE_PRECISION (type
))
1913 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
) = NULL
;
1914 if (def
== NULL_TREE
)
1916 def
= vect_recog_temp_ssa_var (type
, NULL
);
1917 def_stmt
= gimple_build_assign (def
, NOP_EXPR
, oprnd1
);
1921 = gsi_insert_on_edge_immediate (ext_def
, def_stmt
);
1922 gcc_assert (!new_bb
);
1925 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
1927 stype
= TREE_TYPE (def
);
1928 scalar_int_mode smode
= SCALAR_INT_TYPE_MODE (stype
);
1930 if (TREE_CODE (def
) == INTEGER_CST
)
1932 if (!tree_fits_uhwi_p (def
)
1933 || tree_to_uhwi (def
) >= GET_MODE_PRECISION (mode
)
1934 || integer_zerop (def
))
1936 def2
= build_int_cst (stype
,
1937 GET_MODE_PRECISION (mode
) - tree_to_uhwi (def
));
1941 tree vecstype
= get_vectype_for_scalar_type (stype
);
1942 stmt_vec_info def_stmt_vinfo
;
1944 if (vecstype
== NULL_TREE
)
1946 def2
= vect_recog_temp_ssa_var (stype
, NULL
);
1947 def_stmt
= gimple_build_assign (def2
, NEGATE_EXPR
, def
);
1951 = gsi_insert_on_edge_immediate (ext_def
, def_stmt
);
1952 gcc_assert (!new_bb
);
1956 def_stmt_vinfo
= new_stmt_vec_info (def_stmt
, vinfo
);
1957 set_vinfo_for_stmt (def_stmt
, def_stmt_vinfo
);
1958 STMT_VINFO_VECTYPE (def_stmt_vinfo
) = vecstype
;
1959 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
1962 def2
= vect_recog_temp_ssa_var (stype
, NULL
);
1963 tree mask
= build_int_cst (stype
, GET_MODE_PRECISION (smode
) - 1);
1964 def_stmt
= gimple_build_assign (def2
, BIT_AND_EXPR
,
1965 gimple_assign_lhs (def_stmt
), mask
);
1969 = gsi_insert_on_edge_immediate (ext_def
, def_stmt
);
1970 gcc_assert (!new_bb
);
1974 def_stmt_vinfo
= new_stmt_vec_info (def_stmt
, vinfo
);
1975 set_vinfo_for_stmt (def_stmt
, def_stmt_vinfo
);
1976 STMT_VINFO_VECTYPE (def_stmt_vinfo
) = vecstype
;
1977 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
1981 var1
= vect_recog_temp_ssa_var (type
, NULL
);
1982 def_stmt
= gimple_build_assign (var1
, rhs_code
== LROTATE_EXPR
1983 ? LSHIFT_EXPR
: RSHIFT_EXPR
,
1985 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
1987 var2
= vect_recog_temp_ssa_var (type
, NULL
);
1988 def_stmt
= gimple_build_assign (var2
, rhs_code
== LROTATE_EXPR
1989 ? RSHIFT_EXPR
: LSHIFT_EXPR
,
1991 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
1993 /* Pattern detected. */
1994 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt
);
1996 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1997 var
= vect_recog_temp_ssa_var (type
, NULL
);
1998 pattern_stmt
= gimple_build_assign (var
, BIT_IOR_EXPR
, var1
, var2
);
2000 stmts
->safe_push (last_stmt
);
2001 return pattern_stmt
;
2004 /* Detect a vector by vector shift pattern that wouldn't be otherwise
2012 S3 res_T = b_T op a_t;
2014 where type 'TYPE' is a type with different size than 'type',
2015 and op is <<, >> or rotate.
2020 TYPE b_T, c_T, res_T;
2023 S1 a_t = (type) c_T;
2025 S3 res_T = b_T op a_t;
2029 * STMTS: Contains a stmt from which the pattern search begins,
2030 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2031 with a shift/rotate which has same type on both operands, in the
2032 second case just b_T op c_T, in the first case with added cast
2033 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2037 * TYPE_IN: The type of the input arguments to the pattern.
2039 * TYPE_OUT: The type of the output of this pattern.
2041 * Return value: A new stmt that will be used to replace the shift/rotate
2045 vect_recog_vector_vector_shift_pattern (vec
<gimple
*> *stmts
,
2046 tree
*type_in
, tree
*type_out
)
2048 gimple
*last_stmt
= stmts
->pop ();
2049 tree oprnd0
, oprnd1
, lhs
, var
;
2050 gimple
*pattern_stmt
;
2051 enum tree_code rhs_code
;
2052 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
2053 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
2055 if (!is_gimple_assign (last_stmt
))
2058 rhs_code
= gimple_assign_rhs_code (last_stmt
);
2070 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
2073 lhs
= gimple_assign_lhs (last_stmt
);
2074 oprnd0
= gimple_assign_rhs1 (last_stmt
);
2075 oprnd1
= gimple_assign_rhs2 (last_stmt
);
2076 if (TREE_CODE (oprnd0
) != SSA_NAME
2077 || TREE_CODE (oprnd1
) != SSA_NAME
2078 || TYPE_MODE (TREE_TYPE (oprnd0
)) == TYPE_MODE (TREE_TYPE (oprnd1
))
2079 || !type_has_mode_precision_p (TREE_TYPE (oprnd1
))
2080 || TYPE_PRECISION (TREE_TYPE (lhs
))
2081 != TYPE_PRECISION (TREE_TYPE (oprnd0
)))
2084 stmt_vec_info def_vinfo
= vect_get_internal_def (vinfo
, oprnd1
);
2088 *type_in
= get_vectype_for_scalar_type (TREE_TYPE (oprnd0
));
2089 *type_out
= *type_in
;
2090 if (*type_in
== NULL_TREE
)
2093 tree def
= NULL_TREE
;
2094 gassign
*def_stmt
= dyn_cast
<gassign
*> (def_vinfo
->stmt
);
2095 if (!STMT_VINFO_IN_PATTERN_P (def_vinfo
)
2097 && gimple_assign_cast_p (def_stmt
))
2099 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
2100 if (TYPE_MODE (TREE_TYPE (rhs1
)) == TYPE_MODE (TREE_TYPE (oprnd0
))
2101 && TYPE_PRECISION (TREE_TYPE (rhs1
))
2102 == TYPE_PRECISION (TREE_TYPE (oprnd0
)))
2104 if (TYPE_PRECISION (TREE_TYPE (oprnd1
))
2105 >= TYPE_PRECISION (TREE_TYPE (rhs1
)))
2110 = build_low_bits_mask (TREE_TYPE (rhs1
),
2111 TYPE_PRECISION (TREE_TYPE (oprnd1
)));
2112 def
= vect_recog_temp_ssa_var (TREE_TYPE (rhs1
), NULL
);
2113 def_stmt
= gimple_build_assign (def
, BIT_AND_EXPR
, rhs1
, mask
);
2114 stmt_vec_info new_stmt_info
2115 = new_stmt_vec_info (def_stmt
, vinfo
);
2116 set_vinfo_for_stmt (def_stmt
, new_stmt_info
);
2117 STMT_VINFO_VECTYPE (new_stmt_info
)
2118 = get_vectype_for_scalar_type (TREE_TYPE (rhs1
));
2119 new_pattern_def_seq (stmt_vinfo
, def_stmt
);
2124 if (def
== NULL_TREE
)
2126 def
= vect_recog_temp_ssa_var (TREE_TYPE (oprnd0
), NULL
);
2127 def_stmt
= gimple_build_assign (def
, NOP_EXPR
, oprnd1
);
2128 new_pattern_def_seq (stmt_vinfo
, def_stmt
);
2131 /* Pattern detected. */
2132 vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt
);
2134 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2135 var
= vect_recog_temp_ssa_var (TREE_TYPE (oprnd0
), NULL
);
2136 pattern_stmt
= gimple_build_assign (var
, rhs_code
, oprnd0
, def
);
2138 stmts
->safe_push (last_stmt
);
2139 return pattern_stmt
;
2142 /* Return true iff the target has a vector optab implementing the operation
2143 CODE on type VECTYPE. */
2146 target_has_vecop_for_code (tree_code code
, tree vectype
)
2148 optab voptab
= optab_for_tree_code (code
, vectype
, optab_vector
);
2150 && optab_handler (voptab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
;
2153 /* Verify that the target has optabs of VECTYPE to perform all the steps
2154 needed by the multiplication-by-immediate synthesis algorithm described by
2155 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
2156 present. Return true iff the target supports all the steps. */
2159 target_supports_mult_synth_alg (struct algorithm
*alg
, mult_variant var
,
2160 tree vectype
, bool synth_shift_p
)
2162 if (alg
->op
[0] != alg_zero
&& alg
->op
[0] != alg_m
)
2165 bool supports_vminus
= target_has_vecop_for_code (MINUS_EXPR
, vectype
);
2166 bool supports_vplus
= target_has_vecop_for_code (PLUS_EXPR
, vectype
);
2168 if (var
== negate_variant
2169 && !target_has_vecop_for_code (NEGATE_EXPR
, vectype
))
2172 /* If we must synthesize shifts with additions make sure that vector
2173 addition is available. */
2174 if ((var
== add_variant
|| synth_shift_p
) && !supports_vplus
)
2177 for (int i
= 1; i
< alg
->ops
; i
++)
2185 case alg_add_factor
:
2186 if (!supports_vplus
)
2191 case alg_sub_factor
:
2192 if (!supports_vminus
)
2198 case alg_impossible
:
2208 /* Synthesize a left shift of OP by AMNT bits using a series of additions and
2209 putting the final result in DEST. Append all statements but the last into
2210 VINFO. Return the last statement. */
2213 synth_lshift_by_additions (tree dest
, tree op
, HOST_WIDE_INT amnt
,
2214 stmt_vec_info vinfo
)
2217 tree itype
= TREE_TYPE (op
);
2219 gcc_assert (amnt
>= 0);
2220 for (i
= 0; i
< amnt
; i
++)
2222 tree tmp_var
= (i
< amnt
- 1) ? vect_recog_temp_ssa_var (itype
, NULL
)
2225 = gimple_build_assign (tmp_var
, PLUS_EXPR
, prev_res
, prev_res
);
2228 append_pattern_def_seq (vinfo
, stmt
);
2236 /* Helper for vect_synth_mult_by_constant. Apply a binary operation
2237 CODE to operands OP1 and OP2, creating a new temporary SSA var in
2238 the process if necessary. Append the resulting assignment statements
2239 to the sequence in STMT_VINFO. Return the SSA variable that holds the
2240 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
2241 left shifts using additions. */
2244 apply_binop_and_append_stmt (tree_code code
, tree op1
, tree op2
,
2245 stmt_vec_info stmt_vinfo
, bool synth_shift_p
)
2247 if (integer_zerop (op2
)
2248 && (code
== LSHIFT_EXPR
2249 || code
== PLUS_EXPR
))
2251 gcc_assert (TREE_CODE (op1
) == SSA_NAME
);
2256 tree itype
= TREE_TYPE (op1
);
2257 tree tmp_var
= vect_recog_temp_ssa_var (itype
, NULL
);
2259 if (code
== LSHIFT_EXPR
2262 stmt
= synth_lshift_by_additions (tmp_var
, op1
, TREE_INT_CST_LOW (op2
),
2264 append_pattern_def_seq (stmt_vinfo
, stmt
);
2268 stmt
= gimple_build_assign (tmp_var
, code
, op1
, op2
);
2269 append_pattern_def_seq (stmt_vinfo
, stmt
);
2273 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
2274 and simple arithmetic operations to be vectorized. Record the statements
2275 produced in STMT_VINFO and return the last statement in the sequence or
2276 NULL if it's not possible to synthesize such a multiplication.
2277 This function mirrors the behavior of expand_mult_const in expmed.c but
2278 works on tree-ssa form. */
2281 vect_synth_mult_by_constant (tree op
, tree val
,
2282 stmt_vec_info stmt_vinfo
)
2284 tree itype
= TREE_TYPE (op
);
2285 machine_mode mode
= TYPE_MODE (itype
);
2286 struct algorithm alg
;
2287 mult_variant variant
;
2288 if (!tree_fits_shwi_p (val
))
2291 /* Multiplication synthesis by shifts, adds and subs can introduce
2292 signed overflow where the original operation didn't. Perform the
2293 operations on an unsigned type and cast back to avoid this.
2294 In the future we may want to relax this for synthesis algorithms
2295 that we can prove do not cause unexpected overflow. */
2296 bool cast_to_unsigned_p
= !TYPE_OVERFLOW_WRAPS (itype
);
2298 tree multtype
= cast_to_unsigned_p
? unsigned_type_for (itype
) : itype
;
2300 /* Targets that don't support vector shifts but support vector additions
2301 can synthesize shifts that way. */
2302 bool synth_shift_p
= !vect_supportable_shift (LSHIFT_EXPR
, multtype
);
2304 HOST_WIDE_INT hwval
= tree_to_shwi (val
);
2305 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
2306 The vectorizer's benefit analysis will decide whether it's beneficial
2308 bool possible
= choose_mult_variant (mode
, hwval
, &alg
,
2309 &variant
, MAX_COST
);
2313 tree vectype
= get_vectype_for_scalar_type (multtype
);
2316 || !target_supports_mult_synth_alg (&alg
, variant
,
2317 vectype
, synth_shift_p
))
2322 /* Clear out the sequence of statements so we can populate it below. */
2323 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
) = NULL
;
2324 gimple
*stmt
= NULL
;
2326 if (cast_to_unsigned_p
)
2328 tree tmp_op
= vect_recog_temp_ssa_var (multtype
, NULL
);
2329 stmt
= gimple_build_assign (tmp_op
, CONVERT_EXPR
, op
);
2330 append_pattern_def_seq (stmt_vinfo
, stmt
);
2334 if (alg
.op
[0] == alg_zero
)
2335 accumulator
= build_int_cst (multtype
, 0);
2339 bool needs_fixup
= (variant
== negate_variant
)
2340 || (variant
== add_variant
);
2342 for (int i
= 1; i
< alg
.ops
; i
++)
2344 tree shft_log
= build_int_cst (multtype
, alg
.log
[i
]);
2345 tree accum_tmp
= vect_recog_temp_ssa_var (multtype
, NULL
);
2346 tree tmp_var
= NULL_TREE
;
2353 = synth_lshift_by_additions (accum_tmp
, accumulator
, alg
.log
[i
],
2356 stmt
= gimple_build_assign (accum_tmp
, LSHIFT_EXPR
, accumulator
,
2361 = apply_binop_and_append_stmt (LSHIFT_EXPR
, op
, shft_log
,
2362 stmt_vinfo
, synth_shift_p
);
2363 stmt
= gimple_build_assign (accum_tmp
, PLUS_EXPR
, accumulator
,
2367 tmp_var
= apply_binop_and_append_stmt (LSHIFT_EXPR
, op
,
2368 shft_log
, stmt_vinfo
,
2370 /* In some algorithms the first step involves zeroing the
2371 accumulator. If subtracting from such an accumulator
2372 just emit the negation directly. */
2373 if (integer_zerop (accumulator
))
2374 stmt
= gimple_build_assign (accum_tmp
, NEGATE_EXPR
, tmp_var
);
2376 stmt
= gimple_build_assign (accum_tmp
, MINUS_EXPR
, accumulator
,
2381 = apply_binop_and_append_stmt (LSHIFT_EXPR
, accumulator
, shft_log
,
2382 stmt_vinfo
, synth_shift_p
);
2383 stmt
= gimple_build_assign (accum_tmp
, PLUS_EXPR
, tmp_var
, op
);
2387 = apply_binop_and_append_stmt (LSHIFT_EXPR
, accumulator
, shft_log
,
2388 stmt_vinfo
, synth_shift_p
);
2389 stmt
= gimple_build_assign (accum_tmp
, MINUS_EXPR
, tmp_var
, op
);
2391 case alg_add_factor
:
2393 = apply_binop_and_append_stmt (LSHIFT_EXPR
, accumulator
, shft_log
,
2394 stmt_vinfo
, synth_shift_p
);
2395 stmt
= gimple_build_assign (accum_tmp
, PLUS_EXPR
, accumulator
,
2398 case alg_sub_factor
:
2400 = apply_binop_and_append_stmt (LSHIFT_EXPR
, accumulator
, shft_log
,
2401 stmt_vinfo
, synth_shift_p
);
2402 stmt
= gimple_build_assign (accum_tmp
, MINUS_EXPR
, tmp_var
,
2408 /* We don't want to append the last stmt in the sequence to stmt_vinfo
2409 but rather return it directly. */
2411 if ((i
< alg
.ops
- 1) || needs_fixup
|| cast_to_unsigned_p
)
2412 append_pattern_def_seq (stmt_vinfo
, stmt
);
2413 accumulator
= accum_tmp
;
2415 if (variant
== negate_variant
)
2417 tree accum_tmp
= vect_recog_temp_ssa_var (multtype
, NULL
);
2418 stmt
= gimple_build_assign (accum_tmp
, NEGATE_EXPR
, accumulator
);
2419 accumulator
= accum_tmp
;
2420 if (cast_to_unsigned_p
)
2421 append_pattern_def_seq (stmt_vinfo
, stmt
);
2423 else if (variant
== add_variant
)
2425 tree accum_tmp
= vect_recog_temp_ssa_var (multtype
, NULL
);
2426 stmt
= gimple_build_assign (accum_tmp
, PLUS_EXPR
, accumulator
, op
);
2427 accumulator
= accum_tmp
;
2428 if (cast_to_unsigned_p
)
2429 append_pattern_def_seq (stmt_vinfo
, stmt
);
2431 /* Move back to a signed if needed. */
2432 if (cast_to_unsigned_p
)
2434 tree accum_tmp
= vect_recog_temp_ssa_var (itype
, NULL
);
2435 stmt
= gimple_build_assign (accum_tmp
, CONVERT_EXPR
, accumulator
);
2441 /* Detect multiplication by constant and convert it into a sequence of
2442 shifts and additions, subtractions, negations. We reuse the
2443 choose_mult_variant algorithms from expmed.c
2447 STMTS: Contains a stmt from which the pattern search begins,
2452 * TYPE_IN: The type of the input arguments to the pattern.
2454 * TYPE_OUT: The type of the output of this pattern.
2456 * Return value: A new stmt that will be used to replace
2457 the multiplication. */
2460 vect_recog_mult_pattern (vec
<gimple
*> *stmts
,
2461 tree
*type_in
, tree
*type_out
)
2463 gimple
*last_stmt
= stmts
->pop ();
2464 tree oprnd0
, oprnd1
, vectype
, itype
;
2465 gimple
*pattern_stmt
;
2466 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
2468 if (!is_gimple_assign (last_stmt
))
2471 if (gimple_assign_rhs_code (last_stmt
) != MULT_EXPR
)
2474 oprnd0
= gimple_assign_rhs1 (last_stmt
);
2475 oprnd1
= gimple_assign_rhs2 (last_stmt
);
2476 itype
= TREE_TYPE (oprnd0
);
2478 if (TREE_CODE (oprnd0
) != SSA_NAME
2479 || TREE_CODE (oprnd1
) != INTEGER_CST
2480 || !INTEGRAL_TYPE_P (itype
)
2481 || !type_has_mode_precision_p (itype
))
2484 vectype
= get_vectype_for_scalar_type (itype
);
2485 if (vectype
== NULL_TREE
)
2488 /* If the target can handle vectorized multiplication natively,
2489 don't attempt to optimize this. */
2490 optab mul_optab
= optab_for_tree_code (MULT_EXPR
, vectype
, optab_default
);
2491 if (mul_optab
!= unknown_optab
)
2493 machine_mode vec_mode
= TYPE_MODE (vectype
);
2494 int icode
= (int) optab_handler (mul_optab
, vec_mode
);
2495 if (icode
!= CODE_FOR_nothing
)
2499 pattern_stmt
= vect_synth_mult_by_constant (oprnd0
, oprnd1
, stmt_vinfo
);
2503 /* Pattern detected. */
2504 vect_pattern_detected ("vect_recog_mult_pattern", last_stmt
);
2506 stmts
->safe_push (last_stmt
);
2508 *type_out
= vectype
;
2510 return pattern_stmt
;
2513 /* Detect a signed division by a constant that wouldn't be
2514 otherwise vectorized:
2520 where type 'type' is an integral type and N is a constant.
2522 Similarly handle modulo by a constant:
2528 * STMTS: Contains a stmt from which the pattern search begins,
2529 i.e. the division stmt. S1 is replaced by if N is a power
2530 of two constant and type is signed:
2531 S3 y_t = b_t < 0 ? N - 1 : 0;
2533 S1' a_t = x_t >> log2 (N);
2535 S4 is replaced if N is a power of two constant and
2536 type is signed by (where *_T temporaries have unsigned type):
2537 S9 y_T = b_t < 0 ? -1U : 0U;
2538 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2539 S7 z_t = (type) z_T;
2541 S5 x_t = w_t & (N - 1);
2542 S4' a_t = x_t - z_t;
2546 * TYPE_IN: The type of the input arguments to the pattern.
2548 * TYPE_OUT: The type of the output of this pattern.
2550 * Return value: A new stmt that will be used to replace the division
2551 S1 or modulo S4 stmt. */
2554 vect_recog_divmod_pattern (vec
<gimple
*> *stmts
,
2555 tree
*type_in
, tree
*type_out
)
2557 gimple
*last_stmt
= stmts
->pop ();
2558 tree oprnd0
, oprnd1
, vectype
, itype
, cond
;
2559 gimple
*pattern_stmt
, *def_stmt
;
2560 enum tree_code rhs_code
;
2561 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
2562 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
2565 int dummy_int
, prec
;
2566 stmt_vec_info def_stmt_vinfo
;
2568 if (!is_gimple_assign (last_stmt
))
2571 rhs_code
= gimple_assign_rhs_code (last_stmt
);
2574 case TRUNC_DIV_EXPR
:
2575 case TRUNC_MOD_EXPR
:
2581 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
2584 oprnd0
= gimple_assign_rhs1 (last_stmt
);
2585 oprnd1
= gimple_assign_rhs2 (last_stmt
);
2586 itype
= TREE_TYPE (oprnd0
);
2587 if (TREE_CODE (oprnd0
) != SSA_NAME
2588 || TREE_CODE (oprnd1
) != INTEGER_CST
2589 || TREE_CODE (itype
) != INTEGER_TYPE
2590 || !type_has_mode_precision_p (itype
))
2593 scalar_int_mode itype_mode
= SCALAR_INT_TYPE_MODE (itype
);
2594 vectype
= get_vectype_for_scalar_type (itype
);
2595 if (vectype
== NULL_TREE
)
2598 if (optimize_bb_for_size_p (gimple_bb (last_stmt
)))
2600 /* If the target can handle vectorized division or modulo natively,
2601 don't attempt to optimize this, since native division is likely
2602 to give smaller code. */
2603 optab
= optab_for_tree_code (rhs_code
, vectype
, optab_default
);
2604 if (optab
!= unknown_optab
)
2606 machine_mode vec_mode
= TYPE_MODE (vectype
);
2607 int icode
= (int) optab_handler (optab
, vec_mode
);
2608 if (icode
!= CODE_FOR_nothing
)
2613 prec
= TYPE_PRECISION (itype
);
2614 if (integer_pow2p (oprnd1
))
2616 if (TYPE_UNSIGNED (itype
) || tree_int_cst_sgn (oprnd1
) != 1)
2619 /* Pattern detected. */
2620 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt
);
2622 cond
= build2 (LT_EXPR
, boolean_type_node
, oprnd0
,
2623 build_int_cst (itype
, 0));
2624 if (rhs_code
== TRUNC_DIV_EXPR
)
2626 tree var
= vect_recog_temp_ssa_var (itype
, NULL
);
2629 = gimple_build_assign (var
, COND_EXPR
, cond
,
2630 fold_build2 (MINUS_EXPR
, itype
, oprnd1
,
2631 build_int_cst (itype
, 1)),
2632 build_int_cst (itype
, 0));
2633 new_pattern_def_seq (stmt_vinfo
, def_stmt
);
2634 var
= vect_recog_temp_ssa_var (itype
, NULL
);
2636 = gimple_build_assign (var
, PLUS_EXPR
, oprnd0
,
2637 gimple_assign_lhs (def_stmt
));
2638 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2640 shift
= build_int_cst (itype
, tree_log2 (oprnd1
));
2642 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2643 RSHIFT_EXPR
, var
, shift
);
2648 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
) = NULL
;
2649 if (compare_tree_int (oprnd1
, 2) == 0)
2651 signmask
= vect_recog_temp_ssa_var (itype
, NULL
);
2652 def_stmt
= gimple_build_assign (signmask
, COND_EXPR
, cond
,
2653 build_int_cst (itype
, 1),
2654 build_int_cst (itype
, 0));
2655 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2660 = build_nonstandard_integer_type (prec
, 1);
2661 tree vecutype
= get_vectype_for_scalar_type (utype
);
2663 = build_int_cst (utype
, GET_MODE_BITSIZE (itype_mode
)
2664 - tree_log2 (oprnd1
));
2665 tree var
= vect_recog_temp_ssa_var (utype
, NULL
);
2667 def_stmt
= gimple_build_assign (var
, COND_EXPR
, cond
,
2668 build_int_cst (utype
, -1),
2669 build_int_cst (utype
, 0));
2670 def_stmt_vinfo
= new_stmt_vec_info (def_stmt
, vinfo
);
2671 set_vinfo_for_stmt (def_stmt
, def_stmt_vinfo
);
2672 STMT_VINFO_VECTYPE (def_stmt_vinfo
) = vecutype
;
2673 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2674 var
= vect_recog_temp_ssa_var (utype
, NULL
);
2675 def_stmt
= gimple_build_assign (var
, RSHIFT_EXPR
,
2676 gimple_assign_lhs (def_stmt
),
2678 def_stmt_vinfo
= new_stmt_vec_info (def_stmt
, vinfo
);
2679 set_vinfo_for_stmt (def_stmt
, def_stmt_vinfo
);
2680 STMT_VINFO_VECTYPE (def_stmt_vinfo
) = vecutype
;
2681 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2682 signmask
= vect_recog_temp_ssa_var (itype
, NULL
);
2684 = gimple_build_assign (signmask
, NOP_EXPR
, var
);
2685 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2688 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2689 PLUS_EXPR
, oprnd0
, signmask
);
2690 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2692 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2693 BIT_AND_EXPR
, gimple_assign_lhs (def_stmt
),
2694 fold_build2 (MINUS_EXPR
, itype
, oprnd1
,
2695 build_int_cst (itype
, 1)));
2696 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2699 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2700 MINUS_EXPR
, gimple_assign_lhs (def_stmt
),
2704 stmts
->safe_push (last_stmt
);
2707 *type_out
= vectype
;
2708 return pattern_stmt
;
2711 if (prec
> HOST_BITS_PER_WIDE_INT
2712 || integer_zerop (oprnd1
))
2715 if (!can_mult_highpart_p (TYPE_MODE (vectype
), TYPE_UNSIGNED (itype
)))
2718 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
) = NULL
;
2720 if (TYPE_UNSIGNED (itype
))
2722 unsigned HOST_WIDE_INT mh
, ml
;
2723 int pre_shift
, post_shift
;
2724 unsigned HOST_WIDE_INT d
= (TREE_INT_CST_LOW (oprnd1
)
2725 & GET_MODE_MASK (itype_mode
));
2726 tree t1
, t2
, t3
, t4
;
2728 if (d
>= (HOST_WIDE_INT_1U
<< (prec
- 1)))
2729 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2732 /* Find a suitable multiplier and right shift count
2733 instead of multiplying with D. */
2734 mh
= choose_multiplier (d
, prec
, prec
, &ml
, &post_shift
, &dummy_int
);
2736 /* If the suggested multiplier is more than SIZE bits, we can do better
2737 for even divisors, using an initial right shift. */
2738 if (mh
!= 0 && (d
& 1) == 0)
2740 pre_shift
= ctz_or_zero (d
);
2741 mh
= choose_multiplier (d
>> pre_shift
, prec
, prec
- pre_shift
,
2742 &ml
, &post_shift
, &dummy_int
);
2750 if (post_shift
- 1 >= prec
)
2753 /* t1 = oprnd0 h* ml;
2757 q = t4 >> (post_shift - 1); */
2758 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
2759 def_stmt
= gimple_build_assign (t1
, MULT_HIGHPART_EXPR
, oprnd0
,
2760 build_int_cst (itype
, ml
));
2761 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2763 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
2765 = gimple_build_assign (t2
, MINUS_EXPR
, oprnd0
, t1
);
2766 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2768 t3
= vect_recog_temp_ssa_var (itype
, NULL
);
2770 = gimple_build_assign (t3
, RSHIFT_EXPR
, t2
, integer_one_node
);
2771 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2773 t4
= vect_recog_temp_ssa_var (itype
, NULL
);
2775 = gimple_build_assign (t4
, PLUS_EXPR
, t1
, t3
);
2777 if (post_shift
!= 1)
2779 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2781 q
= vect_recog_temp_ssa_var (itype
, NULL
);
2783 = gimple_build_assign (q
, RSHIFT_EXPR
, t4
,
2784 build_int_cst (itype
, post_shift
- 1));
2789 pattern_stmt
= def_stmt
;
2794 if (pre_shift
>= prec
|| post_shift
>= prec
)
2797 /* t1 = oprnd0 >> pre_shift;
2799 q = t2 >> post_shift; */
2802 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
2804 = gimple_build_assign (t1
, RSHIFT_EXPR
, oprnd0
,
2805 build_int_cst (NULL
, pre_shift
));
2806 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2811 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
2812 def_stmt
= gimple_build_assign (t2
, MULT_HIGHPART_EXPR
, t1
,
2813 build_int_cst (itype
, ml
));
2817 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2819 q
= vect_recog_temp_ssa_var (itype
, NULL
);
2821 = gimple_build_assign (q
, RSHIFT_EXPR
, t2
,
2822 build_int_cst (itype
, post_shift
));
2827 pattern_stmt
= def_stmt
;
2832 unsigned HOST_WIDE_INT ml
;
2834 HOST_WIDE_INT d
= TREE_INT_CST_LOW (oprnd1
);
2835 unsigned HOST_WIDE_INT abs_d
;
2837 tree t1
, t2
, t3
, t4
;
2839 /* Give up for -1. */
2843 /* Since d might be INT_MIN, we have to cast to
2844 unsigned HOST_WIDE_INT before negating to avoid
2845 undefined signed overflow. */
2847 ? (unsigned HOST_WIDE_INT
) d
2848 : - (unsigned HOST_WIDE_INT
) d
);
2850 /* n rem d = n rem -d */
2851 if (rhs_code
== TRUNC_MOD_EXPR
&& d
< 0)
2854 oprnd1
= build_int_cst (itype
, abs_d
);
2856 else if (HOST_BITS_PER_WIDE_INT
>= prec
2857 && abs_d
== HOST_WIDE_INT_1U
<< (prec
- 1))
2858 /* This case is not handled correctly below. */
2861 choose_multiplier (abs_d
, prec
, prec
- 1, &ml
, &post_shift
, &dummy_int
);
2862 if (ml
>= HOST_WIDE_INT_1U
<< (prec
- 1))
2865 ml
|= HOST_WIDE_INT_M1U
<< (prec
- 1);
2867 if (post_shift
>= prec
)
2870 /* t1 = oprnd0 h* ml; */
2871 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
2872 def_stmt
= gimple_build_assign (t1
, MULT_HIGHPART_EXPR
, oprnd0
,
2873 build_int_cst (itype
, ml
));
2877 /* t2 = t1 + oprnd0; */
2878 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2879 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
2880 def_stmt
= gimple_build_assign (t2
, PLUS_EXPR
, t1
, oprnd0
);
2887 /* t3 = t2 >> post_shift; */
2888 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2889 t3
= vect_recog_temp_ssa_var (itype
, NULL
);
2890 def_stmt
= gimple_build_assign (t3
, RSHIFT_EXPR
, t2
,
2891 build_int_cst (itype
, post_shift
));
2896 wide_int oprnd0_min
, oprnd0_max
;
2898 if (get_range_info (oprnd0
, &oprnd0_min
, &oprnd0_max
) == VR_RANGE
)
2900 if (!wi::neg_p (oprnd0_min
, TYPE_SIGN (itype
)))
2902 else if (wi::neg_p (oprnd0_max
, TYPE_SIGN (itype
)))
2906 if (msb
== 0 && d
>= 0)
2910 pattern_stmt
= def_stmt
;
2914 /* t4 = oprnd0 >> (prec - 1);
2915 or if we know from VRP that oprnd0 >= 0
2917 or if we know from VRP that oprnd0 < 0
2919 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2920 t4
= vect_recog_temp_ssa_var (itype
, NULL
);
2922 def_stmt
= gimple_build_assign (t4
, INTEGER_CST
,
2923 build_int_cst (itype
, msb
));
2925 def_stmt
= gimple_build_assign (t4
, RSHIFT_EXPR
, oprnd0
,
2926 build_int_cst (itype
, prec
- 1));
2927 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2929 /* q = t3 - t4; or q = t4 - t3; */
2930 q
= vect_recog_temp_ssa_var (itype
, NULL
);
2931 pattern_stmt
= gimple_build_assign (q
, MINUS_EXPR
, d
< 0 ? t4
: t3
,
2936 if (rhs_code
== TRUNC_MOD_EXPR
)
2940 /* We divided. Now finish by:
2943 append_pattern_def_seq (stmt_vinfo
, pattern_stmt
);
2945 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
2946 def_stmt
= gimple_build_assign (t1
, MULT_EXPR
, q
, oprnd1
);
2947 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2949 r
= vect_recog_temp_ssa_var (itype
, NULL
);
2950 pattern_stmt
= gimple_build_assign (r
, MINUS_EXPR
, oprnd0
, t1
);
2953 /* Pattern detected. */
2954 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt
);
2956 stmts
->safe_push (last_stmt
);
2959 *type_out
= vectype
;
2960 return pattern_stmt
;
2963 /* Function vect_recog_mixed_size_cond_pattern
2965 Try to find the following pattern:
2970 S1 a_T = x_t CMP y_t ? b_T : c_T;
2972 where type 'TYPE' is an integral type which has different size
2973 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
2974 than 'type', the constants need to fit into an integer type
2975 with the same width as 'type') or results of conversion from 'type'.
2979 * LAST_STMT: A stmt from which the pattern search begins.
2983 * TYPE_IN: The type of the input arguments to the pattern.
2985 * TYPE_OUT: The type of the output of this pattern.
2987 * Return value: A new stmt that will be used to replace the pattern.
2988 Additionally a def_stmt is added.
2990 a_it = x_t CMP y_t ? b_it : c_it;
2991 a_T = (TYPE) a_it; */
2994 vect_recog_mixed_size_cond_pattern (vec
<gimple
*> *stmts
, tree
*type_in
,
2997 gimple
*last_stmt
= (*stmts
)[0];
2998 tree cond_expr
, then_clause
, else_clause
;
2999 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
), def_stmt_info
;
3000 tree type
, vectype
, comp_vectype
, itype
= NULL_TREE
, vecitype
;
3001 gimple
*pattern_stmt
, *def_stmt
;
3002 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
3003 tree orig_type0
= NULL_TREE
, orig_type1
= NULL_TREE
;
3004 gimple
*def_stmt0
= NULL
, *def_stmt1
= NULL
;
3006 tree comp_scalar_type
;
3008 if (!is_gimple_assign (last_stmt
)
3009 || gimple_assign_rhs_code (last_stmt
) != COND_EXPR
3010 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_internal_def
)
3013 cond_expr
= gimple_assign_rhs1 (last_stmt
);
3014 then_clause
= gimple_assign_rhs2 (last_stmt
);
3015 else_clause
= gimple_assign_rhs3 (last_stmt
);
3017 if (!COMPARISON_CLASS_P (cond_expr
))
3020 comp_scalar_type
= TREE_TYPE (TREE_OPERAND (cond_expr
, 0));
3021 comp_vectype
= get_vectype_for_scalar_type (comp_scalar_type
);
3022 if (comp_vectype
== NULL_TREE
)
3025 type
= gimple_expr_type (last_stmt
);
3026 if (types_compatible_p (type
, comp_scalar_type
)
3027 || ((TREE_CODE (then_clause
) != INTEGER_CST
3028 || TREE_CODE (else_clause
) != INTEGER_CST
)
3029 && !INTEGRAL_TYPE_P (comp_scalar_type
))
3030 || !INTEGRAL_TYPE_P (type
))
3033 if ((TREE_CODE (then_clause
) != INTEGER_CST
3034 && !type_conversion_p (then_clause
, last_stmt
, false, &orig_type0
,
3035 &def_stmt0
, &promotion
))
3036 || (TREE_CODE (else_clause
) != INTEGER_CST
3037 && !type_conversion_p (else_clause
, last_stmt
, false, &orig_type1
,
3038 &def_stmt1
, &promotion
)))
3041 if (orig_type0
&& orig_type1
3042 && !types_compatible_p (orig_type0
, orig_type1
))
3047 if (!types_compatible_p (orig_type0
, comp_scalar_type
))
3049 then_clause
= gimple_assign_rhs1 (def_stmt0
);
3055 if (!types_compatible_p (orig_type1
, comp_scalar_type
))
3057 else_clause
= gimple_assign_rhs1 (def_stmt1
);
3062 HOST_WIDE_INT cmp_mode_size
3063 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype
));
3065 scalar_int_mode type_mode
= SCALAR_INT_TYPE_MODE (type
);
3066 if (GET_MODE_BITSIZE (type_mode
) == cmp_mode_size
)
3069 vectype
= get_vectype_for_scalar_type (type
);
3070 if (vectype
== NULL_TREE
)
3073 if (expand_vec_cond_expr_p (vectype
, comp_vectype
, TREE_CODE (cond_expr
)))
3076 if (itype
== NULL_TREE
)
3077 itype
= build_nonstandard_integer_type (cmp_mode_size
,
3078 TYPE_UNSIGNED (type
));
3080 if (itype
== NULL_TREE
3081 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype
)) != cmp_mode_size
)
3084 vecitype
= get_vectype_for_scalar_type (itype
);
3085 if (vecitype
== NULL_TREE
)
3088 if (!expand_vec_cond_expr_p (vecitype
, comp_vectype
, TREE_CODE (cond_expr
)))
3091 if (GET_MODE_BITSIZE (type_mode
) > cmp_mode_size
)
3093 if ((TREE_CODE (then_clause
) == INTEGER_CST
3094 && !int_fits_type_p (then_clause
, itype
))
3095 || (TREE_CODE (else_clause
) == INTEGER_CST
3096 && !int_fits_type_p (else_clause
, itype
)))
3100 def_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
3101 COND_EXPR
, unshare_expr (cond_expr
),
3102 fold_convert (itype
, then_clause
),
3103 fold_convert (itype
, else_clause
));
3104 pattern_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (type
, NULL
),
3105 NOP_EXPR
, gimple_assign_lhs (def_stmt
));
3107 new_pattern_def_seq (stmt_vinfo
, def_stmt
);
3108 def_stmt_info
= new_stmt_vec_info (def_stmt
, vinfo
);
3109 set_vinfo_for_stmt (def_stmt
, def_stmt_info
);
3110 STMT_VINFO_VECTYPE (def_stmt_info
) = vecitype
;
3111 *type_in
= vecitype
;
3112 *type_out
= vectype
;
3114 vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt
);
3116 return pattern_stmt
;
3120 /* Helper function of vect_recog_bool_pattern. Called recursively, return
3121 true if bool VAR can and should be optimized that way. Assume it shouldn't
3122 in case it's a result of a comparison which can be directly vectorized into
3123 a vector comparison. Fills in STMTS with all stmts visited during the
3127 check_bool_pattern (tree var
, vec_info
*vinfo
, hash_set
<gimple
*> &stmts
)
3130 enum tree_code rhs_code
;
3132 stmt_vec_info def_stmt_info
= vect_get_internal_def (vinfo
, var
);
3136 gassign
*def_stmt
= dyn_cast
<gassign
*> (def_stmt_info
->stmt
);
3140 if (stmts
.contains (def_stmt
))
3143 rhs1
= gimple_assign_rhs1 (def_stmt
);
3144 rhs_code
= gimple_assign_rhs_code (def_stmt
);
3148 if (! check_bool_pattern (rhs1
, vinfo
, stmts
))
3153 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1
)))
3155 if (! check_bool_pattern (rhs1
, vinfo
, stmts
))
3160 if (! check_bool_pattern (rhs1
, vinfo
, stmts
))
3167 if (! check_bool_pattern (rhs1
, vinfo
, stmts
)
3168 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt
), vinfo
, stmts
))
3173 if (TREE_CODE_CLASS (rhs_code
) == tcc_comparison
)
3175 tree vecitype
, comp_vectype
;
3177 /* If the comparison can throw, then is_gimple_condexpr will be
3178 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
3179 if (stmt_could_throw_p (def_stmt
))
3182 comp_vectype
= get_vectype_for_scalar_type (TREE_TYPE (rhs1
));
3183 if (comp_vectype
== NULL_TREE
)
3186 tree mask_type
= get_mask_type_for_scalar_type (TREE_TYPE (rhs1
));
3188 && expand_vec_cmp_expr_p (comp_vectype
, mask_type
, rhs_code
))
3191 if (TREE_CODE (TREE_TYPE (rhs1
)) != INTEGER_TYPE
)
3193 scalar_mode mode
= SCALAR_TYPE_MODE (TREE_TYPE (rhs1
));
3195 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode
), 1);
3196 vecitype
= get_vectype_for_scalar_type (itype
);
3197 if (vecitype
== NULL_TREE
)
3201 vecitype
= comp_vectype
;
3202 if (! expand_vec_cond_expr_p (vecitype
, comp_vectype
, rhs_code
))
3210 bool res
= stmts
.add (def_stmt
);
3211 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
3218 /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
3219 stmt (SSA_NAME_DEF_STMT of VAR) adding a cast to STMT_INFOs
3220 pattern sequence. */
3223 adjust_bool_pattern_cast (tree type
, tree var
, stmt_vec_info stmt_info
)
3225 gimple
*cast_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (type
, NULL
),
3227 stmt_vec_info patt_vinfo
= new_stmt_vec_info (cast_stmt
, stmt_info
->vinfo
);
3228 set_vinfo_for_stmt (cast_stmt
, patt_vinfo
);
3229 STMT_VINFO_VECTYPE (patt_vinfo
) = get_vectype_for_scalar_type (type
);
3230 append_pattern_def_seq (stmt_info
, cast_stmt
);
3231 return gimple_assign_lhs (cast_stmt
);
3234 /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
3235 VAR is an SSA_NAME that should be transformed from bool to a wider integer
3236 type, OUT_TYPE is the desired final integer type of the whole pattern.
3237 STMT_INFO is the info of the pattern root and is where pattern stmts should
3238 be associated with. DEFS is a map of pattern defs. */
3241 adjust_bool_pattern (tree var
, tree out_type
,
3242 stmt_vec_info stmt_info
, hash_map
<tree
, tree
> &defs
)
3244 gimple
*stmt
= SSA_NAME_DEF_STMT (var
);
3245 enum tree_code rhs_code
, def_rhs_code
;
3246 tree itype
, cond_expr
, rhs1
, rhs2
, irhs1
, irhs2
;
3248 gimple
*pattern_stmt
, *def_stmt
;
3249 tree trueval
= NULL_TREE
;
3251 rhs1
= gimple_assign_rhs1 (stmt
);
3252 rhs2
= gimple_assign_rhs2 (stmt
);
3253 rhs_code
= gimple_assign_rhs_code (stmt
);
3254 loc
= gimple_location (stmt
);
3259 irhs1
= *defs
.get (rhs1
);
3260 itype
= TREE_TYPE (irhs1
);
3262 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
3267 irhs1
= *defs
.get (rhs1
);
3268 itype
= TREE_TYPE (irhs1
);
3270 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
3271 BIT_XOR_EXPR
, irhs1
, build_int_cst (itype
, 1));
3275 /* Try to optimize x = y & (a < b ? 1 : 0); into
3276 x = (a < b ? y : 0);
3282 S1 a_b = x1 CMP1 y1;
3283 S2 b_b = x2 CMP2 y2;
3285 S4 d_T = (TYPE) c_b;
3287 we would normally emit:
3289 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3290 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3291 S3' c_T = a_T & b_T;
3294 but we can save one stmt by using the
3295 result of one of the COND_EXPRs in the other COND_EXPR and leave
3296 BIT_AND_EXPR stmt out:
3298 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3299 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3302 At least when VEC_COND_EXPR is implemented using masks
3303 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
3304 computes the comparison masks and ands it, in one case with
3305 all ones vector, in the other case with a vector register.
3306 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
3307 often more expensive. */
3308 def_stmt
= SSA_NAME_DEF_STMT (rhs2
);
3309 def_rhs_code
= gimple_assign_rhs_code (def_stmt
);
3310 if (TREE_CODE_CLASS (def_rhs_code
) == tcc_comparison
)
3312 irhs1
= *defs
.get (rhs1
);
3313 tree def_rhs1
= gimple_assign_rhs1 (def_stmt
);
3314 if (TYPE_PRECISION (TREE_TYPE (irhs1
))
3315 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1
))))
3317 rhs_code
= def_rhs_code
;
3319 rhs2
= gimple_assign_rhs2 (def_stmt
);
3324 irhs2
= *defs
.get (rhs2
);
3327 def_stmt
= SSA_NAME_DEF_STMT (rhs1
);
3328 def_rhs_code
= gimple_assign_rhs_code (def_stmt
);
3329 if (TREE_CODE_CLASS (def_rhs_code
) == tcc_comparison
)
3331 irhs2
= *defs
.get (rhs2
);
3332 tree def_rhs1
= gimple_assign_rhs1 (def_stmt
);
3333 if (TYPE_PRECISION (TREE_TYPE (irhs2
))
3334 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1
))))
3336 rhs_code
= def_rhs_code
;
3338 rhs2
= gimple_assign_rhs2 (def_stmt
);
3343 irhs1
= *defs
.get (rhs1
);
3349 irhs1
= *defs
.get (rhs1
);
3350 irhs2
= *defs
.get (rhs2
);
3352 if (TYPE_PRECISION (TREE_TYPE (irhs1
))
3353 != TYPE_PRECISION (TREE_TYPE (irhs2
)))
3355 int prec1
= TYPE_PRECISION (TREE_TYPE (irhs1
));
3356 int prec2
= TYPE_PRECISION (TREE_TYPE (irhs2
));
3357 int out_prec
= TYPE_PRECISION (out_type
);
3358 if (absu_hwi (out_prec
- prec1
) < absu_hwi (out_prec
- prec2
))
3359 irhs2
= adjust_bool_pattern_cast (TREE_TYPE (irhs1
), irhs2
,
3361 else if (absu_hwi (out_prec
- prec1
) > absu_hwi (out_prec
- prec2
))
3362 irhs1
= adjust_bool_pattern_cast (TREE_TYPE (irhs2
), irhs1
,
3366 irhs1
= adjust_bool_pattern_cast (out_type
, irhs1
, stmt_info
);
3367 irhs2
= adjust_bool_pattern_cast (out_type
, irhs2
, stmt_info
);
3370 itype
= TREE_TYPE (irhs1
);
3372 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
3373 rhs_code
, irhs1
, irhs2
);
3378 gcc_assert (TREE_CODE_CLASS (rhs_code
) == tcc_comparison
);
3379 if (TREE_CODE (TREE_TYPE (rhs1
)) != INTEGER_TYPE
3380 || !TYPE_UNSIGNED (TREE_TYPE (rhs1
))
3381 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1
)),
3382 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1
)))))
3384 scalar_mode mode
= SCALAR_TYPE_MODE (TREE_TYPE (rhs1
));
3386 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode
), 1);
3389 itype
= TREE_TYPE (rhs1
);
3390 cond_expr
= build2_loc (loc
, rhs_code
, itype
, rhs1
, rhs2
);
3391 if (trueval
== NULL_TREE
)
3392 trueval
= build_int_cst (itype
, 1);
3394 gcc_checking_assert (useless_type_conversion_p (itype
,
3395 TREE_TYPE (trueval
)));
3397 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
3398 COND_EXPR
, cond_expr
, trueval
,
3399 build_int_cst (itype
, 0));
3403 gimple_set_location (pattern_stmt
, loc
);
3404 /* ??? Why does vect_mark_pattern_stmts set the vector type on all
3405 pattern def seq stmts instead of just letting auto-detection do
3407 stmt_vec_info patt_vinfo
= new_stmt_vec_info (pattern_stmt
, stmt_info
->vinfo
);
3408 set_vinfo_for_stmt (pattern_stmt
, patt_vinfo
);
3409 STMT_VINFO_VECTYPE (patt_vinfo
) = get_vectype_for_scalar_type (itype
);
3410 append_pattern_def_seq (stmt_info
, pattern_stmt
);
3411 defs
.put (var
, gimple_assign_lhs (pattern_stmt
));
3414 /* Comparison function to qsort a vector of gimple stmts after UID. */
3417 sort_after_uid (const void *p1
, const void *p2
)
3419 const gimple
*stmt1
= *(const gimple
* const *)p1
;
3420 const gimple
*stmt2
= *(const gimple
* const *)p2
;
3421 return gimple_uid (stmt1
) - gimple_uid (stmt2
);
3424 /* Create pattern stmts for all stmts participating in the bool pattern
3425 specified by BOOL_STMT_SET and its root STMT with the desired type
3426 OUT_TYPE. Return the def of the pattern root. */
3429 adjust_bool_stmts (hash_set
<gimple
*> &bool_stmt_set
,
3430 tree out_type
, gimple
*stmt
)
3432 /* Gather original stmts in the bool pattern in their order of appearance
3434 auto_vec
<gimple
*> bool_stmts (bool_stmt_set
.elements ());
3435 for (hash_set
<gimple
*>::iterator i
= bool_stmt_set
.begin ();
3436 i
!= bool_stmt_set
.end (); ++i
)
3437 bool_stmts
.quick_push (*i
);
3438 bool_stmts
.qsort (sort_after_uid
);
3440 /* Now process them in that order, producing pattern stmts. */
3441 hash_map
<tree
, tree
> defs
;
3442 for (unsigned i
= 0; i
< bool_stmts
.length (); ++i
)
3443 adjust_bool_pattern (gimple_assign_lhs (bool_stmts
[i
]),
3444 out_type
, vinfo_for_stmt (stmt
), defs
);
3446 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
3447 gimple
*pattern_stmt
3448 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (stmt
)));
3449 return gimple_assign_lhs (pattern_stmt
);
3452 /* Helper for search_type_for_mask. */
3455 search_type_for_mask_1 (tree var
, vec_info
*vinfo
,
3456 hash_map
<gimple
*, tree
> &cache
)
3459 enum tree_code rhs_code
;
3460 tree res
= NULL_TREE
, res2
;
3462 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var
)))
3465 stmt_vec_info def_stmt_info
= vect_get_internal_def (vinfo
, var
);
3469 gassign
*def_stmt
= dyn_cast
<gassign
*> (def_stmt_info
->stmt
);
3473 tree
*c
= cache
.get (def_stmt
);
3477 rhs_code
= gimple_assign_rhs_code (def_stmt
);
3478 rhs1
= gimple_assign_rhs1 (def_stmt
);
3485 res
= search_type_for_mask_1 (rhs1
, vinfo
, cache
);
3491 res
= search_type_for_mask_1 (rhs1
, vinfo
, cache
);
3492 res2
= search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt
), vinfo
,
3494 if (!res
|| (res2
&& TYPE_PRECISION (res
) > TYPE_PRECISION (res2
)))
3499 if (TREE_CODE_CLASS (rhs_code
) == tcc_comparison
)
3501 tree comp_vectype
, mask_type
;
3503 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1
)))
3505 res
= search_type_for_mask_1 (rhs1
, vinfo
, cache
);
3506 res2
= search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt
),
3508 if (!res
|| (res2
&& TYPE_PRECISION (res
) > TYPE_PRECISION (res2
)))
3513 comp_vectype
= get_vectype_for_scalar_type (TREE_TYPE (rhs1
));
3514 if (comp_vectype
== NULL_TREE
)
3520 mask_type
= get_mask_type_for_scalar_type (TREE_TYPE (rhs1
));
3522 || !expand_vec_cmp_expr_p (comp_vectype
, mask_type
, rhs_code
))
3528 if (TREE_CODE (TREE_TYPE (rhs1
)) != INTEGER_TYPE
3529 || !TYPE_UNSIGNED (TREE_TYPE (rhs1
)))
3531 scalar_mode mode
= SCALAR_TYPE_MODE (TREE_TYPE (rhs1
));
3532 res
= build_nonstandard_integer_type (GET_MODE_BITSIZE (mode
), 1);
3535 res
= TREE_TYPE (rhs1
);
3539 cache
.put (def_stmt
, res
);
3543 /* Return the proper type for converting bool VAR into
3544 an integer value or NULL_TREE if no such type exists.
3545 The type is chosen so that converted value has the
3546 same number of elements as VAR's vector type. */
3549 search_type_for_mask (tree var
, vec_info
*vinfo
)
3551 hash_map
<gimple
*, tree
> cache
;
3552 return search_type_for_mask_1 (var
, vinfo
, cache
);
3555 /* Function vect_recog_bool_pattern
3557 Try to find pattern like following:
3559 bool a_b, b_b, c_b, d_b, e_b;
3562 S1 a_b = x1 CMP1 y1;
3563 S2 b_b = x2 CMP2 y2;
3565 S4 d_b = x3 CMP3 y3;
3567 S6 f_T = (TYPE) e_b;
3569 where type 'TYPE' is an integral type. Or a similar pattern
3572 S6 f_Y = e_b ? r_Y : s_Y;
3574 as results from if-conversion of a complex condition.
3578 * LAST_STMT: A stmt at the end from which the pattern
3579 search begins, i.e. cast of a bool to
3584 * TYPE_IN: The type of the input arguments to the pattern.
3586 * TYPE_OUT: The type of the output of this pattern.
3588 * Return value: A new stmt that will be used to replace the pattern.
3590 Assuming size of TYPE is the same as size of all comparisons
3591 (otherwise some casts would be added where needed), the above
3592 sequence we create related pattern stmts:
3593 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3594 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3595 S4' d_T = x3 CMP3 y3 ? 1 : 0;
3596 S5' e_T = c_T | d_T;
3599 Instead of the above S3' we could emit:
3600 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3601 S3' c_T = a_T | b_T;
3602 but the above is more efficient. */
3605 vect_recog_bool_pattern (vec
<gimple
*> *stmts
, tree
*type_in
,
3608 gimple
*last_stmt
= stmts
->pop ();
3609 enum tree_code rhs_code
;
3610 tree var
, lhs
, rhs
, vectype
;
3611 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
3612 stmt_vec_info new_stmt_info
;
3613 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
3614 gimple
*pattern_stmt
;
3616 if (!is_gimple_assign (last_stmt
))
3619 var
= gimple_assign_rhs1 (last_stmt
);
3620 lhs
= gimple_assign_lhs (last_stmt
);
3622 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var
)))
3625 hash_set
<gimple
*> bool_stmts
;
3627 rhs_code
= gimple_assign_rhs_code (last_stmt
);
3628 if (CONVERT_EXPR_CODE_P (rhs_code
))
3630 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
3631 || TYPE_PRECISION (TREE_TYPE (lhs
)) == 1)
3633 vectype
= get_vectype_for_scalar_type (TREE_TYPE (lhs
));
3634 if (vectype
== NULL_TREE
)
3637 if (check_bool_pattern (var
, vinfo
, bool_stmts
))
3639 rhs
= adjust_bool_stmts (bool_stmts
, TREE_TYPE (lhs
), last_stmt
);
3640 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
3641 if (useless_type_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (rhs
)))
3642 pattern_stmt
= gimple_build_assign (lhs
, SSA_NAME
, rhs
);
3645 = gimple_build_assign (lhs
, NOP_EXPR
, rhs
);
3649 tree type
= search_type_for_mask (var
, vinfo
);
3650 tree cst0
, cst1
, tmp
;
3655 /* We may directly use cond with narrowed type to avoid
3656 multiple cond exprs with following result packing and
3657 perform single cond with packed mask instead. In case
3658 of widening we better make cond first and then extract
3660 if (TYPE_MODE (type
) == TYPE_MODE (TREE_TYPE (lhs
)))
3661 type
= TREE_TYPE (lhs
);
3663 cst0
= build_int_cst (type
, 0);
3664 cst1
= build_int_cst (type
, 1);
3665 tmp
= vect_recog_temp_ssa_var (type
, NULL
);
3666 pattern_stmt
= gimple_build_assign (tmp
, COND_EXPR
, var
, cst1
, cst0
);
3668 if (!useless_type_conversion_p (type
, TREE_TYPE (lhs
)))
3670 tree new_vectype
= get_vectype_for_scalar_type (type
);
3671 new_stmt_info
= new_stmt_vec_info (pattern_stmt
, vinfo
);
3672 set_vinfo_for_stmt (pattern_stmt
, new_stmt_info
);
3673 STMT_VINFO_VECTYPE (new_stmt_info
) = new_vectype
;
3674 new_pattern_def_seq (stmt_vinfo
, pattern_stmt
);
3676 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
3677 pattern_stmt
= gimple_build_assign (lhs
, CONVERT_EXPR
, tmp
);
3681 *type_out
= vectype
;
3683 stmts
->safe_push (last_stmt
);
3684 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt
);
3686 return pattern_stmt
;
3688 else if (rhs_code
== COND_EXPR
3689 && TREE_CODE (var
) == SSA_NAME
)
3691 vectype
= get_vectype_for_scalar_type (TREE_TYPE (lhs
));
3692 if (vectype
== NULL_TREE
)
3695 /* Build a scalar type for the boolean result that when
3696 vectorized matches the vector type of the result in
3697 size and number of elements. */
3699 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype
)),
3700 TYPE_VECTOR_SUBPARTS (vectype
));
3703 = build_nonstandard_integer_type (prec
,
3704 TYPE_UNSIGNED (TREE_TYPE (var
)));
3705 if (get_vectype_for_scalar_type (type
) == NULL_TREE
)
3708 if (!check_bool_pattern (var
, vinfo
, bool_stmts
))
3711 rhs
= adjust_bool_stmts (bool_stmts
, type
, last_stmt
);
3713 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
3715 = gimple_build_assign (lhs
, COND_EXPR
,
3716 build2 (NE_EXPR
, boolean_type_node
,
3717 rhs
, build_int_cst (type
, 0)),
3718 gimple_assign_rhs2 (last_stmt
),
3719 gimple_assign_rhs3 (last_stmt
));
3720 *type_out
= vectype
;
3722 stmts
->safe_push (last_stmt
);
3723 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt
);
3725 return pattern_stmt
;
3727 else if (rhs_code
== SSA_NAME
3728 && STMT_VINFO_DATA_REF (stmt_vinfo
))
3730 stmt_vec_info pattern_stmt_info
;
3731 vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
3732 gcc_assert (vectype
!= NULL_TREE
);
3733 if (!VECTOR_MODE_P (TYPE_MODE (vectype
)))
3736 if (check_bool_pattern (var
, vinfo
, bool_stmts
))
3737 rhs
= adjust_bool_stmts (bool_stmts
, TREE_TYPE (vectype
), last_stmt
);
3740 tree type
= search_type_for_mask (var
, vinfo
);
3741 tree cst0
, cst1
, new_vectype
;
3746 if (TYPE_MODE (type
) == TYPE_MODE (TREE_TYPE (vectype
)))
3747 type
= TREE_TYPE (vectype
);
3749 cst0
= build_int_cst (type
, 0);
3750 cst1
= build_int_cst (type
, 1);
3751 new_vectype
= get_vectype_for_scalar_type (type
);
3753 rhs
= vect_recog_temp_ssa_var (type
, NULL
);
3754 pattern_stmt
= gimple_build_assign (rhs
, COND_EXPR
, var
, cst1
, cst0
);
3756 pattern_stmt_info
= new_stmt_vec_info (pattern_stmt
, vinfo
);
3757 set_vinfo_for_stmt (pattern_stmt
, pattern_stmt_info
);
3758 STMT_VINFO_VECTYPE (pattern_stmt_info
) = new_vectype
;
3759 append_pattern_def_seq (stmt_vinfo
, pattern_stmt
);
3762 lhs
= build1 (VIEW_CONVERT_EXPR
, TREE_TYPE (vectype
), lhs
);
3763 if (!useless_type_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (rhs
)))
3765 tree rhs2
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
3766 gimple
*cast_stmt
= gimple_build_assign (rhs2
, NOP_EXPR
, rhs
);
3767 append_pattern_def_seq (stmt_vinfo
, cast_stmt
);
3770 pattern_stmt
= gimple_build_assign (lhs
, SSA_NAME
, rhs
);
3771 pattern_stmt_info
= new_stmt_vec_info (pattern_stmt
, vinfo
);
3772 set_vinfo_for_stmt (pattern_stmt
, pattern_stmt_info
);
3773 STMT_VINFO_DATA_REF (pattern_stmt_info
)
3774 = STMT_VINFO_DATA_REF (stmt_vinfo
);
3775 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info
)
3776 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_vinfo
);
3777 *type_out
= vectype
;
3779 stmts
->safe_push (last_stmt
);
3780 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt
);
3782 return pattern_stmt
;
3789 /* A helper for vect_recog_mask_conversion_pattern. Build
3790 conversion of MASK to a type suitable for masking VECTYPE.
3791 Built statement gets required vectype and is appended to
3792 a pattern sequence of STMT_VINFO.
3794 Return converted mask. */
3797 build_mask_conversion (tree mask
, tree vectype
, stmt_vec_info stmt_vinfo
,
3802 stmt_vec_info new_stmt_info
;
3804 masktype
= build_same_sized_truth_vector_type (vectype
);
3805 tmp
= vect_recog_temp_ssa_var (TREE_TYPE (masktype
), NULL
);
3806 stmt
= gimple_build_assign (tmp
, CONVERT_EXPR
, mask
);
3807 new_stmt_info
= new_stmt_vec_info (stmt
, vinfo
);
3808 set_vinfo_for_stmt (stmt
, new_stmt_info
);
3809 STMT_VINFO_VECTYPE (new_stmt_info
) = masktype
;
3810 append_pattern_def_seq (stmt_vinfo
, stmt
);
3816 /* Function vect_recog_mask_conversion_pattern
3818 Try to find statements which require boolean type
3819 converison. Additional conversion statements are
3820 added to handle such cases. For example:
3830 S4 c_1 = m_3 ? c_2 : c_3;
3832 Will be transformed into:
3836 S3'' m_2' = (_Bool[bitsize=32])m_2
3837 S3' m_3' = m_1 & m_2';
3838 S4'' m_3'' = (_Bool[bitsize=8])m_3'
3839 S4' c_1' = m_3'' ? c_2 : c_3; */
3842 vect_recog_mask_conversion_pattern (vec
<gimple
*> *stmts
, tree
*type_in
,
3845 gimple
*last_stmt
= stmts
->pop ();
3846 enum tree_code rhs_code
;
3847 tree lhs
= NULL_TREE
, rhs1
, rhs2
, tmp
, rhs1_type
, rhs2_type
;
3848 tree vectype1
, vectype2
;
3849 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
3850 stmt_vec_info pattern_stmt_info
;
3851 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
3853 /* Check for MASK_LOAD ans MASK_STORE calls requiring mask conversion. */
3854 if (is_gimple_call (last_stmt
)
3855 && gimple_call_internal_p (last_stmt
)
3856 && (gimple_call_internal_fn (last_stmt
) == IFN_MASK_STORE
3857 || gimple_call_internal_fn (last_stmt
) == IFN_MASK_LOAD
))
3859 gcall
*pattern_stmt
;
3860 bool load
= (gimple_call_internal_fn (last_stmt
) == IFN_MASK_LOAD
);
3864 lhs
= gimple_call_lhs (last_stmt
);
3865 vectype1
= get_vectype_for_scalar_type (TREE_TYPE (lhs
));
3869 rhs2
= gimple_call_arg (last_stmt
, 3);
3870 vectype1
= get_vectype_for_scalar_type (TREE_TYPE (rhs2
));
3873 rhs1
= gimple_call_arg (last_stmt
, 2);
3874 rhs1_type
= search_type_for_mask (rhs1
, vinfo
);
3877 vectype2
= get_mask_type_for_scalar_type (rhs1_type
);
3879 if (!vectype1
|| !vectype2
3880 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1
),
3881 TYPE_VECTOR_SUBPARTS (vectype2
)))
3884 tmp
= build_mask_conversion (rhs1
, vectype1
, stmt_vinfo
, vinfo
);
3888 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
3890 = gimple_build_call_internal (IFN_MASK_LOAD
, 3,
3891 gimple_call_arg (last_stmt
, 0),
3892 gimple_call_arg (last_stmt
, 1),
3894 gimple_call_set_lhs (pattern_stmt
, lhs
);
3898 = gimple_build_call_internal (IFN_MASK_STORE
, 4,
3899 gimple_call_arg (last_stmt
, 0),
3900 gimple_call_arg (last_stmt
, 1),
3902 gimple_call_arg (last_stmt
, 3));
3904 gimple_call_set_nothrow (pattern_stmt
, true);
3906 pattern_stmt_info
= new_stmt_vec_info (pattern_stmt
, vinfo
);
3907 set_vinfo_for_stmt (pattern_stmt
, pattern_stmt_info
);
3908 STMT_VINFO_DATA_REF (pattern_stmt_info
)
3909 = STMT_VINFO_DATA_REF (stmt_vinfo
);
3910 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info
)
3911 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_vinfo
);
3913 *type_out
= vectype1
;
3914 *type_in
= vectype1
;
3915 stmts
->safe_push (last_stmt
);
3916 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt
);
3918 return pattern_stmt
;
3921 if (!is_gimple_assign (last_stmt
))
3924 gimple
*pattern_stmt
;
3925 lhs
= gimple_assign_lhs (last_stmt
);
3926 rhs1
= gimple_assign_rhs1 (last_stmt
);
3927 rhs_code
= gimple_assign_rhs_code (last_stmt
);
3929 /* Check for cond expression requiring mask conversion. */
3930 if (rhs_code
== COND_EXPR
)
3932 /* vect_recog_mixed_size_cond_pattern could apply.
3934 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
3937 vectype1
= get_vectype_for_scalar_type (TREE_TYPE (lhs
));
3939 if (TREE_CODE (rhs1
) == SSA_NAME
)
3941 rhs1_type
= search_type_for_mask (rhs1
, vinfo
);
3945 else if (COMPARISON_CLASS_P (rhs1
))
3947 /* Check whether we're comparing scalar booleans and (if so)
3948 whether a better mask type exists than the mask associated
3949 with boolean-sized elements. This avoids unnecessary packs
3950 and unpacks if the booleans are set from comparisons of
3951 wider types. E.g. in:
3953 int x1, x2, x3, x4, y1, y1;
3955 bool b1 = (x1 == x2);
3956 bool b2 = (x3 == x4);
3957 ... = b1 == b2 ? y1 : y2;
3959 it is better for b1 and b2 to use the mask type associated
3960 with int elements rather bool (byte) elements. */
3961 rhs1_type
= search_type_for_mask (TREE_OPERAND (rhs1
, 0), vinfo
);
3963 rhs1_type
= TREE_TYPE (TREE_OPERAND (rhs1
, 0));
3968 vectype2
= get_mask_type_for_scalar_type (rhs1_type
);
3970 if (!vectype1
|| !vectype2
)
3973 /* Continue if a conversion is needed. Also continue if we have
3974 a comparison whose vector type would normally be different from
3975 VECTYPE2 when considered in isolation. In that case we'll
3976 replace the comparison with an SSA name (so that we can record
3977 its vector type) and behave as though the comparison was an SSA
3978 name from the outset. */
3979 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1
),
3980 TYPE_VECTOR_SUBPARTS (vectype2
))
3981 && (TREE_CODE (rhs1
) == SSA_NAME
3982 || rhs1_type
== TREE_TYPE (TREE_OPERAND (rhs1
, 0))))
3985 /* If rhs1 is invariant and we can promote it leave the COND_EXPR
3986 in place, we can handle it in vectorizable_condition. This avoids
3987 unnecessary promotion stmts and increased vectorization factor. */
3988 if (COMPARISON_CLASS_P (rhs1
)
3989 && INTEGRAL_TYPE_P (rhs1_type
)
3990 && known_le (TYPE_VECTOR_SUBPARTS (vectype1
),
3991 TYPE_VECTOR_SUBPARTS (vectype2
)))
3994 enum vect_def_type dt
;
3995 if (vect_is_simple_use (TREE_OPERAND (rhs1
, 0), stmt_vinfo
->vinfo
,
3997 && dt
== vect_external_def
3998 && vect_is_simple_use (TREE_OPERAND (rhs1
, 1), stmt_vinfo
->vinfo
,
4000 && (dt
== vect_external_def
4001 || dt
== vect_constant_def
))
4003 tree wide_scalar_type
= build_nonstandard_integer_type
4004 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype1
))),
4005 TYPE_UNSIGNED (rhs1_type
));
4006 tree vectype3
= get_vectype_for_scalar_type (wide_scalar_type
);
4007 if (expand_vec_cond_expr_p (vectype1
, vectype3
, TREE_CODE (rhs1
)))
4012 /* If rhs1 is a comparison we need to move it into a
4013 separate statement. */
4014 if (TREE_CODE (rhs1
) != SSA_NAME
)
4016 tmp
= vect_recog_temp_ssa_var (TREE_TYPE (rhs1
), NULL
);
4017 pattern_stmt
= gimple_build_assign (tmp
, rhs1
);
4020 pattern_stmt_info
= new_stmt_vec_info (pattern_stmt
, vinfo
);
4021 set_vinfo_for_stmt (pattern_stmt
, pattern_stmt_info
);
4022 STMT_VINFO_VECTYPE (pattern_stmt_info
) = vectype2
;
4023 append_pattern_def_seq (stmt_vinfo
, pattern_stmt
);
4026 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
4027 TYPE_VECTOR_SUBPARTS (vectype2
)))
4028 tmp
= build_mask_conversion (rhs1
, vectype1
, stmt_vinfo
, vinfo
);
4032 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
4033 pattern_stmt
= gimple_build_assign (lhs
, COND_EXPR
, tmp
,
4034 gimple_assign_rhs2 (last_stmt
),
4035 gimple_assign_rhs3 (last_stmt
));
4037 *type_out
= vectype1
;
4038 *type_in
= vectype1
;
4039 stmts
->safe_push (last_stmt
);
4040 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt
);
4042 return pattern_stmt
;
4045 /* Now check for binary boolean operations requiring conversion for
4047 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs
)))
4050 if (rhs_code
!= BIT_IOR_EXPR
4051 && rhs_code
!= BIT_XOR_EXPR
4052 && rhs_code
!= BIT_AND_EXPR
4053 && TREE_CODE_CLASS (rhs_code
) != tcc_comparison
)
4056 rhs2
= gimple_assign_rhs2 (last_stmt
);
4058 rhs1_type
= search_type_for_mask (rhs1
, vinfo
);
4059 rhs2_type
= search_type_for_mask (rhs2
, vinfo
);
4061 if (!rhs1_type
|| !rhs2_type
4062 || TYPE_PRECISION (rhs1_type
) == TYPE_PRECISION (rhs2_type
))
4065 if (TYPE_PRECISION (rhs1_type
) < TYPE_PRECISION (rhs2_type
))
4067 vectype1
= get_mask_type_for_scalar_type (rhs1_type
);
4070 rhs2
= build_mask_conversion (rhs2
, vectype1
, stmt_vinfo
, vinfo
);
4074 vectype1
= get_mask_type_for_scalar_type (rhs2_type
);
4077 rhs1
= build_mask_conversion (rhs1
, vectype1
, stmt_vinfo
, vinfo
);
4080 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
4081 pattern_stmt
= gimple_build_assign (lhs
, rhs_code
, rhs1
, rhs2
);
4083 *type_out
= vectype1
;
4084 *type_in
= vectype1
;
4085 stmts
->safe_push (last_stmt
);
4086 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt
);
4088 return pattern_stmt
;
4091 /* STMT is a load or store. If the load or store is conditional, return
4092 the boolean condition under which it occurs, otherwise return null. */
4095 vect_get_load_store_mask (gimple
*stmt
)
4097 if (gassign
*def_assign
= dyn_cast
<gassign
*> (stmt
))
4099 gcc_assert (gimple_assign_single_p (def_assign
));
4103 if (gcall
*def_call
= dyn_cast
<gcall
*> (stmt
))
4105 internal_fn ifn
= gimple_call_internal_fn (def_call
);
4106 int mask_index
= internal_fn_mask_index (ifn
);
4107 return gimple_call_arg (def_call
, mask_index
);
4113 /* Return the scalar offset type that an internal gather/scatter function
4114 should use. GS_INFO describes the gather/scatter operation. */
4117 vect_get_gather_scatter_offset_type (gather_scatter_info
*gs_info
)
4119 tree offset_type
= TREE_TYPE (gs_info
->offset
);
4120 unsigned int element_bits
= tree_to_uhwi (TYPE_SIZE (gs_info
->element_type
));
4122 /* Enforced by vect_check_gather_scatter. */
4123 unsigned int offset_bits
= TYPE_PRECISION (offset_type
);
4124 gcc_assert (element_bits
>= offset_bits
);
4126 /* If the offset is narrower than the elements, extend it according
4128 if (element_bits
> offset_bits
)
4129 return build_nonstandard_integer_type (element_bits
,
4130 TYPE_UNSIGNED (offset_type
));
4135 /* Return MASK if MASK is suitable for masking an operation on vectors
4136 of type VECTYPE, otherwise convert it into such a form and return
4137 the result. Associate any conversion statements with STMT_INFO's
4141 vect_convert_mask_for_vectype (tree mask
, tree vectype
,
4142 stmt_vec_info stmt_info
, vec_info
*vinfo
)
4144 tree mask_type
= search_type_for_mask (mask
, vinfo
);
4147 tree mask_vectype
= get_mask_type_for_scalar_type (mask_type
);
4149 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype
),
4150 TYPE_VECTOR_SUBPARTS (mask_vectype
)))
4151 mask
= build_mask_conversion (mask
, vectype
, stmt_info
, vinfo
);
4156 /* Return the equivalent of:
4158 fold_convert (TYPE, VALUE)
4160 with the expectation that the operation will be vectorized.
4161 If new statements are needed, add them as pattern statements
4165 vect_add_conversion_to_patterm (tree type
, tree value
,
4166 stmt_vec_info stmt_info
,
4169 if (useless_type_conversion_p (type
, TREE_TYPE (value
)))
4172 tree new_value
= vect_recog_temp_ssa_var (type
, NULL
);
4173 gassign
*conversion
= gimple_build_assign (new_value
, CONVERT_EXPR
, value
);
4174 stmt_vec_info new_stmt_info
= new_stmt_vec_info (conversion
, vinfo
);
4175 set_vinfo_for_stmt (conversion
, new_stmt_info
);
4176 STMT_VINFO_VECTYPE (new_stmt_info
) = get_vectype_for_scalar_type (type
);
4177 append_pattern_def_seq (stmt_info
, conversion
);
4181 /* Try to convert STMT into a call to a gather load or scatter store
4182 internal function. Return the final statement on success and set
4183 *TYPE_IN and *TYPE_OUT to the vector type being loaded or stored.
4185 This function only handles gathers and scatters that were recognized
4186 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
4189 vect_try_gather_scatter_pattern (gimple
*stmt
, stmt_vec_info last_stmt_info
,
4190 tree
*type_in
, tree
*type_out
)
4192 /* Currently we only support this for loop vectorization. */
4193 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4194 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (stmt_info
->vinfo
);
4198 /* Make sure that we're looking at a gather load or scatter store. */
4199 data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
4200 if (!dr
|| !STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
4203 /* Get the boolean that controls whether the load or store happens.
4204 This is null if the operation is unconditional. */
4205 tree mask
= vect_get_load_store_mask (stmt
);
4207 /* Make sure that the target supports an appropriate internal
4208 function for the gather/scatter operation. */
4209 gather_scatter_info gs_info
;
4210 if (!vect_check_gather_scatter (stmt
, loop_vinfo
, &gs_info
)
4214 /* Convert the mask to the right form. */
4215 tree gs_vectype
= get_vectype_for_scalar_type (gs_info
.element_type
);
4217 mask
= vect_convert_mask_for_vectype (mask
, gs_vectype
, last_stmt_info
,
4220 /* Get the invariant base and non-invariant offset, converting the
4221 latter to the same width as the vector elements. */
4222 tree base
= gs_info
.base
;
4223 tree offset_type
= vect_get_gather_scatter_offset_type (&gs_info
);
4224 tree offset
= vect_add_conversion_to_patterm (offset_type
, gs_info
.offset
,
4225 last_stmt_info
, loop_vinfo
);
4227 /* Build the new pattern statement. */
4228 tree scale
= size_int (gs_info
.scale
);
4229 gcall
*pattern_stmt
;
4230 if (DR_IS_READ (dr
))
4233 pattern_stmt
= gimple_build_call_internal (gs_info
.ifn
, 4, base
,
4234 offset
, scale
, mask
);
4236 pattern_stmt
= gimple_build_call_internal (gs_info
.ifn
, 3, base
,
4238 tree load_lhs
= vect_recog_temp_ssa_var (gs_info
.element_type
, NULL
);
4239 gimple_call_set_lhs (pattern_stmt
, load_lhs
);
4243 tree rhs
= vect_get_store_rhs (stmt
);
4245 pattern_stmt
= gimple_build_call_internal (IFN_MASK_SCATTER_STORE
, 5,
4246 base
, offset
, scale
, rhs
,
4249 pattern_stmt
= gimple_build_call_internal (IFN_SCATTER_STORE
, 4,
4250 base
, offset
, scale
, rhs
);
4252 gimple_call_set_nothrow (pattern_stmt
, true);
4254 /* Copy across relevant vectorization info and associate DR with the
4255 new pattern statement instead of the original statement. */
4256 stmt_vec_info pattern_stmt_info
= new_stmt_vec_info (pattern_stmt
,
4258 set_vinfo_for_stmt (pattern_stmt
, pattern_stmt_info
);
4259 STMT_VINFO_DATA_REF (pattern_stmt_info
) = dr
;
4260 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info
)
4261 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info
);
4262 STMT_VINFO_GATHER_SCATTER_P (pattern_stmt_info
)
4263 = STMT_VINFO_GATHER_SCATTER_P (stmt_info
);
4265 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4266 *type_out
= vectype
;
4268 vect_pattern_detected ("gather/scatter pattern", stmt
);
4270 return pattern_stmt
;
4273 /* Pattern wrapper around vect_try_gather_scatter_pattern. */
4276 vect_recog_gather_scatter_pattern (vec
<gimple
*> *stmts
, tree
*type_in
,
4279 gimple
*last_stmt
= stmts
->pop ();
4280 stmt_vec_info last_stmt_info
= vinfo_for_stmt (last_stmt
);
4281 gimple
*pattern_stmt
= vect_try_gather_scatter_pattern (last_stmt
,
4285 stmts
->safe_push (last_stmt
);
4286 return pattern_stmt
;
4289 /* Mark statements that are involved in a pattern. */
4292 vect_mark_pattern_stmts (gimple
*orig_stmt
, gimple
*pattern_stmt
,
4293 tree pattern_vectype
)
4295 stmt_vec_info pattern_stmt_info
, def_stmt_info
;
4296 stmt_vec_info orig_stmt_info
= vinfo_for_stmt (orig_stmt
);
4297 vec_info
*vinfo
= orig_stmt_info
->vinfo
;
4300 pattern_stmt_info
= vinfo_for_stmt (pattern_stmt
);
4301 if (pattern_stmt_info
== NULL
)
4303 pattern_stmt_info
= new_stmt_vec_info (pattern_stmt
, vinfo
);
4304 set_vinfo_for_stmt (pattern_stmt
, pattern_stmt_info
);
4306 gimple_set_bb (pattern_stmt
, gimple_bb (orig_stmt
));
4308 STMT_VINFO_RELATED_STMT (pattern_stmt_info
) = orig_stmt
;
4309 STMT_VINFO_DEF_TYPE (pattern_stmt_info
)
4310 = STMT_VINFO_DEF_TYPE (orig_stmt_info
);
4311 STMT_VINFO_VECTYPE (pattern_stmt_info
) = pattern_vectype
;
4312 STMT_VINFO_IN_PATTERN_P (orig_stmt_info
) = true;
4313 STMT_VINFO_RELATED_STMT (orig_stmt_info
) = pattern_stmt
;
4314 if (gimple
*def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info
))
4315 for (gimple_stmt_iterator si
= gsi_start (def_seq
);
4316 !gsi_end_p (si
); gsi_next (&si
))
4318 def_stmt
= gsi_stmt (si
);
4319 def_stmt_info
= vinfo_for_stmt (def_stmt
);
4320 if (def_stmt_info
== NULL
)
4322 def_stmt_info
= new_stmt_vec_info (def_stmt
, vinfo
);
4323 set_vinfo_for_stmt (def_stmt
, def_stmt_info
);
4325 gimple_set_bb (def_stmt
, gimple_bb (orig_stmt
));
4326 STMT_VINFO_RELATED_STMT (def_stmt_info
) = orig_stmt
;
4327 STMT_VINFO_DEF_TYPE (def_stmt_info
) = vect_internal_def
;
4328 if (STMT_VINFO_VECTYPE (def_stmt_info
) == NULL_TREE
)
4329 STMT_VINFO_VECTYPE (def_stmt_info
) = pattern_vectype
;
4333 /* Function vect_pattern_recog_1
4336 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
4337 computation pattern.
4338 STMT: A stmt from which the pattern search should start.
4340 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
4341 expression that computes the same functionality and can be used to
4342 replace the sequence of stmts that are involved in the pattern.
4345 This function checks if the expression returned by PATTERN_RECOG_FUNC is
4346 supported in vector form by the target. We use 'TYPE_IN' to obtain the
4347 relevant vector type. If 'TYPE_IN' is already a vector type, then this
4348 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
4349 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
4350 to the available target pattern.
4352 This function also does some bookkeeping, as explained in the documentation
4353 for vect_recog_pattern. */
4356 vect_pattern_recog_1 (vect_recog_func
*recog_func
,
4357 gimple_stmt_iterator si
,
4358 vec
<gimple
*> *stmts_to_replace
)
4360 gimple
*stmt
= gsi_stmt (si
), *pattern_stmt
;
4361 stmt_vec_info stmt_info
;
4362 loop_vec_info loop_vinfo
;
4363 tree pattern_vectype
;
4364 tree type_in
, type_out
;
4365 enum tree_code code
;
4368 stmts_to_replace
->truncate (0);
4369 stmts_to_replace
->quick_push (stmt
);
4370 pattern_stmt
= recog_func
->fn (stmts_to_replace
, &type_in
, &type_out
);
4373 /* Clear related stmt info that analysis might have noted for
4374 to be replaced stmts. */
4375 for (i
= 0; stmts_to_replace
->iterate (i
, &stmt
)
4376 && (unsigned) i
< stmts_to_replace
->length ();
4379 stmt_info
= vinfo_for_stmt (stmt
);
4380 STMT_VINFO_RELATED_STMT (stmt_info
) = NULL
;
4385 stmt
= stmts_to_replace
->last ();
4386 stmt_info
= vinfo_for_stmt (stmt
);
4387 loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4389 if (VECTOR_BOOLEAN_TYPE_P (type_in
)
4390 || VECTOR_TYPE_P (type_in
))
4392 /* No need to check target support (already checked by the pattern
4393 recognition function). */
4394 pattern_vectype
= type_out
? type_out
: type_in
;
4398 /* Check target support */
4399 type_in
= get_vectype_for_scalar_type (type_in
);
4403 type_out
= get_vectype_for_scalar_type (type_out
);
4408 pattern_vectype
= type_out
;
4410 if (is_gimple_assign (pattern_stmt
))
4412 enum insn_code icode
;
4413 code
= gimple_assign_rhs_code (pattern_stmt
);
4414 optab optab
= optab_for_tree_code (code
, type_in
, optab_default
);
4415 machine_mode vec_mode
= TYPE_MODE (type_in
);
4417 || (icode
= optab_handler (optab
, vec_mode
)) == CODE_FOR_nothing
4418 || (insn_data
[icode
].operand
[0].mode
!= TYPE_MODE (type_out
)))
4422 gcc_assert (is_gimple_call (pattern_stmt
));
4425 /* Found a vectorizable pattern. */
4426 if (dump_enabled_p ())
4428 dump_printf_loc (MSG_NOTE
, vect_location
,
4429 "%s pattern recognized: ", recog_func
->name
);
4430 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
4433 /* Mark the stmts that are involved in the pattern. */
4434 vect_mark_pattern_stmts (stmt
, pattern_stmt
, pattern_vectype
);
4436 /* Patterns cannot be vectorized using SLP, because they change the order of
4442 VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo
), ix
, ix2
,
4443 elem_ptr
, *elem_ptr
== stmt
);
4446 /* It is possible that additional pattern stmts are created and inserted in
4447 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
4448 relevant statements. */
4449 for (i
= 0; stmts_to_replace
->iterate (i
, &stmt
)
4450 && (unsigned) i
< (stmts_to_replace
->length () - 1);
4453 stmt_info
= vinfo_for_stmt (stmt
);
4454 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
4455 if (dump_enabled_p ())
4457 dump_printf_loc (MSG_NOTE
, vect_location
,
4458 "additional pattern stmt: ");
4459 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
4462 vect_mark_pattern_stmts (stmt
, pattern_stmt
, NULL_TREE
);
4469 /* Function vect_pattern_recog
4472 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
4475 Output - for each computation idiom that is detected we create a new stmt
4476 that provides the same functionality and that can be vectorized. We
4477 also record some information in the struct_stmt_info of the relevant
4478 stmts, as explained below:
4480 At the entry to this function we have the following stmts, with the
4481 following initial value in the STMT_VINFO fields:
4483 stmt in_pattern_p related_stmt vec_stmt
4484 S1: a_i = .... - - -
4485 S2: a_2 = ..use(a_i).. - - -
4486 S3: a_1 = ..use(a_2).. - - -
4487 S4: a_0 = ..use(a_1).. - - -
4488 S5: ... = ..use(a_0).. - - -
4490 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
4491 represented by a single stmt. We then:
4492 - create a new stmt S6 equivalent to the pattern (the stmt is not
4493 inserted into the code)
4494 - fill in the STMT_VINFO fields as follows:
4496 in_pattern_p related_stmt vec_stmt
4497 S1: a_i = .... - - -
4498 S2: a_2 = ..use(a_i).. - - -
4499 S3: a_1 = ..use(a_2).. - - -
4500 S4: a_0 = ..use(a_1).. true S6 -
4501 '---> S6: a_new = .... - S4 -
4502 S5: ... = ..use(a_0).. - - -
4504 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
4505 to each other through the RELATED_STMT field).
4507 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
4508 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
4509 remain irrelevant unless used by stmts other than S4.
4511 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
4512 (because they are marked as irrelevant). It will vectorize S6, and record
4513 a pointer to the new vector stmt VS6 from S6 (as usual).
4514 S4 will be skipped, and S5 will be vectorized as usual:
4516 in_pattern_p related_stmt vec_stmt
4517 S1: a_i = .... - - -
4518 S2: a_2 = ..use(a_i).. - - -
4519 S3: a_1 = ..use(a_2).. - - -
4520 > VS6: va_new = .... - - -
4521 S4: a_0 = ..use(a_1).. true S6 VS6
4522 '---> S6: a_new = .... - S4 VS6
4523 > VS5: ... = ..vuse(va_new).. - - -
4524 S5: ... = ..use(a_0).. - - -
4526 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
4527 elsewhere), and we'll end up with:
4530 VS5: ... = ..vuse(va_new)..
4532 In case of more than one pattern statements, e.g., widen-mult with
4536 S2 a_T = (TYPE) a_t;
4537 '--> S3: a_it = (interm_type) a_t;
4538 S4 prod_T = a_T * CONST;
4539 '--> S5: prod_T' = a_it w* CONST;
4541 there may be other users of a_T outside the pattern. In that case S2 will
4542 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
4543 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
4544 be recorded in S3. */
4547 vect_pattern_recog (vec_info
*vinfo
)
4552 gimple_stmt_iterator si
;
4554 auto_vec
<gimple
*, 1> stmts_to_replace
;
4556 DUMP_VECT_SCOPE ("vect_pattern_recog");
4558 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
4560 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
4561 bbs
= LOOP_VINFO_BBS (loop_vinfo
);
4562 nbbs
= loop
->num_nodes
;
4564 /* Scan through the loop stmts, applying the pattern recognition
4565 functions starting at each stmt visited: */
4566 for (i
= 0; i
< nbbs
; i
++)
4568 basic_block bb
= bbs
[i
];
4569 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
4571 gimple
*stmt
= gsi_stmt (si
);
4572 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4573 if (stmt_info
&& STMT_VINFO_IN_PATTERN_P (stmt_info
))
4575 /* Scan over all generic vect_recog_xxx_pattern functions. */
4576 for (j
= 0; j
< NUM_PATTERNS
; j
++)
4577 if (vect_pattern_recog_1 (&vect_vect_recog_func_ptrs
[j
], si
,
4585 bb_vec_info bb_vinfo
= as_a
<bb_vec_info
> (vinfo
);
4586 for (si
= bb_vinfo
->region_begin
;
4587 gsi_stmt (si
) != gsi_stmt (bb_vinfo
->region_end
); gsi_next (&si
))
4589 gimple
*stmt
= gsi_stmt (si
);
4590 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4592 && (!STMT_VINFO_VECTORIZABLE (stmt_info
)
4593 || STMT_VINFO_IN_PATTERN_P (stmt_info
)))
4596 /* Scan over all generic vect_recog_xxx_pattern functions. */
4597 for (j
= 0; j
< NUM_PATTERNS
; j
++)
4598 if (vect_pattern_recog_1 (&vect_vect_recog_func_ptrs
[j
], si
,