/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006-2018 Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
23 #include "coretypes.h"
30 #include "optabs-tree.h"
31 #include "insn-config.h"
32 #include "recog.h" /* FIXME: for insn_data */
33 #include "fold-const.h"
34 #include "stor-layout.h"
37 #include "gimple-iterator.h"
39 #include "tree-vectorizer.h"
42 #include "internal-fn.h"
43 #include "case-cfn-macros.h"
44 #include "fold-const-call.h"
47 #include "omp-simd-clone.h"
50 /* Pattern recognition functions */
51 static gimple
*vect_recog_widen_sum_pattern (vec
<gimple
*> *, tree
*,
53 static gimple
*vect_recog_widen_mult_pattern (vec
<gimple
*> *, tree
*,
55 static gimple
*vect_recog_dot_prod_pattern (vec
<gimple
*> *, tree
*,
57 static gimple
*vect_recog_sad_pattern (vec
<gimple
*> *, tree
*,
59 static gimple
*vect_recog_pow_pattern (vec
<gimple
*> *, tree
*, tree
*);
60 static gimple
*vect_recog_over_widening_pattern (vec
<gimple
*> *, tree
*,
62 static gimple
*vect_recog_widen_shift_pattern (vec
<gimple
*> *,
64 static gimple
*vect_recog_rotate_pattern (vec
<gimple
*> *, tree
*, tree
*);
65 static gimple
*vect_recog_vector_vector_shift_pattern (vec
<gimple
*> *,
67 static gimple
*vect_recog_divmod_pattern (vec
<gimple
*> *,
70 static gimple
*vect_recog_mult_pattern (vec
<gimple
*> *,
73 static gimple
*vect_recog_mixed_size_cond_pattern (vec
<gimple
*> *,
75 static gimple
*vect_recog_bool_pattern (vec
<gimple
*> *, tree
*, tree
*);
76 static gimple
*vect_recog_mask_conversion_pattern (vec
<gimple
*> *, tree
*, tree
*);
77 static gimple
*vect_recog_gather_scatter_pattern (vec
<gimple
*> *, tree
*,
80 struct vect_recog_func
82 vect_recog_func_ptr fn
;
86 /* Note that ordering matters - the first pattern matching on a stmt
87 is taken which means usually the more complex one needs to preceed
88 the less comples onex (widen_sum only after dot_prod or sad for example). */
89 static vect_recog_func vect_vect_recog_func_ptrs
[NUM_PATTERNS
] = {
90 { vect_recog_widen_mult_pattern
, "widen_mult" },
91 { vect_recog_dot_prod_pattern
, "dot_prod" },
92 { vect_recog_sad_pattern
, "sad" },
93 { vect_recog_widen_sum_pattern
, "widen_sum" },
94 { vect_recog_pow_pattern
, "pow" },
95 { vect_recog_widen_shift_pattern
, "widen_shift" },
96 { vect_recog_over_widening_pattern
, "over_widening" },
97 { vect_recog_rotate_pattern
, "rotate" },
98 { vect_recog_vector_vector_shift_pattern
, "vector_vector_shift" },
99 { vect_recog_divmod_pattern
, "divmod" },
100 { vect_recog_mult_pattern
, "mult" },
101 { vect_recog_mixed_size_cond_pattern
, "mixed_size_cond" },
102 { vect_recog_bool_pattern
, "bool" },
103 /* This must come before mask conversion, and includes the parts
104 of mask conversion that are needed for gather and scatter
105 internal functions. */
106 { vect_recog_gather_scatter_pattern
, "gather_scatter" },
107 { vect_recog_mask_conversion_pattern
, "mask_conversion" }
111 append_pattern_def_seq (stmt_vec_info stmt_info
, gimple
*stmt
)
113 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
),
118 new_pattern_def_seq (stmt_vec_info stmt_info
, gimple
*stmt
)
120 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
) = NULL
;
121 append_pattern_def_seq (stmt_info
, stmt
);
124 /* Check whether STMT2 is in the same loop or basic block as STMT1.
125 Which of the two applies depends on whether we're currently doing
126 loop-based or basic-block-based vectorization, as determined by
127 the vinfo_for_stmt for STMT1 (which must be defined).
129 If this returns true, vinfo_for_stmt for STMT2 is guaranteed
130 to be defined as well. */
133 vect_same_loop_or_bb_p (gimple
*stmt1
, gimple
*stmt2
)
135 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt1
);
136 return vect_stmt_in_region_p (stmt_vinfo
->vinfo
, stmt2
);
139 /* If the LHS of DEF_STMT has a single use, and that statement is
140 in the same loop or basic block, return it. */
143 vect_single_imm_use (gimple
*def_stmt
)
145 tree lhs
= gimple_assign_lhs (def_stmt
);
149 if (!single_imm_use (lhs
, &use_p
, &use_stmt
))
152 if (!vect_same_loop_or_bb_p (def_stmt
, use_stmt
))
158 /* If OP is defined by a statement that's being considered for vectorization,
159 return information about that statement, otherwise return NULL. */
162 vect_get_internal_def (vec_info
*vinfo
, tree op
)
166 if (TREE_CODE (op
) != SSA_NAME
167 || !vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
)
168 || dt
!= vect_internal_def
)
171 return vinfo_for_stmt (def_stmt
);
174 /* Check whether NAME, an ssa-name used in USE_STMT,
175 is a result of a type promotion, such that:
176 DEF_STMT: NAME = NOP (name0)
177 If CHECK_SIGN is TRUE, check that either both types are signed or both are
181 type_conversion_p (tree name
, gimple
*use_stmt
, bool check_sign
,
182 tree
*orig_type
, gimple
**def_stmt
, bool *promotion
)
184 gimple
*dummy_gimple
;
185 stmt_vec_info stmt_vinfo
;
186 tree type
= TREE_TYPE (name
);
188 enum vect_def_type dt
;
190 stmt_vinfo
= vinfo_for_stmt (use_stmt
);
191 if (!vect_is_simple_use (name
, stmt_vinfo
->vinfo
, def_stmt
, &dt
))
194 if (dt
!= vect_internal_def
195 && dt
!= vect_external_def
&& dt
!= vect_constant_def
)
201 if (dt
== vect_internal_def
)
203 stmt_vec_info def_vinfo
= vinfo_for_stmt (*def_stmt
);
204 if (STMT_VINFO_IN_PATTERN_P (def_vinfo
))
208 if (!is_gimple_assign (*def_stmt
))
211 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt
)))
214 oprnd0
= gimple_assign_rhs1 (*def_stmt
);
216 *orig_type
= TREE_TYPE (oprnd0
);
217 if (!INTEGRAL_TYPE_P (type
) || !INTEGRAL_TYPE_P (*orig_type
)
218 || ((TYPE_UNSIGNED (type
) != TYPE_UNSIGNED (*orig_type
)) && check_sign
))
221 if (TYPE_PRECISION (type
) >= (TYPE_PRECISION (*orig_type
) * 2))
226 if (!vect_is_simple_use (oprnd0
, stmt_vinfo
->vinfo
, &dummy_gimple
, &dt
))
232 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
233 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
236 vect_recog_temp_ssa_var (tree type
, gimple
*stmt
)
238 return make_temp_ssa_name (type
, stmt
, "patt");
241 /* Return true if STMT_VINFO describes a reduction for which reassociation
242 is allowed. If STMT_INFO is part of a group, assume that it's part of
243 a reduction chain and optimistically assume that all statements
244 except the last allow reassociation. */
247 vect_reassociating_reduction_p (stmt_vec_info stmt_vinfo
)
249 return (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
250 ? STMT_VINFO_REDUC_TYPE (stmt_vinfo
) != FOLD_LEFT_REDUCTION
251 : REDUC_GROUP_FIRST_ELEMENT (stmt_vinfo
) != NULL
);
254 /* Function vect_recog_dot_prod_pattern
256 Try to find the following pattern:
262 sum_0 = phi <init, sum_1>
265 S3 x_T = (TYPE1) x_t;
266 S4 y_T = (TYPE1) y_t;
268 [S6 prod = (TYPE2) prod; #optional]
269 S7 sum_1 = prod + sum_0;
271 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
272 same size of 'TYPE1' or bigger. This is a special case of a reduction
277 * STMTS: Contains a stmt from which the pattern search begins. In the
278 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
283 * TYPE_IN: The type of the input arguments to the pattern.
285 * TYPE_OUT: The type of the output of this pattern.
287 * Return value: A new stmt that will be used to replace the sequence of
288 stmts that constitute the pattern. In this case it will be:
289 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
291 Note: The dot-prod idiom is a widening reduction pattern that is
292 vectorized without preserving all the intermediate results. It
293 produces only N/2 (widened) results (by summing up pairs of
294 intermediate results) rather than all N results. Therefore, we
295 cannot allow this pattern when we want to get all the results and in
296 the correct order (as is the case when this computation is in an
297 inner-loop nested in an outer-loop that us being vectorized). */
300 vect_recog_dot_prod_pattern (vec
<gimple
*> *stmts
, tree
*type_in
,
303 gimple
*stmt
, *last_stmt
= (*stmts
)[0];
305 tree oprnd00
, oprnd01
;
306 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
307 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
308 tree type
, half_type
;
309 gimple
*pattern_stmt
;
311 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
319 loop
= LOOP_VINFO_LOOP (loop_info
);
321 /* We don't allow changing the order of the computation in the inner-loop
322 when doing outer-loop vectorization. */
323 if (loop
&& nested_in_vect_loop_p (loop
, last_stmt
))
326 if (!is_gimple_assign (last_stmt
))
329 type
= gimple_expr_type (last_stmt
);
331 /* Look for the following pattern
335 DDPROD = (TYPE2) DPROD;
336 sum_1 = DDPROD + sum_0;
338 - DX is double the size of X
339 - DY is double the size of Y
340 - DX, DY, DPROD all have the same type
341 - sum is the same size of DPROD or bigger
342 - sum has been recognized as a reduction variable.
344 This is equivalent to:
345 DPROD = X w* Y; #widen mult
346 sum_1 = DPROD w+ sum_0; #widen summation
348 DPROD = X w* Y; #widen mult
349 sum_1 = DPROD + sum_0; #summation
352 /* Starting from LAST_STMT, follow the defs of its uses in search
353 of the above pattern. */
355 if (gimple_assign_rhs_code (last_stmt
) != PLUS_EXPR
)
358 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
361 if (!vect_reassociating_reduction_p (stmt_vinfo
))
364 oprnd0
= gimple_assign_rhs1 (last_stmt
);
365 oprnd1
= gimple_assign_rhs2 (last_stmt
);
369 if (type_conversion_p (oprnd0
, stmt
, true, &half_type
, &def_stmt
,
374 oprnd0
= gimple_assign_rhs1 (stmt
);
379 /* So far so good. Since last_stmt was detected as a (summation) reduction,
380 we know that oprnd1 is the reduction variable (defined by a loop-header
381 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
382 Left to check that oprnd0 is defined by a (widen_)mult_expr */
383 if (TREE_CODE (oprnd0
) != SSA_NAME
)
386 prod_type
= half_type
;
387 stmt_vec_info mult_vinfo
= vect_get_internal_def (vinfo
, oprnd0
);
391 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
392 inside the loop (in case we are analyzing an outer-loop). */
393 gassign
*mult
= dyn_cast
<gassign
*> (mult_vinfo
->stmt
);
394 if (!mult
|| gimple_assign_rhs_code (mult
) != MULT_EXPR
)
396 if (STMT_VINFO_IN_PATTERN_P (mult_vinfo
))
398 /* Has been detected as a widening multiplication? */
400 mult
= dyn_cast
<gassign
*> (STMT_VINFO_RELATED_STMT (mult_vinfo
));
401 if (!mult
|| gimple_assign_rhs_code (mult
) != WIDEN_MULT_EXPR
)
403 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
)
404 = STMT_VINFO_PATTERN_DEF_SEQ (mult_vinfo
);
405 mult_vinfo
= vinfo_for_stmt (mult
);
406 gcc_assert (mult_vinfo
);
407 gcc_assert (STMT_VINFO_DEF_TYPE (mult_vinfo
) == vect_internal_def
);
408 oprnd00
= gimple_assign_rhs1 (mult
);
409 oprnd01
= gimple_assign_rhs2 (mult
);
413 tree half_type0
, half_type1
;
417 oprnd0
= gimple_assign_rhs1 (mult
);
418 oprnd1
= gimple_assign_rhs2 (mult
);
419 if (!type_conversion_p (oprnd0
, mult
, true, &half_type0
, &def_stmt
,
423 oprnd00
= gimple_assign_rhs1 (def_stmt
);
424 if (!type_conversion_p (oprnd1
, mult
, true, &half_type1
, &def_stmt
,
428 oprnd01
= gimple_assign_rhs1 (def_stmt
);
429 if (!types_compatible_p (half_type0
, half_type1
))
431 if (TYPE_PRECISION (prod_type
) != TYPE_PRECISION (half_type0
) * 2)
435 half_type
= TREE_TYPE (oprnd00
);
436 *type_in
= half_type
;
439 /* Pattern detected. Create a stmt to be used to replace the pattern: */
440 var
= vect_recog_temp_ssa_var (type
, NULL
);
441 pattern_stmt
= gimple_build_assign (var
, DOT_PROD_EXPR
,
442 oprnd00
, oprnd01
, oprnd1
);
444 if (dump_enabled_p ())
446 dump_printf_loc (MSG_NOTE
, vect_location
,
447 "vect_recog_dot_prod_pattern: detected: ");
448 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
455 /* Function vect_recog_sad_pattern
457 Try to find the following Sum of Absolute Difference (SAD) pattern:
460 signed TYPE1 diff, abs_diff;
463 sum_0 = phi <init, sum_1>
466 S3 x_T = (TYPE1) x_t;
467 S4 y_T = (TYPE1) y_t;
469 S6 abs_diff = ABS_EXPR <diff>;
470 [S7 abs_diff = (TYPE2) abs_diff; #optional]
471 S8 sum_1 = abs_diff + sum_0;
473 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
474 same size of 'TYPE1' or bigger. This is a special case of a reduction
479 * STMTS: Contains a stmt from which the pattern search begins. In the
480 example, when this function is called with S8, the pattern
481 {S3,S4,S5,S6,S7,S8} will be detected.
485 * TYPE_IN: The type of the input arguments to the pattern.
487 * TYPE_OUT: The type of the output of this pattern.
489 * Return value: A new stmt that will be used to replace the sequence of
490 stmts that constitute the pattern. In this case it will be:
491 SAD_EXPR <x_t, y_t, sum_0>
495 vect_recog_sad_pattern (vec
<gimple
*> *stmts
, tree
*type_in
,
498 gimple
*last_stmt
= (*stmts
)[0];
499 tree sad_oprnd0
, sad_oprnd1
;
500 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
501 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
503 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
510 loop
= LOOP_VINFO_LOOP (loop_info
);
512 /* We don't allow changing the order of the computation in the inner-loop
513 when doing outer-loop vectorization. */
514 if (loop
&& nested_in_vect_loop_p (loop
, last_stmt
))
517 if (!is_gimple_assign (last_stmt
))
520 tree sum_type
= gimple_expr_type (last_stmt
);
522 /* Look for the following pattern
526 DAD = ABS_EXPR <DDIFF>;
527 DDPROD = (TYPE2) DPROD;
530 - DX is at least double the size of X
531 - DY is at least double the size of Y
532 - DX, DY, DDIFF, DAD all have the same type
533 - sum is the same size of DAD or bigger
534 - sum has been recognized as a reduction variable.
536 This is equivalent to:
537 DDIFF = X w- Y; #widen sub
538 DAD = ABS_EXPR <DDIFF>;
539 sum_1 = DAD w+ sum_0; #widen summation
541 DDIFF = X w- Y; #widen sub
542 DAD = ABS_EXPR <DDIFF>;
543 sum_1 = DAD + sum_0; #summation
546 /* Starting from LAST_STMT, follow the defs of its uses in search
547 of the above pattern. */
549 if (gimple_assign_rhs_code (last_stmt
) != PLUS_EXPR
)
552 tree plus_oprnd0
, plus_oprnd1
;
554 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
557 if (!vect_reassociating_reduction_p (stmt_vinfo
))
560 plus_oprnd0
= gimple_assign_rhs1 (last_stmt
);
561 plus_oprnd1
= gimple_assign_rhs2 (last_stmt
);
563 /* The type conversion could be promotion, demotion,
564 or just signed -> unsigned. */
566 if (type_conversion_p (plus_oprnd0
, last_stmt
, false,
567 &half_type
, &def_stmt
, &promotion
))
568 plus_oprnd0
= gimple_assign_rhs1 (def_stmt
);
570 half_type
= sum_type
;
572 /* So far so good. Since last_stmt was detected as a (summation) reduction,
573 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
574 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
575 Then check that plus_oprnd0 is defined by an abs_expr. */
577 if (TREE_CODE (plus_oprnd0
) != SSA_NAME
)
580 tree abs_type
= half_type
;
581 stmt_vec_info abs_stmt_vinfo
= vect_get_internal_def (vinfo
, plus_oprnd0
);
585 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
586 inside the loop (in case we are analyzing an outer-loop). */
587 gassign
*abs_stmt
= dyn_cast
<gassign
*> (abs_stmt_vinfo
->stmt
);
589 || (gimple_assign_rhs_code (abs_stmt
) != ABS_EXPR
590 && gimple_assign_rhs_code (abs_stmt
) != ABSU_EXPR
))
593 tree abs_oprnd
= gimple_assign_rhs1 (abs_stmt
);
594 if (TYPE_UNSIGNED (abs_type
))
597 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
599 if (TREE_CODE (abs_oprnd
) != SSA_NAME
)
602 stmt_vec_info diff_stmt_vinfo
= vect_get_internal_def (vinfo
, abs_oprnd
);
603 if (!diff_stmt_vinfo
)
606 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
607 inside the loop (in case we are analyzing an outer-loop). */
608 gassign
*diff_stmt
= dyn_cast
<gassign
*> (diff_stmt_vinfo
->stmt
);
609 if (!diff_stmt
|| gimple_assign_rhs_code (diff_stmt
) != MINUS_EXPR
)
612 tree half_type0
, half_type1
;
614 tree minus_oprnd0
= gimple_assign_rhs1 (diff_stmt
);
615 tree minus_oprnd1
= gimple_assign_rhs2 (diff_stmt
);
617 if (!type_conversion_p (minus_oprnd0
, diff_stmt
, false,
618 &half_type0
, &def_stmt
, &promotion
)
621 sad_oprnd0
= gimple_assign_rhs1 (def_stmt
);
623 if (!type_conversion_p (minus_oprnd1
, diff_stmt
, false,
624 &half_type1
, &def_stmt
, &promotion
)
627 sad_oprnd1
= gimple_assign_rhs1 (def_stmt
);
629 if (!types_compatible_p (half_type0
, half_type1
))
631 if (TYPE_PRECISION (abs_type
) < TYPE_PRECISION (half_type0
) * 2
632 || TYPE_PRECISION (sum_type
) < TYPE_PRECISION (half_type0
) * 2)
635 *type_in
= TREE_TYPE (sad_oprnd0
);
636 *type_out
= sum_type
;
638 /* Pattern detected. Create a stmt to be used to replace the pattern: */
639 tree var
= vect_recog_temp_ssa_var (sum_type
, NULL
);
640 gimple
*pattern_stmt
= gimple_build_assign (var
, SAD_EXPR
, sad_oprnd0
,
641 sad_oprnd1
, plus_oprnd1
);
643 if (dump_enabled_p ())
645 dump_printf_loc (MSG_NOTE
, vect_location
,
646 "vect_recog_sad_pattern: detected: ");
647 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
654 /* Handle widening operation by a constant. At the moment we support MULT_EXPR
657 For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
658 we check that CONST_OPRND is less or equal to the size of HALF_TYPE.
660 Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
661 HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
662 that satisfies the above restrictions, we can perform a widening opeartion
663 from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
664 with a_it = (interm_type) a_t; Store such operation in *WSTMT. */
667 vect_handle_widen_op_by_const (gimple
*stmt
, enum tree_code code
,
668 tree const_oprnd
, tree
*oprnd
,
669 gimple
**wstmt
, tree type
,
670 tree
*half_type
, gimple
*def_stmt
)
672 tree new_type
, new_oprnd
;
674 if (code
!= MULT_EXPR
&& code
!= LSHIFT_EXPR
)
677 if (((code
== MULT_EXPR
&& int_fits_type_p (const_oprnd
, *half_type
))
678 || (code
== LSHIFT_EXPR
679 && compare_tree_int (const_oprnd
, TYPE_PRECISION (*half_type
))
681 && TYPE_PRECISION (type
) == (TYPE_PRECISION (*half_type
) * 2))
683 /* CONST_OPRND is a constant of HALF_TYPE. */
684 *oprnd
= gimple_assign_rhs1 (def_stmt
);
688 if (TYPE_PRECISION (type
) < (TYPE_PRECISION (*half_type
) * 4))
691 if (!vect_same_loop_or_bb_p (stmt
, def_stmt
))
694 /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
695 a type 2 times bigger than HALF_TYPE. */
696 new_type
= build_nonstandard_integer_type (TYPE_PRECISION (type
) / 2,
697 TYPE_UNSIGNED (type
));
698 if ((code
== MULT_EXPR
&& !int_fits_type_p (const_oprnd
, new_type
))
699 || (code
== LSHIFT_EXPR
700 && compare_tree_int (const_oprnd
, TYPE_PRECISION (new_type
)) == 1))
703 /* Use NEW_TYPE for widening operation and create a_T = (NEW_TYPE) a_t; */
704 *oprnd
= gimple_assign_rhs1 (def_stmt
);
705 new_oprnd
= make_ssa_name (new_type
);
706 *wstmt
= gimple_build_assign (new_oprnd
, NOP_EXPR
, *oprnd
);
709 *half_type
= new_type
;
714 /* Function vect_recog_widen_mult_pattern
716 Try to find the following pattern:
720 TYPE a_T, b_T, prod_T;
726 S5 prod_T = a_T * b_T;
728 where type 'TYPE' is at least double the size of type 'type1' and 'type2'.
730 Also detect unsigned cases:
734 unsigned TYPE u_prod_T;
735 TYPE a_T, b_T, prod_T;
741 S5 prod_T = a_T * b_T;
742 S6 u_prod_T = (unsigned TYPE) prod_T;
744 and multiplication by constants:
751 S5 prod_T = a_T * CONST;
753 A special case of multiplication by constants is when 'TYPE' is 4 times
754 bigger than 'type', but CONST fits an intermediate type 2 times smaller
755 than 'TYPE'. In that case we create an additional pattern stmt for S3
756 to create a variable of the intermediate type, and perform widen-mult
757 on the intermediate type as well:
761 TYPE a_T, prod_T, prod_T';
765 '--> a_it = (interm_type) a_t;
766 S5 prod_T = a_T * CONST;
767 '--> prod_T' = a_it w* CONST;
771 * STMTS: Contains a stmt from which the pattern search begins. In the
772 example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
773 is detected. In case of unsigned widen-mult, the original stmt (S5) is
774 replaced with S6 in STMTS. In case of multiplication by a constant
775 of an intermediate type (the last case above), STMTS also contains S3
776 (inserted before S5).
780 * TYPE_IN: The type of the input arguments to the pattern.
782 * TYPE_OUT: The type of the output of this pattern.
784 * Return value: A new stmt that will be used to replace the sequence of
785 stmts that constitute the pattern. In this case it will be:
786 WIDEN_MULT <a_t, b_t>
787 If the result of WIDEN_MULT needs to be converted to a larger type, the
788 returned stmt will be this type conversion stmt.
792 vect_recog_widen_mult_pattern (vec
<gimple
*> *stmts
,
793 tree
*type_in
, tree
*type_out
)
795 gimple
*last_stmt
= stmts
->pop ();
796 gimple
*def_stmt0
, *def_stmt1
;
798 tree type
, half_type0
, half_type1
;
799 gimple
*new_stmt
= NULL
, *pattern_stmt
= NULL
;
800 tree vectype
, vecitype
;
802 enum tree_code dummy_code
;
808 if (!is_gimple_assign (last_stmt
))
811 type
= gimple_expr_type (last_stmt
);
813 /* Starting from LAST_STMT, follow the defs of its uses in search
814 of the above pattern. */
816 if (gimple_assign_rhs_code (last_stmt
) != MULT_EXPR
)
819 oprnd0
= gimple_assign_rhs1 (last_stmt
);
820 oprnd1
= gimple_assign_rhs2 (last_stmt
);
822 /* Check argument 0. */
823 if (!type_conversion_p (oprnd0
, last_stmt
, false, &half_type0
, &def_stmt0
,
827 /* Check argument 1. */
828 op1_ok
= type_conversion_p (oprnd1
, last_stmt
, false, &half_type1
,
829 &def_stmt1
, &promotion
);
831 if (op1_ok
&& promotion
)
833 oprnd0
= gimple_assign_rhs1 (def_stmt0
);
834 oprnd1
= gimple_assign_rhs1 (def_stmt1
);
838 if (TREE_CODE (oprnd1
) == INTEGER_CST
839 && TREE_CODE (half_type0
) == INTEGER_TYPE
840 && vect_handle_widen_op_by_const (last_stmt
, MULT_EXPR
, oprnd1
,
841 &oprnd0
, &new_stmt
, type
,
842 &half_type0
, def_stmt0
))
844 half_type1
= half_type0
;
845 oprnd1
= fold_convert (half_type1
, oprnd1
);
851 /* If the two arguments have different sizes, convert the one with
852 the smaller type into the larger type. */
853 if (TYPE_PRECISION (half_type0
) != TYPE_PRECISION (half_type1
))
855 /* If we already used up the single-stmt slot give up. */
860 gimple
*def_stmt
= NULL
;
862 if (TYPE_PRECISION (half_type0
) < TYPE_PRECISION (half_type1
))
864 def_stmt
= def_stmt0
;
865 half_type0
= half_type1
;
870 def_stmt
= def_stmt1
;
871 half_type1
= half_type0
;
875 tree old_oprnd
= gimple_assign_rhs1 (def_stmt
);
876 tree new_oprnd
= make_ssa_name (half_type0
);
877 new_stmt
= gimple_build_assign (new_oprnd
, NOP_EXPR
, old_oprnd
);
881 /* Handle unsigned case. Look for
882 S6 u_prod_T = (unsigned TYPE) prod_T;
883 Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */
884 if (TYPE_UNSIGNED (type
) != TYPE_UNSIGNED (half_type0
))
890 if (TYPE_UNSIGNED (type
) == TYPE_UNSIGNED (half_type1
))
893 use_stmt
= vect_single_imm_use (last_stmt
);
894 if (!use_stmt
|| !is_gimple_assign (use_stmt
)
895 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt
)))
898 use_lhs
= gimple_assign_lhs (use_stmt
);
899 use_type
= TREE_TYPE (use_lhs
);
900 if (!INTEGRAL_TYPE_P (use_type
)
901 || (TYPE_UNSIGNED (type
) == TYPE_UNSIGNED (use_type
))
902 || (TYPE_PRECISION (type
) != TYPE_PRECISION (use_type
)))
906 last_stmt
= use_stmt
;
909 if (!types_compatible_p (half_type0
, half_type1
))
912 /* If TYPE is more than twice larger than HALF_TYPE, we use WIDEN_MULT
913 to get an intermediate result of type ITYPE. In this case we need
914 to build a statement to convert this intermediate result to type TYPE. */
916 if (TYPE_PRECISION (type
) > TYPE_PRECISION (half_type0
) * 2)
917 itype
= build_nonstandard_integer_type
918 (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (half_type0
)) * 2,
919 TYPE_UNSIGNED (type
));
921 /* Pattern detected. */
922 if (dump_enabled_p ())
923 dump_printf_loc (MSG_NOTE
, vect_location
,
924 "vect_recog_widen_mult_pattern: detected:\n");
926 /* Check target support */
927 vectype
= get_vectype_for_scalar_type (half_type0
);
928 vecitype
= get_vectype_for_scalar_type (itype
);
931 || !supportable_widening_operation (WIDEN_MULT_EXPR
, last_stmt
,
933 &dummy_code
, &dummy_code
,
934 &dummy_int
, &dummy_vec
))
938 *type_out
= get_vectype_for_scalar_type (type
);
940 /* Pattern supported. Create a stmt to be used to replace the pattern: */
941 var
= vect_recog_temp_ssa_var (itype
, NULL
);
942 pattern_stmt
= gimple_build_assign (var
, WIDEN_MULT_EXPR
, oprnd0
, oprnd1
);
944 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
945 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
) = NULL
;
947 /* If the original two operands have different sizes, we may need to convert
948 the smaller one into the larget type. If this is the case, at this point
949 the new stmt is already built. */
952 append_pattern_def_seq (stmt_vinfo
, new_stmt
);
953 stmt_vec_info new_stmt_info
954 = new_stmt_vec_info (new_stmt
, stmt_vinfo
->vinfo
);
955 set_vinfo_for_stmt (new_stmt
, new_stmt_info
);
956 STMT_VINFO_VECTYPE (new_stmt_info
) = vectype
;
959 /* If ITYPE is not TYPE, we need to build a type convertion stmt to convert
960 the result of the widen-mult operation into type TYPE. */
963 append_pattern_def_seq (stmt_vinfo
, pattern_stmt
);
964 stmt_vec_info pattern_stmt_info
965 = new_stmt_vec_info (pattern_stmt
, stmt_vinfo
->vinfo
);
966 set_vinfo_for_stmt (pattern_stmt
, pattern_stmt_info
);
967 STMT_VINFO_VECTYPE (pattern_stmt_info
) = vecitype
;
968 pattern_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (type
, NULL
),
970 gimple_assign_lhs (pattern_stmt
));
973 if (dump_enabled_p ())
974 dump_gimple_stmt_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, pattern_stmt
, 0);
976 stmts
->safe_push (last_stmt
);
981 /* Function vect_recog_pow_pattern
983 Try to find the following pattern:
987 with POW being one of pow, powf, powi, powif and N being
992 * LAST_STMT: A stmt from which the pattern search begins.
996 * TYPE_IN: The type of the input arguments to the pattern.
998 * TYPE_OUT: The type of the output of this pattern.
1000 * Return value: A new stmt that will be used to replace the sequence of
1001 stmts that constitute the pattern. In this case it will be:
1008 vect_recog_pow_pattern (vec
<gimple
*> *stmts
, tree
*type_in
,
1011 gimple
*last_stmt
= (*stmts
)[0];
1016 if (!is_gimple_call (last_stmt
) || gimple_call_lhs (last_stmt
) == NULL
)
1019 switch (gimple_call_combined_fn (last_stmt
))
1029 base
= gimple_call_arg (last_stmt
, 0);
1030 exp
= gimple_call_arg (last_stmt
, 1);
1031 if (TREE_CODE (exp
) != REAL_CST
1032 && TREE_CODE (exp
) != INTEGER_CST
)
1034 if (flag_unsafe_math_optimizations
1035 && TREE_CODE (base
) == REAL_CST
1036 && !gimple_call_internal_p (last_stmt
))
1038 combined_fn log_cfn
;
1039 built_in_function exp_bfn
;
1040 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt
)))
1043 log_cfn
= CFN_BUILT_IN_LOG
;
1044 exp_bfn
= BUILT_IN_EXP
;
1047 log_cfn
= CFN_BUILT_IN_LOGF
;
1048 exp_bfn
= BUILT_IN_EXPF
;
1051 log_cfn
= CFN_BUILT_IN_LOGL
;
1052 exp_bfn
= BUILT_IN_EXPL
;
1057 tree logc
= fold_const_call (log_cfn
, TREE_TYPE (base
), base
);
1058 tree exp_decl
= builtin_decl_implicit (exp_bfn
);
1059 /* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd
1060 does that, but if C is a power of 2, we want to use
1061 exp2 (log2 (C) * x) in the non-vectorized version, but for
1062 vectorization we don't have vectorized exp2. */
1064 && TREE_CODE (logc
) == REAL_CST
1066 && lookup_attribute ("omp declare simd",
1067 DECL_ATTRIBUTES (exp_decl
)))
1069 cgraph_node
*node
= cgraph_node::get_create (exp_decl
);
1070 if (node
->simd_clones
== NULL
)
1072 if (targetm
.simd_clone
.compute_vecsize_and_simdlen
== NULL
1073 || node
->definition
)
1075 expand_simd_clones (node
);
1076 if (node
->simd_clones
== NULL
)
1079 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
1080 tree def
= vect_recog_temp_ssa_var (TREE_TYPE (base
), NULL
);
1081 gimple
*g
= gimple_build_assign (def
, MULT_EXPR
, exp
, logc
);
1082 new_pattern_def_seq (stmt_vinfo
, g
);
1083 *type_in
= TREE_TYPE (base
);
1084 *type_out
= NULL_TREE
;
1085 tree res
= vect_recog_temp_ssa_var (TREE_TYPE (base
), NULL
);
1086 g
= gimple_build_call (exp_decl
, 1, def
);
1087 gimple_call_set_lhs (g
, res
);
1095 /* We now have a pow or powi builtin function call with a constant
1098 *type_out
= NULL_TREE
;
1100 /* Catch squaring. */
1101 if ((tree_fits_shwi_p (exp
)
1102 && tree_to_shwi (exp
) == 2)
1103 || (TREE_CODE (exp
) == REAL_CST
1104 && real_equal (&TREE_REAL_CST (exp
), &dconst2
)))
1106 *type_in
= TREE_TYPE (base
);
1108 var
= vect_recog_temp_ssa_var (TREE_TYPE (base
), NULL
);
1109 stmt
= gimple_build_assign (var
, MULT_EXPR
, base
, base
);
1113 /* Catch square root. */
1114 if (TREE_CODE (exp
) == REAL_CST
1115 && real_equal (&TREE_REAL_CST (exp
), &dconsthalf
))
1117 *type_in
= get_vectype_for_scalar_type (TREE_TYPE (base
));
1119 && direct_internal_fn_supported_p (IFN_SQRT
, *type_in
,
1120 OPTIMIZE_FOR_SPEED
))
1122 gcall
*stmt
= gimple_build_call_internal (IFN_SQRT
, 1, base
);
1123 var
= vect_recog_temp_ssa_var (TREE_TYPE (base
), stmt
);
1124 gimple_call_set_lhs (stmt
, var
);
1125 gimple_call_set_nothrow (stmt
, true);
/* NOTE(review): this region is a corrupted extraction of GCC's
   tree-vect-patterns.c.  Brace lines, blank lines and several short lines
   (notably the `return NULL;` of each early-exit guard) were dropped,
   statements are split across physical lines, and the upstream file's line
   numbers are fused into the text.  The comments added below annotate only
   the surviving fragments; restore this function from the pristine upstream
   source before attempting to compile.  */
1134 /* Function vect_recog_widen_sum_pattern
1136 Try to find the following pattern:
1139 TYPE x_T, sum = init;
1141 sum_0 = phi <init, sum_1>
1143 S2 x_T = (TYPE) x_t;
1144 S3 sum_1 = x_T + sum_0;
1146 where type 'TYPE' is at least double the size of type 'type', i.e - we're
1147 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1148 a special case of a reduction computation.
1152 * LAST_STMT: A stmt from which the pattern search begins. In the example,
1153 when this function is called with S3, the pattern {S2,S3} will be detected.
1157 * TYPE_IN: The type of the input arguments to the pattern.
1159 * TYPE_OUT: The type of the output of this pattern.
1161 * Return value: A new stmt that will be used to replace the sequence of
1162 stmts that constitute the pattern. In this case it will be:
1163 WIDEN_SUM <x_t, sum_0>
1165 Note: The widening-sum idiom is a widening reduction pattern that is
1166 vectorized without preserving all the intermediate results. It
1167 produces only N/2 (widened) results (by summing up pairs of
1168 intermediate results) rather than all N results. Therefore, we
1169 cannot allow this pattern when we want to get all the results and in
1170 the correct order (as is the case when this computation is in an
1171 inner-loop nested in an outer-loop that us being vectorized). */
1174 vect_recog_widen_sum_pattern (vec
<gimple
*> *stmts
, tree
*type_in
,
1177 gimple
*stmt
, *last_stmt
= (*stmts
)[0];
1178 tree oprnd0
, oprnd1
;
1179 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
1180 tree type
, half_type
;
1181 gimple
*pattern_stmt
;
1182 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1190 loop
= LOOP_VINFO_LOOP (loop_info
);
/* NOTE(review): each of the guards below originally fell through to a
   dropped `return NULL;` line -- TODO confirm against upstream.  */
1192 /* We don't allow changing the order of the computation in the inner-loop
1193 when doing outer-loop vectorization. */
1194 if (loop
&& nested_in_vect_loop_p (loop
, last_stmt
))
1197 if (!is_gimple_assign (last_stmt
))
1200 type
= gimple_expr_type (last_stmt
);
1202 /* Look for the following pattern
1205 In which DX is at least double the size of X, and sum_1 has been
1206 recognized as a reduction variable.
1209 /* Starting from LAST_STMT, follow the defs of its uses in search
1210 of the above pattern. */
1212 if (gimple_assign_rhs_code (last_stmt
) != PLUS_EXPR
)
1215 if (!vect_reassociating_reduction_p (stmt_vinfo
))
1218 oprnd0
= gimple_assign_rhs1 (last_stmt
);
1219 oprnd1
= gimple_assign_rhs2 (last_stmt
);
1221 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1222 we know that oprnd1 is the reduction variable (defined by a loop-header
1223 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1224 Left to check that oprnd0 is defined by a cast from type 'type' to type
1227 if (!type_conversion_p (oprnd0
, last_stmt
, true, &half_type
, &stmt
,
1232 oprnd0
= gimple_assign_rhs1 (stmt
);
1233 *type_in
= half_type
;
/* NOTE(review): on success the pattern stmt computes
   var = WIDEN_SUM_EXPR <oprnd0, oprnd1> in the wide type.  */
1236 /* Pattern detected. Create a stmt to be used to replace the pattern: */
1237 var
= vect_recog_temp_ssa_var (type
, NULL
);
1238 pattern_stmt
= gimple_build_assign (var
, WIDEN_SUM_EXPR
, oprnd0
, oprnd1
);
1240 if (dump_enabled_p ())
1242 dump_printf_loc (MSG_NOTE
, vect_location
,
1243 "vect_recog_widen_sum_pattern: detected: ");
1244 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
1247 return pattern_stmt
;
/* NOTE(review): corrupted extraction -- brace/`return` lines were dropped,
   statements are split across lines and upstream line numbers are fused
   into the text.  Annotations below cover only the visible fragments;
   restore from pristine tree-vect-patterns.c before compiling.  */
1251 /* Return TRUE if the operation in STMT can be performed on a smaller type.
1254 STMT - a statement to check.
1255 DEF - we support operations with two operands, one of which is constant.
1256 The other operand can be defined by a demotion operation, or by a
1257 previous statement in a sequence of over-promoted operations. In the
1258 later case DEF is used to replace that operand. (It is defined by a
1259 pattern statement we created for the previous statement in the
1263 NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not
1264 NULL, it's the type of DEF.
1265 STMTS - additional pattern statements. If a pattern statement (type
1266 conversion) is created in this function, its original statement is
1270 OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
1271 operands to use in the new pattern statement for STMT (will be created
1272 in vect_recog_over_widening_pattern ()).
1273 NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
1274 statements for STMT: the first one is a type promotion and the second
1275 one is the operation itself. We return the type promotion statement
1276 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
1277 the second pattern statement. */
1280 vect_operation_fits_smaller_type (gimple
*stmt
, tree def
, tree
*new_type
,
1281 tree
*op0
, tree
*op1
, gimple
**new_def_stmt
,
1282 vec
<gimple
*> *stmts
)
1284 enum tree_code code
;
1285 tree const_oprnd
, oprnd
;
1286 tree interm_type
= NULL_TREE
, half_type
, new_oprnd
, type
;
1287 gimple
*def_stmt
, *new_stmt
;
1293 *new_def_stmt
= NULL
;
/* NOTE(review): only shifts and bitwise ops with one constant operand are
   handled; each failing guard originally returned false (line dropped).  */
1295 if (!is_gimple_assign (stmt
))
1298 code
= gimple_assign_rhs_code (stmt
);
1299 if (code
!= LSHIFT_EXPR
&& code
!= RSHIFT_EXPR
1300 && code
!= BIT_IOR_EXPR
&& code
!= BIT_XOR_EXPR
&& code
!= BIT_AND_EXPR
)
1303 oprnd
= gimple_assign_rhs1 (stmt
);
1304 const_oprnd
= gimple_assign_rhs2 (stmt
);
1305 type
= gimple_expr_type (stmt
);
1307 if (TREE_CODE (oprnd
) != SSA_NAME
1308 || TREE_CODE (const_oprnd
) != INTEGER_CST
)
1311 /* If oprnd has other uses besides that in stmt we cannot mark it
1312 as being part of a pattern only. */
1313 if (!has_single_use (oprnd
))
1316 /* If we are in the middle of a sequence, we use DEF from a previous
1317 statement. Otherwise, OPRND has to be a result of type promotion. */
1320 half_type
= *new_type
;
1326 if (!type_conversion_p (oprnd
, stmt
, false, &half_type
, &def_stmt
,
1329 || !vect_same_loop_or_bb_p (stmt
, def_stmt
))
1333 /* Can we perform the operation on a smaller type? */
/* NOTE(review): the switch dispatching on CODE is missing from this
   extraction; the fragments below appear to be its case bodies.  */
1339 if (!int_fits_type_p (const_oprnd
, half_type
))
1341 /* HALF_TYPE is not enough. Try a bigger type if possible. */
1342 if (TYPE_PRECISION (type
) < (TYPE_PRECISION (half_type
) * 4))
1345 interm_type
= build_nonstandard_integer_type (
1346 TYPE_PRECISION (half_type
) * 2, TYPE_UNSIGNED (type
));
1347 if (!int_fits_type_p (const_oprnd
, interm_type
))
1354 /* Try intermediate type - HALF_TYPE is not enough for sure. */
1355 if (TYPE_PRECISION (type
) < (TYPE_PRECISION (half_type
) * 4))
1358 /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
1359 (e.g., if the original value was char, the shift amount is at most 8
1360 if we want to use short). */
1361 if (compare_tree_int (const_oprnd
, TYPE_PRECISION (half_type
)) == 1)
1364 interm_type
= build_nonstandard_integer_type (
1365 TYPE_PRECISION (half_type
) * 2, TYPE_UNSIGNED (type
));
1367 if (!vect_supportable_shift (code
, interm_type
))
1373 if (vect_supportable_shift (code
, half_type
))
1376 /* Try intermediate type - HALF_TYPE is not supported. */
1377 if (TYPE_PRECISION (type
) < (TYPE_PRECISION (half_type
) * 4))
1380 interm_type
= build_nonstandard_integer_type (
1381 TYPE_PRECISION (half_type
) * 2, TYPE_UNSIGNED (type
));
1383 if (!vect_supportable_shift (code
, interm_type
))
1392 /* There are four possible cases:
1393 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
1394 the first statement in the sequence)
1395 a. The original, HALF_TYPE, is not enough - we replace the promotion
1396 from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
1397 b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
1399 2. OPRND is defined by a pattern statement we created.
1400 a. Its type is not sufficient for the operation, we create a new stmt:
1401 a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store
1402 this statement in NEW_DEF_STMT, and it is later put in
1403 STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
1404 b. OPRND is good to use in the new statement. */
1409 /* Replace the original type conversion HALF_TYPE->TYPE with
1410 HALF_TYPE->INTERM_TYPE. */
1411 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
)))
1413 new_stmt
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
));
1414 /* Check if the already created pattern stmt is what we need. */
1415 if (!is_gimple_assign (new_stmt
)
1416 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt
))
1417 || TREE_TYPE (gimple_assign_lhs (new_stmt
)) != interm_type
)
1420 stmts
->safe_push (def_stmt
);
1421 oprnd
= gimple_assign_lhs (new_stmt
);
1425 /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */
1426 oprnd
= gimple_assign_rhs1 (def_stmt
);
1427 new_oprnd
= make_ssa_name (interm_type
);
1428 new_stmt
= gimple_build_assign (new_oprnd
, NOP_EXPR
, oprnd
);
1429 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
)) = new_stmt
;
1430 stmts
->safe_push (def_stmt
);
1436 /* Retrieve the operand before the type promotion. */
1437 oprnd
= gimple_assign_rhs1 (def_stmt
);
1444 /* Create a type conversion HALF_TYPE->INTERM_TYPE. */
1445 new_oprnd
= make_ssa_name (interm_type
);
1446 new_stmt
= gimple_build_assign (new_oprnd
, NOP_EXPR
, oprnd
);
1448 *new_def_stmt
= new_stmt
;
1451 /* Otherwise, OPRND is already set. */
1455 *new_type
= interm_type
;
1457 *new_type
= half_type
;
1460 *op1
= fold_convert (*new_type
, const_oprnd
);
/* NOTE(review): corrupted extraction -- the surrounding while-loop, braces
   and early `return NULL;` lines of this function were dropped and upstream
   line numbers are fused into the text.  Annotations cover only the visible
   fragments; restore from pristine tree-vect-patterns.c before compiling.  */
1466 /* Try to find a statement or a sequence of statements that can be performed
1470 TYPE x_T, res0_T, res1_T;
1473 S2 x_T = (TYPE) x_t;
1474 S3 res0_T = op (x_T, C0);
1475 S4 res1_T = op (res0_T, C1);
1476 S5 ... = () res1_T; - type demotion
1478 where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
1480 Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
1481 be 'type' or some intermediate type. For now, we expect S5 to be a type
1482 demotion operation. We also check that S3 and S4 have only one use. */
1485 vect_recog_over_widening_pattern (vec
<gimple
*> *stmts
,
1486 tree
*type_in
, tree
*type_out
)
1488 gimple
*stmt
= stmts
->pop ();
1489 gimple
*pattern_stmt
= NULL
, *new_def_stmt
, *prev_stmt
= NULL
,
1491 tree op0
, op1
, vectype
= NULL_TREE
, use_lhs
, use_type
;
1492 tree var
= NULL_TREE
, new_type
= NULL_TREE
, new_oprnd
;
/* NOTE(review): the loop walking the single-use chain of STMT is missing
   here; the fragments below appear to be its body.  */
1499 if (!vinfo_for_stmt (stmt
)
1500 || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt
)))
1503 new_def_stmt
= NULL
;
1504 if (!vect_operation_fits_smaller_type (stmt
, var
, &new_type
,
1505 &op0
, &op1
, &new_def_stmt
,
1514 /* STMT can be performed on a smaller type. Check its uses. */
1515 use_stmt
= vect_single_imm_use (stmt
);
1516 if (!use_stmt
|| !is_gimple_assign (use_stmt
))
1519 /* Create pattern statement for STMT. */
1520 vectype
= get_vectype_for_scalar_type (new_type
);
1524 /* We want to collect all the statements for which we create pattern
1525 statetments, except for the case when the last statement in the
1526 sequence doesn't have a corresponding pattern statement. In such
1527 case we associate the last pattern statement with the last statement
1528 in the sequence. Therefore, we only add the original statement to
1529 the list if we know that it is not the last. */
1531 stmts
->safe_push (prev_stmt
);
1533 var
= vect_recog_temp_ssa_var (new_type
, NULL
);
1535 = gimple_build_assign (var
, gimple_assign_rhs_code (stmt
), op0
, op1
);
1536 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt
)) = pattern_stmt
;
1537 new_pattern_def_seq (vinfo_for_stmt (stmt
), new_def_stmt
);
1539 if (dump_enabled_p ())
1541 dump_printf_loc (MSG_NOTE
, vect_location
,
1542 "created pattern stmt: ");
1543 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
1546 type
= gimple_expr_type (stmt
);
1553 /* We got a sequence. We expect it to end with a type demotion operation.
1554 Otherwise, we quit (for now). There are three possible cases: the
1555 conversion is to NEW_TYPE (we don't do anything), the conversion is to
1556 a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
1557 NEW_TYPE differs (we create a new conversion statement). */
1558 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt
)))
1560 use_lhs
= gimple_assign_lhs (use_stmt
);
1561 use_type
= TREE_TYPE (use_lhs
);
1562 /* Support only type demotion or signedess change. */
1563 if (!INTEGRAL_TYPE_P (use_type
)
1564 || TYPE_PRECISION (type
) <= TYPE_PRECISION (use_type
))
1567 /* Check that NEW_TYPE is not bigger than the conversion result. */
1568 if (TYPE_PRECISION (new_type
) > TYPE_PRECISION (use_type
))
1571 if (TYPE_UNSIGNED (new_type
) != TYPE_UNSIGNED (use_type
)
1572 || TYPE_PRECISION (new_type
) != TYPE_PRECISION (use_type
))
1574 /* Create NEW_TYPE->USE_TYPE conversion. */
1575 new_oprnd
= make_ssa_name (use_type
);
1576 pattern_stmt
= gimple_build_assign (new_oprnd
, NOP_EXPR
, var
);
1577 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt
)) = pattern_stmt
;
1579 *type_in
= get_vectype_for_scalar_type (new_type
);
1580 *type_out
= get_vectype_for_scalar_type (use_type
);
1582 /* We created a pattern statement for the last statement in the
1583 sequence, so we don't need to associate it with the pattern
1584 statement created for PREV_STMT. Therefore, we add PREV_STMT
1585 to the list in order to mark it later in vect_pattern_recog_1. */
1587 stmts
->safe_push (prev_stmt
);
1592 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt
))
1593 = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt
));
1596 *type_out
= NULL_TREE
;
1599 stmts
->safe_push (use_stmt
);
1602 /* TODO: support general case, create a conversion to the correct type. */
1605 /* Pattern detected. */
1606 if (dump_enabled_p ())
1608 dump_printf_loc (MSG_NOTE
, vect_location
,
1609 "vect_recog_over_widening_pattern: detected: ");
1610 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
1613 return pattern_stmt
;
/* NOTE(review): corrupted extraction -- brace/`return NULL;` lines were
   dropped and upstream line numbers are fused into the text.  Annotations
   below cover only the visible fragments; restore from pristine
   tree-vect-patterns.c before compiling.  */
1616 /* Detect widening shift pattern:
1622 S2 a_T = (TYPE) a_t;
1623 S3 res_T = a_T << CONST;
1625 where type 'TYPE' is at least double the size of type 'type'.
1627 Also detect cases where the shift result is immediately converted
1628 to another type 'result_type' that is no larger in size than 'TYPE'.
1629 In those cases we perform a widen-shift that directly results in
1630 'result_type', to avoid a possible over-widening situation:
1634 result_type res_result;
1637 S2 a_T = (TYPE) a_t;
1638 S3 res_T = a_T << CONST;
1639 S4 res_result = (result_type) res_T;
1640 '--> res_result' = a_t w<< CONST;
1642 And a case when 'TYPE' is 4 times bigger than 'type'. In that case we
1643 create an additional pattern stmt for S2 to create a variable of an
1644 intermediate type, and perform widen-shift on the intermediate type:
1648 TYPE a_T, res_T, res_T';
1651 S2 a_T = (TYPE) a_t;
1652 '--> a_it = (interm_type) a_t;
1653 S3 res_T = a_T << CONST;
1654 '--> res_T' = a_it <<* CONST;
1658 * STMTS: Contains a stmt from which the pattern search begins.
1659 In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
1660 in STMTS. When an intermediate type is used and a pattern statement is
1661 created for S2, we also put S2 here (before S3).
1665 * TYPE_IN: The type of the input arguments to the pattern.
1667 * TYPE_OUT: The type of the output of this pattern.
1669 * Return value: A new stmt that will be used to replace the sequence of
1670 stmts that constitute the pattern. In this case it will be:
1671 WIDEN_LSHIFT_EXPR <a_t, CONST>. */
1674 vect_recog_widen_shift_pattern (vec
<gimple
*> *stmts
,
1675 tree
*type_in
, tree
*type_out
)
1677 gimple
*last_stmt
= stmts
->pop ();
1679 tree oprnd0
, oprnd1
;
1680 tree type
, half_type0
;
1681 gimple
*pattern_stmt
;
1682 tree vectype
, vectype_out
= NULL_TREE
;
1684 enum tree_code dummy_code
;
1686 vec
<tree
> dummy_vec
;
/* NOTE(review): failing guards below originally returned NULL (lines
   dropped by the extraction).  */
1690 if (!is_gimple_assign (last_stmt
) || !vinfo_for_stmt (last_stmt
))
1693 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt
)))
1696 if (gimple_assign_rhs_code (last_stmt
) != LSHIFT_EXPR
)
1699 oprnd0
= gimple_assign_rhs1 (last_stmt
);
1700 oprnd1
= gimple_assign_rhs2 (last_stmt
);
1701 if (TREE_CODE (oprnd0
) != SSA_NAME
|| TREE_CODE (oprnd1
) != INTEGER_CST
)
1704 /* Check operand 0: it has to be defined by a type promotion. */
1705 if (!type_conversion_p (oprnd0
, last_stmt
, false, &half_type0
, &def_stmt0
,
1710 /* Check operand 1: has to be positive. We check that it fits the type
1711 in vect_handle_widen_op_by_const (). */
1712 if (tree_int_cst_compare (oprnd1
, size_zero_node
) <= 0)
1715 oprnd0
= gimple_assign_rhs1 (def_stmt0
);
1716 type
= gimple_expr_type (last_stmt
);
1718 /* Check for subsequent conversion to another type. */
1719 use_stmt
= vect_single_imm_use (last_stmt
);
1720 if (use_stmt
&& is_gimple_assign (use_stmt
)
1721 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt
))
1722 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt
)))
1724 tree use_lhs
= gimple_assign_lhs (use_stmt
);
1725 tree use_type
= TREE_TYPE (use_lhs
);
1727 if (INTEGRAL_TYPE_P (use_type
)
1728 && TYPE_PRECISION (use_type
) <= TYPE_PRECISION (type
))
1730 last_stmt
= use_stmt
;
1735 /* Check if this a widening operation. */
1736 gimple
*wstmt
= NULL
;
1737 if (!vect_handle_widen_op_by_const (last_stmt
, LSHIFT_EXPR
, oprnd1
,
1739 type
, &half_type0
, def_stmt0
))
1742 /* Pattern detected. */
1743 if (dump_enabled_p ())
1744 dump_printf_loc (MSG_NOTE
, vect_location
,
1745 "vect_recog_widen_shift_pattern: detected:\n");
1747 /* Check target support. */
1748 vectype
= get_vectype_for_scalar_type (half_type0
);
1749 vectype_out
= get_vectype_for_scalar_type (type
);
1753 || !supportable_widening_operation (WIDEN_LSHIFT_EXPR
, last_stmt
,
1754 vectype_out
, vectype
,
1755 &dummy_code
, &dummy_code
,
1756 &dummy_int
, &dummy_vec
))
1760 *type_out
= vectype_out
;
1762 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1763 var
= vect_recog_temp_ssa_var (type
, NULL
);
1765 = gimple_build_assign (var
, WIDEN_LSHIFT_EXPR
, oprnd0
, oprnd1
);
/* NOTE(review): presumably guarded by `if (wstmt)` upstream -- the guard
   line is missing from this extraction; TODO confirm.  */
1768 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
1769 new_pattern_def_seq (stmt_vinfo
, wstmt
);
1770 stmt_vec_info new_stmt_info
1771 = new_stmt_vec_info (wstmt
, stmt_vinfo
->vinfo
);
1772 set_vinfo_for_stmt (wstmt
, new_stmt_info
);
1773 STMT_VINFO_VECTYPE (new_stmt_info
) = vectype
;
1776 if (dump_enabled_p ())
1777 dump_gimple_stmt_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, pattern_stmt
, 0);
1779 stmts
->safe_push (last_stmt
);
1780 return pattern_stmt
;
/* NOTE(review): corrupted extraction -- brace lines, blank lines and many
   short lines (`return NULL;`, `{`, `}`, some assignments) were dropped,
   and upstream line numbers are fused into the text.  Annotations below
   cover only the visible fragments; restore from pristine
   tree-vect-patterns.c before compiling.  */
1783 /* Detect a rotate pattern wouldn't be otherwise vectorized:
1787 S0 a_t = b_t r<< c_t;
1791 * STMTS: Contains a stmt from which the pattern search begins,
1792 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1796 S2 e_t = d_t & (B - 1);
1797 S3 f_t = b_t << c_t;
1798 S4 g_t = b_t >> e_t;
1801 where B is element bitsize of type.
1805 * TYPE_IN: The type of the input arguments to the pattern.
1807 * TYPE_OUT: The type of the output of this pattern.
1809 * Return value: A new stmt that will be used to replace the rotate
1813 vect_recog_rotate_pattern (vec
<gimple
*> *stmts
, tree
*type_in
, tree
*type_out
)
1815 gimple
*last_stmt
= stmts
->pop ();
1816 tree oprnd0
, oprnd1
, lhs
, var
, var1
, var2
, vectype
, type
, stype
, def
, def2
;
1817 gimple
*pattern_stmt
, *def_stmt
;
1818 enum tree_code rhs_code
;
1819 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
1820 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
1821 enum vect_def_type dt
;
1822 optab optab1
, optab2
;
1823 edge ext_def
= NULL
;
1825 if (!is_gimple_assign (last_stmt
))
1828 rhs_code
= gimple_assign_rhs_code (last_stmt
);
/* NOTE(review): the switch accepting only LROTATE_EXPR/RROTATE_EXPR
   (upstream lines ~1829-1837) is missing from this extraction.  */
1838 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
1841 lhs
= gimple_assign_lhs (last_stmt
);
1842 oprnd0
= gimple_assign_rhs1 (last_stmt
);
1843 type
= TREE_TYPE (oprnd0
);
1844 oprnd1
= gimple_assign_rhs2 (last_stmt
);
1845 if (TREE_CODE (oprnd0
) != SSA_NAME
1846 || TYPE_PRECISION (TREE_TYPE (lhs
)) != TYPE_PRECISION (type
)
1847 || !INTEGRAL_TYPE_P (type
)
1848 || !TYPE_UNSIGNED (type
))
1851 if (!vect_is_simple_use (oprnd1
, vinfo
, &def_stmt
, &dt
))
1854 if (dt
!= vect_internal_def
1855 && dt
!= vect_constant_def
1856 && dt
!= vect_external_def
)
1859 vectype
= get_vectype_for_scalar_type (type
);
1860 if (vectype
== NULL_TREE
)
1863 /* If vector/vector or vector/scalar rotate is supported by the target,
1864 don't do anything here. */
1865 optab1
= optab_for_tree_code (rhs_code
, vectype
, optab_vector
);
1867 && optab_handler (optab1
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
1870 if (is_a
<bb_vec_info
> (vinfo
) || dt
!= vect_internal_def
)
1872 optab2
= optab_for_tree_code (rhs_code
, vectype
, optab_scalar
);
1874 && optab_handler (optab2
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
1878 /* If vector/vector or vector/scalar shifts aren't supported by the target,
1879 don't do anything here either. */
1880 optab1
= optab_for_tree_code (LSHIFT_EXPR
, vectype
, optab_vector
);
1881 optab2
= optab_for_tree_code (RSHIFT_EXPR
, vectype
, optab_vector
);
1883 || optab_handler (optab1
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
1885 || optab_handler (optab2
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
1887 if (! is_a
<bb_vec_info
> (vinfo
) && dt
== vect_internal_def
)
1889 optab1
= optab_for_tree_code (LSHIFT_EXPR
, vectype
, optab_scalar
);
1890 optab2
= optab_for_tree_code (RSHIFT_EXPR
, vectype
, optab_scalar
);
1892 || optab_handler (optab1
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
1894 || optab_handler (optab2
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
1899 *type_out
= vectype
;
1900 if (*type_in
== NULL_TREE
)
/* NOTE(review): for an external shift-amount def, hoist the helper stmts
   to the loop preheader edge so they are computed once.  */
1903 if (dt
== vect_external_def
1904 && TREE_CODE (oprnd1
) == SSA_NAME
1905 && is_a
<loop_vec_info
> (vinfo
))
1907 struct loop
*loop
= as_a
<loop_vec_info
> (vinfo
)->loop
;
1908 ext_def
= loop_preheader_edge (loop
);
1909 if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1
))
1911 basic_block bb
= gimple_bb (SSA_NAME_DEF_STMT (oprnd1
));
1913 || !dominated_by_p (CDI_DOMINATORS
, ext_def
->dest
, bb
))
1919 scalar_int_mode mode
= SCALAR_INT_TYPE_MODE (type
);
1920 if (TREE_CODE (oprnd1
) == INTEGER_CST
1921 || TYPE_MODE (TREE_TYPE (oprnd1
)) == mode
)
1923 else if (def_stmt
&& gimple_assign_cast_p (def_stmt
))
1925 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
1926 if (TYPE_MODE (TREE_TYPE (rhs1
)) == mode
1927 && TYPE_PRECISION (TREE_TYPE (rhs1
))
1928 == TYPE_PRECISION (type
))
1932 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
) = NULL
;
1933 if (def
== NULL_TREE
)
1935 def
= vect_recog_temp_ssa_var (type
, NULL
);
1936 def_stmt
= gimple_build_assign (def
, NOP_EXPR
, oprnd1
);
1940 = gsi_insert_on_edge_immediate (ext_def
, def_stmt
);
1941 gcc_assert (!new_bb
);
1944 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
1946 stype
= TREE_TYPE (def
);
1947 scalar_int_mode smode
= SCALAR_INT_TYPE_MODE (stype
);
/* NOTE(review): a constant amount yields DEF2 = precision - DEF directly;
   the SSA case synthesizes DEF2 = (-DEF) & (precision - 1) below.  */
1949 if (TREE_CODE (def
) == INTEGER_CST
)
1951 if (!tree_fits_uhwi_p (def
)
1952 || tree_to_uhwi (def
) >= GET_MODE_PRECISION (mode
)
1953 || integer_zerop (def
))
1955 def2
= build_int_cst (stype
,
1956 GET_MODE_PRECISION (mode
) - tree_to_uhwi (def
));
1960 tree vecstype
= get_vectype_for_scalar_type (stype
);
1961 stmt_vec_info def_stmt_vinfo
;
1963 if (vecstype
== NULL_TREE
)
1965 def2
= vect_recog_temp_ssa_var (stype
, NULL
);
1966 def_stmt
= gimple_build_assign (def2
, NEGATE_EXPR
, def
);
1970 = gsi_insert_on_edge_immediate (ext_def
, def_stmt
);
1971 gcc_assert (!new_bb
);
1975 def_stmt_vinfo
= new_stmt_vec_info (def_stmt
, vinfo
);
1976 set_vinfo_for_stmt (def_stmt
, def_stmt_vinfo
);
1977 STMT_VINFO_VECTYPE (def_stmt_vinfo
) = vecstype
;
1978 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
1981 def2
= vect_recog_temp_ssa_var (stype
, NULL
);
1982 tree mask
= build_int_cst (stype
, GET_MODE_PRECISION (smode
) - 1);
1983 def_stmt
= gimple_build_assign (def2
, BIT_AND_EXPR
,
1984 gimple_assign_lhs (def_stmt
), mask
);
1988 = gsi_insert_on_edge_immediate (ext_def
, def_stmt
);
1989 gcc_assert (!new_bb
);
1993 def_stmt_vinfo
= new_stmt_vec_info (def_stmt
, vinfo
);
1994 set_vinfo_for_stmt (def_stmt
, def_stmt_vinfo
);
1995 STMT_VINFO_VECTYPE (def_stmt_vinfo
) = vecstype
;
1996 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
/* NOTE(review): rotate is expanded as (x << a) | (x >> (B - a)), with the
   shift directions swapped for RROTATE; the shift operand lines were
   truncated by the extraction.  */
2000 var1
= vect_recog_temp_ssa_var (type
, NULL
);
2001 def_stmt
= gimple_build_assign (var1
, rhs_code
== LROTATE_EXPR
2002 ? LSHIFT_EXPR
: RSHIFT_EXPR
,
2004 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2006 var2
= vect_recog_temp_ssa_var (type
, NULL
);
2007 def_stmt
= gimple_build_assign (var2
, rhs_code
== LROTATE_EXPR
2008 ? RSHIFT_EXPR
: LSHIFT_EXPR
,
2010 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2012 /* Pattern detected. */
2013 if (dump_enabled_p ())
2014 dump_printf_loc (MSG_NOTE
, vect_location
,
2015 "vect_recog_rotate_pattern: detected:\n");
2017 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2018 var
= vect_recog_temp_ssa_var (type
, NULL
);
2019 pattern_stmt
= gimple_build_assign (var
, BIT_IOR_EXPR
, var1
, var2
);
2021 if (dump_enabled_p ())
2022 dump_gimple_stmt_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, pattern_stmt
, 0);
2024 stmts
->safe_push (last_stmt
);
2025 return pattern_stmt
;
/* NOTE(review): corrupted extraction -- brace/`return NULL;` lines were
   dropped and upstream line numbers are fused into the text.  Annotations
   below cover only the visible fragments; restore from pristine
   tree-vect-patterns.c before compiling.  */
2028 /* Detect a vector by vector shift pattern that wouldn't be otherwise
2036 S3 res_T = b_T op a_t;
2038 where type 'TYPE' is a type with different size than 'type',
2039 and op is <<, >> or rotate.
2044 TYPE b_T, c_T, res_T;
2047 S1 a_t = (type) c_T;
2049 S3 res_T = b_T op a_t;
2053 * STMTS: Contains a stmt from which the pattern search begins,
2054 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2055 with a shift/rotate which has same type on both operands, in the
2056 second case just b_T op c_T, in the first case with added cast
2057 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2061 * TYPE_IN: The type of the input arguments to the pattern.
2063 * TYPE_OUT: The type of the output of this pattern.
2065 * Return value: A new stmt that will be used to replace the shift/rotate
2069 vect_recog_vector_vector_shift_pattern (vec
<gimple
*> *stmts
,
2070 tree
*type_in
, tree
*type_out
)
2072 gimple
*last_stmt
= stmts
->pop ();
2073 tree oprnd0
, oprnd1
, lhs
, var
;
2074 gimple
*pattern_stmt
;
2075 enum tree_code rhs_code
;
2076 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
2077 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
2079 if (!is_gimple_assign (last_stmt
))
2082 rhs_code
= gimple_assign_rhs_code (last_stmt
);
/* NOTE(review): the switch accepting shift/rotate codes (upstream lines
   ~2083-2093) is missing from this extraction.  */
2094 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
2097 lhs
= gimple_assign_lhs (last_stmt
);
2098 oprnd0
= gimple_assign_rhs1 (last_stmt
);
2099 oprnd1
= gimple_assign_rhs2 (last_stmt
);
2100 if (TREE_CODE (oprnd0
) != SSA_NAME
2101 || TREE_CODE (oprnd1
) != SSA_NAME
2102 || TYPE_MODE (TREE_TYPE (oprnd0
)) == TYPE_MODE (TREE_TYPE (oprnd1
))
2103 || !type_has_mode_precision_p (TREE_TYPE (oprnd1
))
2104 || TYPE_PRECISION (TREE_TYPE (lhs
))
2105 != TYPE_PRECISION (TREE_TYPE (oprnd0
)))
2108 stmt_vec_info def_vinfo
= vect_get_internal_def (vinfo
, oprnd1
);
2112 *type_in
= get_vectype_for_scalar_type (TREE_TYPE (oprnd0
));
2113 *type_out
= *type_in
;
2114 if (*type_in
== NULL_TREE
)
2117 tree def
= NULL_TREE
;
2118 gassign
*def_stmt
= dyn_cast
<gassign
*> (def_vinfo
->stmt
);
2119 if (!STMT_VINFO_IN_PATTERN_P (def_vinfo
)
2121 && gimple_assign_cast_p (def_stmt
))
2123 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
2124 if (TYPE_MODE (TREE_TYPE (rhs1
)) == TYPE_MODE (TREE_TYPE (oprnd0
))
2125 && TYPE_PRECISION (TREE_TYPE (rhs1
))
2126 == TYPE_PRECISION (TREE_TYPE (oprnd0
)))
2128 if (TYPE_PRECISION (TREE_TYPE (oprnd1
))
2129 >= TYPE_PRECISION (TREE_TYPE (rhs1
)))
/* NOTE(review): otherwise mask the wider amount down to the narrow
   operand's precision so the shift amount is preserved.  */
2134 = build_low_bits_mask (TREE_TYPE (rhs1
),
2135 TYPE_PRECISION (TREE_TYPE (oprnd1
)));
2136 def
= vect_recog_temp_ssa_var (TREE_TYPE (rhs1
), NULL
);
2137 def_stmt
= gimple_build_assign (def
, BIT_AND_EXPR
, rhs1
, mask
);
2138 stmt_vec_info new_stmt_info
2139 = new_stmt_vec_info (def_stmt
, vinfo
);
2140 set_vinfo_for_stmt (def_stmt
, new_stmt_info
);
2141 STMT_VINFO_VECTYPE (new_stmt_info
)
2142 = get_vectype_for_scalar_type (TREE_TYPE (rhs1
));
2143 new_pattern_def_seq (stmt_vinfo
, def_stmt
);
2148 if (def
== NULL_TREE
)
2150 def
= vect_recog_temp_ssa_var (TREE_TYPE (oprnd0
), NULL
);
2151 def_stmt
= gimple_build_assign (def
, NOP_EXPR
, oprnd1
);
2152 new_pattern_def_seq (stmt_vinfo
, def_stmt
);
2155 /* Pattern detected. */
2156 if (dump_enabled_p ())
2157 dump_printf_loc (MSG_NOTE
, vect_location
,
2158 "vect_recog_vector_vector_shift_pattern: detected:\n");
2160 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2161 var
= vect_recog_temp_ssa_var (TREE_TYPE (oprnd0
), NULL
);
2162 pattern_stmt
= gimple_build_assign (var
, rhs_code
, oprnd0
, def
);
2164 if (dump_enabled_p ())
2165 dump_gimple_stmt_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, pattern_stmt
, 0);
2167 stmts
->safe_push (last_stmt
);
2168 return pattern_stmt
;
/* NOTE(review): corrupted extraction -- the `return voptab` line preceding
   the `&&` fragment below was dropped; restore from pristine
   tree-vect-patterns.c before compiling.  */
2171 /* Return true iff the target has a vector optab implementing the operation
2172 CODE on type VECTYPE. */
2175 target_has_vecop_for_code (tree_code code
, tree vectype
)
2177 optab voptab
= optab_for_tree_code (code
, vectype
, optab_vector
)
;
/* Looks up the vector optab for CODE and checks it has a handler for
   VECTYPE's machine mode.  */
2179 && optab_handler (voptab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
;
/* NOTE(review): corrupted extraction -- the switch skeleton, several case
   labels, the `return false;`/`return true;` lines and braces were dropped,
   and upstream line numbers are fused into the text.  Restore from pristine
   tree-vect-patterns.c before compiling.  */
2182 /* Verify that the target has optabs of VECTYPE to perform all the steps
2183 needed by the multiplication-by-immediate synthesis algorithm described by
2184 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
2185 present. Return true iff the target supports all the steps. */
2188 target_supports_mult_synth_alg (struct algorithm
*alg
, mult_variant var
,
2189 tree vectype
, bool synth_shift_p
)
2191 if (alg
->op
[0] != alg_zero
&& alg
->op
[0] != alg_m
)
2194 bool supports_vminus
= target_has_vecop_for_code (MINUS_EXPR
, vectype
);
2195 bool supports_vplus
= target_has_vecop_for_code (PLUS_EXPR
, vectype
);
2197 if (var
== negate_variant
2198 && !target_has_vecop_for_code (NEGATE_EXPR
, vectype
))
2201 /* If we must synthesize shifts with additions make sure that vector
2202 addition is available. */
2203 if ((var
== add_variant
|| synth_shift_p
) && !supports_vplus
)
/* Walk every step of the algorithm and reject it if the needed vector
   operation is unavailable (case bodies partially missing here).  */
2206 for (int i
= 1; i
< alg
->ops
; i
++)
2214 case alg_add_factor
:
2215 if (!supports_vplus
)
2220 case alg_sub_factor
:
2221 if (!supports_vminus
)
2227 case alg_impossible
:
/* NOTE(review): corrupted extraction -- the declarations of `i`, `stmt`
   and `prev_res`, the loop braces, the DEST selection and the final
   `return stmt;` were dropped; upstream line numbers are fused into the
   text.  Restore from pristine tree-vect-patterns.c before compiling.  */
2237 /* Synthesize a left shift of OP by AMNT bits using a series of additions and
2238 putting the final result in DEST. Append all statements but the last into
2239 VINFO. Return the last statement. */
2242 synth_lshift_by_additions (tree dest
, tree op
, HOST_WIDE_INT amnt
,
2243 stmt_vec_info vinfo
)
2246 tree itype
= TREE_TYPE (op
);
2248 gcc_assert (amnt
>= 0);
/* Each iteration doubles the running value (x + x == x << 1), so AMNT
   iterations compute OP << AMNT without a shift instruction.  */
2249 for (i
= 0; i
< amnt
; i
++)
2251 tree tmp_var
= (i
< amnt
- 1) ? vect_recog_temp_ssa_var (itype
, NULL
)
2254 = gimple_build_assign (tmp_var
, PLUS_EXPR
, prev_res
, prev_res
);
2257 append_pattern_def_seq (vinfo
, stmt
);
/* NOTE(review): corrupted extraction -- braces, the early `return op1;`
   for the no-op case, the `gimple *stmt` declaration and the final
   `return tmp_var;` lines were dropped; upstream line numbers are fused
   into the text.  Restore from pristine tree-vect-patterns.c before
   compiling.  */
2265 /* Helper for vect_synth_mult_by_constant. Apply a binary operation
2266 CODE to operands OP1 and OP2, creating a new temporary SSA var in
2267 the process if necessary. Append the resulting assignment statements
2268 to the sequence in STMT_VINFO. Return the SSA variable that holds the
2269 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
2270 left shifts using additions. */
2273 apply_binop_and_append_stmt (tree_code code
, tree op1
, tree op2
,
2274 stmt_vec_info stmt_vinfo
, bool synth_shift_p
)
/* x << 0 and x + 0 are identities -- presumably returns OP1 unchanged
   here (the return line was dropped); TODO confirm against upstream.  */
2276 if (integer_zerop (op2
)
2277 && (code
== LSHIFT_EXPR
2278 || code
== PLUS_EXPR
))
2280 gcc_assert (TREE_CODE (op1
) == SSA_NAME
);
2285 tree itype
= TREE_TYPE (op1
);
2286 tree tmp_var
= vect_recog_temp_ssa_var (itype
, NULL
);
2288 if (code
== LSHIFT_EXPR
2291 stmt
= synth_lshift_by_additions (tmp_var
, op1
, TREE_INT_CST_LOW (op2
),
2293 append_pattern_def_seq (stmt_vinfo
, stmt
);
2297 stmt
= gimple_build_assign (tmp_var
, code
, op1
, op2
);
2298 append_pattern_def_seq (stmt_vinfo
, stmt
);
2302 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
2303 and simple arithmetic operations to be vectorized. Record the statements
2304 produced in STMT_VINFO and return the last statement in the sequence or
2305 NULL if it's not possible to synthesize such a multiplication.
2306 This function mirrors the behavior of expand_mult_const in expmed.c but
2307 works on tree-ssa form. */
2310 vect_synth_mult_by_constant (tree op
, tree val
,
2311 stmt_vec_info stmt_vinfo
)
2313 tree itype
= TREE_TYPE (op
);
2314 machine_mode mode
= TYPE_MODE (itype
);
2315 struct algorithm alg
;
2316 mult_variant variant
;
2317 if (!tree_fits_shwi_p (val
))
2320 /* Multiplication synthesis by shifts, adds and subs can introduce
2321 signed overflow where the original operation didn't. Perform the
2322 operations on an unsigned type and cast back to avoid this.
2323 In the future we may want to relax this for synthesis algorithms
2324 that we can prove do not cause unexpected overflow. */
2325 bool cast_to_unsigned_p
= !TYPE_OVERFLOW_WRAPS (itype
);
2327 tree multtype
= cast_to_unsigned_p
? unsigned_type_for (itype
) : itype
;
2329 /* Targets that don't support vector shifts but support vector additions
2330 can synthesize shifts that way. */
2331 bool synth_shift_p
= !vect_supportable_shift (LSHIFT_EXPR
, multtype
);
2333 HOST_WIDE_INT hwval
= tree_to_shwi (val
);
2334 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
2335 The vectorizer's benefit analysis will decide whether it's beneficial
2337 bool possible
= choose_mult_variant (mode
, hwval
, &alg
,
2338 &variant
, MAX_COST
);
2342 tree vectype
= get_vectype_for_scalar_type (multtype
);
2345 || !target_supports_mult_synth_alg (&alg
, variant
,
2346 vectype
, synth_shift_p
))
2351 /* Clear out the sequence of statements so we can populate it below. */
2352 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
) = NULL
;
2353 gimple
*stmt
= NULL
;
2355 if (cast_to_unsigned_p
)
2357 tree tmp_op
= vect_recog_temp_ssa_var (multtype
, NULL
);
2358 stmt
= gimple_build_assign (tmp_op
, CONVERT_EXPR
, op
);
2359 append_pattern_def_seq (stmt_vinfo
, stmt
);
2363 if (alg
.op
[0] == alg_zero
)
2364 accumulator
= build_int_cst (multtype
, 0);
2368 bool needs_fixup
= (variant
== negate_variant
)
2369 || (variant
== add_variant
);
2371 for (int i
= 1; i
< alg
.ops
; i
++)
2373 tree shft_log
= build_int_cst (multtype
, alg
.log
[i
]);
2374 tree accum_tmp
= vect_recog_temp_ssa_var (multtype
, NULL
);
2375 tree tmp_var
= NULL_TREE
;
2382 = synth_lshift_by_additions (accum_tmp
, accumulator
, alg
.log
[i
],
2385 stmt
= gimple_build_assign (accum_tmp
, LSHIFT_EXPR
, accumulator
,
2390 = apply_binop_and_append_stmt (LSHIFT_EXPR
, op
, shft_log
,
2391 stmt_vinfo
, synth_shift_p
);
2392 stmt
= gimple_build_assign (accum_tmp
, PLUS_EXPR
, accumulator
,
2396 tmp_var
= apply_binop_and_append_stmt (LSHIFT_EXPR
, op
,
2397 shft_log
, stmt_vinfo
,
2399 /* In some algorithms the first step involves zeroing the
2400 accumulator. If subtracting from such an accumulator
2401 just emit the negation directly. */
2402 if (integer_zerop (accumulator
))
2403 stmt
= gimple_build_assign (accum_tmp
, NEGATE_EXPR
, tmp_var
);
2405 stmt
= gimple_build_assign (accum_tmp
, MINUS_EXPR
, accumulator
,
2410 = apply_binop_and_append_stmt (LSHIFT_EXPR
, accumulator
, shft_log
,
2411 stmt_vinfo
, synth_shift_p
);
2412 stmt
= gimple_build_assign (accum_tmp
, PLUS_EXPR
, tmp_var
, op
);
2416 = apply_binop_and_append_stmt (LSHIFT_EXPR
, accumulator
, shft_log
,
2417 stmt_vinfo
, synth_shift_p
);
2418 stmt
= gimple_build_assign (accum_tmp
, MINUS_EXPR
, tmp_var
, op
);
2420 case alg_add_factor
:
2422 = apply_binop_and_append_stmt (LSHIFT_EXPR
, accumulator
, shft_log
,
2423 stmt_vinfo
, synth_shift_p
);
2424 stmt
= gimple_build_assign (accum_tmp
, PLUS_EXPR
, accumulator
,
2427 case alg_sub_factor
:
2429 = apply_binop_and_append_stmt (LSHIFT_EXPR
, accumulator
, shft_log
,
2430 stmt_vinfo
, synth_shift_p
);
2431 stmt
= gimple_build_assign (accum_tmp
, MINUS_EXPR
, tmp_var
,
2437 /* We don't want to append the last stmt in the sequence to stmt_vinfo
2438 but rather return it directly. */
2440 if ((i
< alg
.ops
- 1) || needs_fixup
|| cast_to_unsigned_p
)
2441 append_pattern_def_seq (stmt_vinfo
, stmt
);
2442 accumulator
= accum_tmp
;
2444 if (variant
== negate_variant
)
2446 tree accum_tmp
= vect_recog_temp_ssa_var (multtype
, NULL
);
2447 stmt
= gimple_build_assign (accum_tmp
, NEGATE_EXPR
, accumulator
);
2448 accumulator
= accum_tmp
;
2449 if (cast_to_unsigned_p
)
2450 append_pattern_def_seq (stmt_vinfo
, stmt
);
2452 else if (variant
== add_variant
)
2454 tree accum_tmp
= vect_recog_temp_ssa_var (multtype
, NULL
);
2455 stmt
= gimple_build_assign (accum_tmp
, PLUS_EXPR
, accumulator
, op
);
2456 accumulator
= accum_tmp
;
2457 if (cast_to_unsigned_p
)
2458 append_pattern_def_seq (stmt_vinfo
, stmt
);
2460 /* Move back to a signed if needed. */
2461 if (cast_to_unsigned_p
)
2463 tree accum_tmp
= vect_recog_temp_ssa_var (itype
, NULL
);
2464 stmt
= gimple_build_assign (accum_tmp
, CONVERT_EXPR
, accumulator
);
2470 /* Detect multiplication by constant and convert it into a sequence of
2471 shifts and additions, subtractions, negations. We reuse the
2472 choose_mult_variant algorithms from expmed.c
2476 STMTS: Contains a stmt from which the pattern search begins,
2481 * TYPE_IN: The type of the input arguments to the pattern.
2483 * TYPE_OUT: The type of the output of this pattern.
2485 * Return value: A new stmt that will be used to replace
2486 the multiplication. */
2489 vect_recog_mult_pattern (vec
<gimple
*> *stmts
,
2490 tree
*type_in
, tree
*type_out
)
2492 gimple
*last_stmt
= stmts
->pop ();
2493 tree oprnd0
, oprnd1
, vectype
, itype
;
2494 gimple
*pattern_stmt
;
2495 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
2497 if (!is_gimple_assign (last_stmt
))
2500 if (gimple_assign_rhs_code (last_stmt
) != MULT_EXPR
)
2503 oprnd0
= gimple_assign_rhs1 (last_stmt
);
2504 oprnd1
= gimple_assign_rhs2 (last_stmt
);
2505 itype
= TREE_TYPE (oprnd0
);
2507 if (TREE_CODE (oprnd0
) != SSA_NAME
2508 || TREE_CODE (oprnd1
) != INTEGER_CST
2509 || !INTEGRAL_TYPE_P (itype
)
2510 || !type_has_mode_precision_p (itype
))
2513 vectype
= get_vectype_for_scalar_type (itype
);
2514 if (vectype
== NULL_TREE
)
2517 /* If the target can handle vectorized multiplication natively,
2518 don't attempt to optimize this. */
2519 optab mul_optab
= optab_for_tree_code (MULT_EXPR
, vectype
, optab_default
);
2520 if (mul_optab
!= unknown_optab
)
2522 machine_mode vec_mode
= TYPE_MODE (vectype
);
2523 int icode
= (int) optab_handler (mul_optab
, vec_mode
);
2524 if (icode
!= CODE_FOR_nothing
)
2528 pattern_stmt
= vect_synth_mult_by_constant (oprnd0
, oprnd1
, stmt_vinfo
);
2532 /* Pattern detected. */
2533 if (dump_enabled_p ())
2534 dump_printf_loc (MSG_NOTE
, vect_location
,
2535 "vect_recog_mult_pattern: detected:\n");
2537 if (dump_enabled_p ())
2538 dump_gimple_stmt_loc (MSG_NOTE
, vect_location
, TDF_SLIM
,
2541 stmts
->safe_push (last_stmt
);
2543 *type_out
= vectype
;
2545 return pattern_stmt
;
2548 /* Detect a signed division by a constant that wouldn't be
2549 otherwise vectorized:
2555 where type 'type' is an integral type and N is a constant.
2557 Similarly handle modulo by a constant:
2563 * STMTS: Contains a stmt from which the pattern search begins,
2564 i.e. the division stmt. S1 is replaced by if N is a power
2565 of two constant and type is signed:
2566 S3 y_t = b_t < 0 ? N - 1 : 0;
2568 S1' a_t = x_t >> log2 (N);
2570 S4 is replaced if N is a power of two constant and
2571 type is signed by (where *_T temporaries have unsigned type):
2572 S9 y_T = b_t < 0 ? -1U : 0U;
2573 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2574 S7 z_t = (type) z_T;
2576 S5 x_t = w_t & (N - 1);
2577 S4' a_t = x_t - z_t;
2581 * TYPE_IN: The type of the input arguments to the pattern.
2583 * TYPE_OUT: The type of the output of this pattern.
2585 * Return value: A new stmt that will be used to replace the division
2586 S1 or modulo S4 stmt. */
2589 vect_recog_divmod_pattern (vec
<gimple
*> *stmts
,
2590 tree
*type_in
, tree
*type_out
)
2592 gimple
*last_stmt
= stmts
->pop ();
2593 tree oprnd0
, oprnd1
, vectype
, itype
, cond
;
2594 gimple
*pattern_stmt
, *def_stmt
;
2595 enum tree_code rhs_code
;
2596 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
2597 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
2600 int dummy_int
, prec
;
2601 stmt_vec_info def_stmt_vinfo
;
2603 if (!is_gimple_assign (last_stmt
))
2606 rhs_code
= gimple_assign_rhs_code (last_stmt
);
2609 case TRUNC_DIV_EXPR
:
2610 case TRUNC_MOD_EXPR
:
2616 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
2619 oprnd0
= gimple_assign_rhs1 (last_stmt
);
2620 oprnd1
= gimple_assign_rhs2 (last_stmt
);
2621 itype
= TREE_TYPE (oprnd0
);
2622 if (TREE_CODE (oprnd0
) != SSA_NAME
2623 || TREE_CODE (oprnd1
) != INTEGER_CST
2624 || TREE_CODE (itype
) != INTEGER_TYPE
2625 || !type_has_mode_precision_p (itype
))
2628 scalar_int_mode itype_mode
= SCALAR_INT_TYPE_MODE (itype
);
2629 vectype
= get_vectype_for_scalar_type (itype
);
2630 if (vectype
== NULL_TREE
)
2633 if (optimize_bb_for_size_p (gimple_bb (last_stmt
)))
2635 /* If the target can handle vectorized division or modulo natively,
2636 don't attempt to optimize this, since native division is likely
2637 to give smaller code. */
2638 optab
= optab_for_tree_code (rhs_code
, vectype
, optab_default
);
2639 if (optab
!= unknown_optab
)
2641 machine_mode vec_mode
= TYPE_MODE (vectype
);
2642 int icode
= (int) optab_handler (optab
, vec_mode
);
2643 if (icode
!= CODE_FOR_nothing
)
2648 prec
= TYPE_PRECISION (itype
);
2649 if (integer_pow2p (oprnd1
))
2651 if (TYPE_UNSIGNED (itype
) || tree_int_cst_sgn (oprnd1
) != 1)
2654 /* Pattern detected. */
2655 if (dump_enabled_p ())
2656 dump_printf_loc (MSG_NOTE
, vect_location
,
2657 "vect_recog_divmod_pattern: detected:\n");
2659 cond
= build2 (LT_EXPR
, boolean_type_node
, oprnd0
,
2660 build_int_cst (itype
, 0));
2661 if (rhs_code
== TRUNC_DIV_EXPR
)
2663 tree var
= vect_recog_temp_ssa_var (itype
, NULL
);
2666 = gimple_build_assign (var
, COND_EXPR
, cond
,
2667 fold_build2 (MINUS_EXPR
, itype
, oprnd1
,
2668 build_int_cst (itype
, 1)),
2669 build_int_cst (itype
, 0));
2670 new_pattern_def_seq (stmt_vinfo
, def_stmt
);
2671 var
= vect_recog_temp_ssa_var (itype
, NULL
);
2673 = gimple_build_assign (var
, PLUS_EXPR
, oprnd0
,
2674 gimple_assign_lhs (def_stmt
));
2675 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2677 shift
= build_int_cst (itype
, tree_log2 (oprnd1
));
2679 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2680 RSHIFT_EXPR
, var
, shift
);
2685 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
) = NULL
;
2686 if (compare_tree_int (oprnd1
, 2) == 0)
2688 signmask
= vect_recog_temp_ssa_var (itype
, NULL
);
2689 def_stmt
= gimple_build_assign (signmask
, COND_EXPR
, cond
,
2690 build_int_cst (itype
, 1),
2691 build_int_cst (itype
, 0));
2692 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2697 = build_nonstandard_integer_type (prec
, 1);
2698 tree vecutype
= get_vectype_for_scalar_type (utype
);
2700 = build_int_cst (utype
, GET_MODE_BITSIZE (itype_mode
)
2701 - tree_log2 (oprnd1
));
2702 tree var
= vect_recog_temp_ssa_var (utype
, NULL
);
2704 def_stmt
= gimple_build_assign (var
, COND_EXPR
, cond
,
2705 build_int_cst (utype
, -1),
2706 build_int_cst (utype
, 0));
2707 def_stmt_vinfo
= new_stmt_vec_info (def_stmt
, vinfo
);
2708 set_vinfo_for_stmt (def_stmt
, def_stmt_vinfo
);
2709 STMT_VINFO_VECTYPE (def_stmt_vinfo
) = vecutype
;
2710 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2711 var
= vect_recog_temp_ssa_var (utype
, NULL
);
2712 def_stmt
= gimple_build_assign (var
, RSHIFT_EXPR
,
2713 gimple_assign_lhs (def_stmt
),
2715 def_stmt_vinfo
= new_stmt_vec_info (def_stmt
, vinfo
);
2716 set_vinfo_for_stmt (def_stmt
, def_stmt_vinfo
);
2717 STMT_VINFO_VECTYPE (def_stmt_vinfo
) = vecutype
;
2718 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2719 signmask
= vect_recog_temp_ssa_var (itype
, NULL
);
2721 = gimple_build_assign (signmask
, NOP_EXPR
, var
);
2722 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2725 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2726 PLUS_EXPR
, oprnd0
, signmask
);
2727 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2729 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2730 BIT_AND_EXPR
, gimple_assign_lhs (def_stmt
),
2731 fold_build2 (MINUS_EXPR
, itype
, oprnd1
,
2732 build_int_cst (itype
, 1)));
2733 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2736 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2737 MINUS_EXPR
, gimple_assign_lhs (def_stmt
),
2741 if (dump_enabled_p ())
2742 dump_gimple_stmt_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, pattern_stmt
,
2745 stmts
->safe_push (last_stmt
);
2748 *type_out
= vectype
;
2749 return pattern_stmt
;
2752 if (prec
> HOST_BITS_PER_WIDE_INT
2753 || integer_zerop (oprnd1
))
2756 if (!can_mult_highpart_p (TYPE_MODE (vectype
), TYPE_UNSIGNED (itype
)))
2759 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
) = NULL
;
2761 if (TYPE_UNSIGNED (itype
))
2763 unsigned HOST_WIDE_INT mh
, ml
;
2764 int pre_shift
, post_shift
;
2765 unsigned HOST_WIDE_INT d
= (TREE_INT_CST_LOW (oprnd1
)
2766 & GET_MODE_MASK (itype_mode
));
2767 tree t1
, t2
, t3
, t4
;
2769 if (d
>= (HOST_WIDE_INT_1U
<< (prec
- 1)))
2770 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2773 /* Find a suitable multiplier and right shift count
2774 instead of multiplying with D. */
2775 mh
= choose_multiplier (d
, prec
, prec
, &ml
, &post_shift
, &dummy_int
);
2777 /* If the suggested multiplier is more than SIZE bits, we can do better
2778 for even divisors, using an initial right shift. */
2779 if (mh
!= 0 && (d
& 1) == 0)
2781 pre_shift
= ctz_or_zero (d
);
2782 mh
= choose_multiplier (d
>> pre_shift
, prec
, prec
- pre_shift
,
2783 &ml
, &post_shift
, &dummy_int
);
2791 if (post_shift
- 1 >= prec
)
2794 /* t1 = oprnd0 h* ml;
2798 q = t4 >> (post_shift - 1); */
2799 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
2800 def_stmt
= gimple_build_assign (t1
, MULT_HIGHPART_EXPR
, oprnd0
,
2801 build_int_cst (itype
, ml
));
2802 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2804 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
2806 = gimple_build_assign (t2
, MINUS_EXPR
, oprnd0
, t1
);
2807 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2809 t3
= vect_recog_temp_ssa_var (itype
, NULL
);
2811 = gimple_build_assign (t3
, RSHIFT_EXPR
, t2
, integer_one_node
);
2812 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2814 t4
= vect_recog_temp_ssa_var (itype
, NULL
);
2816 = gimple_build_assign (t4
, PLUS_EXPR
, t1
, t3
);
2818 if (post_shift
!= 1)
2820 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2822 q
= vect_recog_temp_ssa_var (itype
, NULL
);
2824 = gimple_build_assign (q
, RSHIFT_EXPR
, t4
,
2825 build_int_cst (itype
, post_shift
- 1));
2830 pattern_stmt
= def_stmt
;
2835 if (pre_shift
>= prec
|| post_shift
>= prec
)
2838 /* t1 = oprnd0 >> pre_shift;
2840 q = t2 >> post_shift; */
2843 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
2845 = gimple_build_assign (t1
, RSHIFT_EXPR
, oprnd0
,
2846 build_int_cst (NULL
, pre_shift
));
2847 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2852 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
2853 def_stmt
= gimple_build_assign (t2
, MULT_HIGHPART_EXPR
, t1
,
2854 build_int_cst (itype
, ml
));
2858 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2860 q
= vect_recog_temp_ssa_var (itype
, NULL
);
2862 = gimple_build_assign (q
, RSHIFT_EXPR
, t2
,
2863 build_int_cst (itype
, post_shift
));
2868 pattern_stmt
= def_stmt
;
2873 unsigned HOST_WIDE_INT ml
;
2875 HOST_WIDE_INT d
= TREE_INT_CST_LOW (oprnd1
);
2876 unsigned HOST_WIDE_INT abs_d
;
2878 tree t1
, t2
, t3
, t4
;
2880 /* Give up for -1. */
2884 /* Since d might be INT_MIN, we have to cast to
2885 unsigned HOST_WIDE_INT before negating to avoid
2886 undefined signed overflow. */
2888 ? (unsigned HOST_WIDE_INT
) d
2889 : - (unsigned HOST_WIDE_INT
) d
);
2891 /* n rem d = n rem -d */
2892 if (rhs_code
== TRUNC_MOD_EXPR
&& d
< 0)
2895 oprnd1
= build_int_cst (itype
, abs_d
);
2897 else if (HOST_BITS_PER_WIDE_INT
>= prec
2898 && abs_d
== HOST_WIDE_INT_1U
<< (prec
- 1))
2899 /* This case is not handled correctly below. */
2902 choose_multiplier (abs_d
, prec
, prec
- 1, &ml
, &post_shift
, &dummy_int
);
2903 if (ml
>= HOST_WIDE_INT_1U
<< (prec
- 1))
2906 ml
|= HOST_WIDE_INT_M1U
<< (prec
- 1);
2908 if (post_shift
>= prec
)
2911 /* t1 = oprnd0 h* ml; */
2912 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
2913 def_stmt
= gimple_build_assign (t1
, MULT_HIGHPART_EXPR
, oprnd0
,
2914 build_int_cst (itype
, ml
));
2918 /* t2 = t1 + oprnd0; */
2919 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2920 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
2921 def_stmt
= gimple_build_assign (t2
, PLUS_EXPR
, t1
, oprnd0
);
2928 /* t3 = t2 >> post_shift; */
2929 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2930 t3
= vect_recog_temp_ssa_var (itype
, NULL
);
2931 def_stmt
= gimple_build_assign (t3
, RSHIFT_EXPR
, t2
,
2932 build_int_cst (itype
, post_shift
));
2937 wide_int oprnd0_min
, oprnd0_max
;
2939 if (get_range_info (oprnd0
, &oprnd0_min
, &oprnd0_max
) == VR_RANGE
)
2941 if (!wi::neg_p (oprnd0_min
, TYPE_SIGN (itype
)))
2943 else if (wi::neg_p (oprnd0_max
, TYPE_SIGN (itype
)))
2947 if (msb
== 0 && d
>= 0)
2951 pattern_stmt
= def_stmt
;
2955 /* t4 = oprnd0 >> (prec - 1);
2956 or if we know from VRP that oprnd0 >= 0
2958 or if we know from VRP that oprnd0 < 0
2960 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2961 t4
= vect_recog_temp_ssa_var (itype
, NULL
);
2963 def_stmt
= gimple_build_assign (t4
, INTEGER_CST
,
2964 build_int_cst (itype
, msb
));
2966 def_stmt
= gimple_build_assign (t4
, RSHIFT_EXPR
, oprnd0
,
2967 build_int_cst (itype
, prec
- 1));
2968 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2970 /* q = t3 - t4; or q = t4 - t3; */
2971 q
= vect_recog_temp_ssa_var (itype
, NULL
);
2972 pattern_stmt
= gimple_build_assign (q
, MINUS_EXPR
, d
< 0 ? t4
: t3
,
2977 if (rhs_code
== TRUNC_MOD_EXPR
)
2981 /* We divided. Now finish by:
2984 append_pattern_def_seq (stmt_vinfo
, pattern_stmt
);
2986 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
2987 def_stmt
= gimple_build_assign (t1
, MULT_EXPR
, q
, oprnd1
);
2988 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2990 r
= vect_recog_temp_ssa_var (itype
, NULL
);
2991 pattern_stmt
= gimple_build_assign (r
, MINUS_EXPR
, oprnd0
, t1
);
2994 /* Pattern detected. */
2995 if (dump_enabled_p ())
2997 dump_printf_loc (MSG_NOTE
, vect_location
,
2998 "vect_recog_divmod_pattern: detected: ");
2999 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
3002 stmts
->safe_push (last_stmt
);
3005 *type_out
= vectype
;
3006 return pattern_stmt
;
3009 /* Function vect_recog_mixed_size_cond_pattern
3011 Try to find the following pattern:
3016 S1 a_T = x_t CMP y_t ? b_T : c_T;
3018 where type 'TYPE' is an integral type which has different size
3019 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
3020 than 'type', the constants need to fit into an integer type
3021 with the same width as 'type') or results of conversion from 'type'.
3025 * LAST_STMT: A stmt from which the pattern search begins.
3029 * TYPE_IN: The type of the input arguments to the pattern.
3031 * TYPE_OUT: The type of the output of this pattern.
3033 * Return value: A new stmt that will be used to replace the pattern.
3034 Additionally a def_stmt is added.
3036 a_it = x_t CMP y_t ? b_it : c_it;
3037 a_T = (TYPE) a_it; */
3040 vect_recog_mixed_size_cond_pattern (vec
<gimple
*> *stmts
, tree
*type_in
,
3043 gimple
*last_stmt
= (*stmts
)[0];
3044 tree cond_expr
, then_clause
, else_clause
;
3045 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
), def_stmt_info
;
3046 tree type
, vectype
, comp_vectype
, itype
= NULL_TREE
, vecitype
;
3047 gimple
*pattern_stmt
, *def_stmt
;
3048 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
3049 tree orig_type0
= NULL_TREE
, orig_type1
= NULL_TREE
;
3050 gimple
*def_stmt0
= NULL
, *def_stmt1
= NULL
;
3052 tree comp_scalar_type
;
3054 if (!is_gimple_assign (last_stmt
)
3055 || gimple_assign_rhs_code (last_stmt
) != COND_EXPR
3056 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_internal_def
)
3059 cond_expr
= gimple_assign_rhs1 (last_stmt
);
3060 then_clause
= gimple_assign_rhs2 (last_stmt
);
3061 else_clause
= gimple_assign_rhs3 (last_stmt
);
3063 if (!COMPARISON_CLASS_P (cond_expr
))
3066 comp_scalar_type
= TREE_TYPE (TREE_OPERAND (cond_expr
, 0));
3067 comp_vectype
= get_vectype_for_scalar_type (comp_scalar_type
);
3068 if (comp_vectype
== NULL_TREE
)
3071 type
= gimple_expr_type (last_stmt
);
3072 if (types_compatible_p (type
, comp_scalar_type
)
3073 || ((TREE_CODE (then_clause
) != INTEGER_CST
3074 || TREE_CODE (else_clause
) != INTEGER_CST
)
3075 && !INTEGRAL_TYPE_P (comp_scalar_type
))
3076 || !INTEGRAL_TYPE_P (type
))
3079 if ((TREE_CODE (then_clause
) != INTEGER_CST
3080 && !type_conversion_p (then_clause
, last_stmt
, false, &orig_type0
,
3081 &def_stmt0
, &promotion
))
3082 || (TREE_CODE (else_clause
) != INTEGER_CST
3083 && !type_conversion_p (else_clause
, last_stmt
, false, &orig_type1
,
3084 &def_stmt1
, &promotion
)))
3087 if (orig_type0
&& orig_type1
3088 && !types_compatible_p (orig_type0
, orig_type1
))
3093 if (!types_compatible_p (orig_type0
, comp_scalar_type
))
3095 then_clause
= gimple_assign_rhs1 (def_stmt0
);
3101 if (!types_compatible_p (orig_type1
, comp_scalar_type
))
3103 else_clause
= gimple_assign_rhs1 (def_stmt1
);
3108 HOST_WIDE_INT cmp_mode_size
3109 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype
));
3111 scalar_int_mode type_mode
= SCALAR_INT_TYPE_MODE (type
);
3112 if (GET_MODE_BITSIZE (type_mode
) == cmp_mode_size
)
3115 vectype
= get_vectype_for_scalar_type (type
);
3116 if (vectype
== NULL_TREE
)
3119 if (expand_vec_cond_expr_p (vectype
, comp_vectype
, TREE_CODE (cond_expr
)))
3122 if (itype
== NULL_TREE
)
3123 itype
= build_nonstandard_integer_type (cmp_mode_size
,
3124 TYPE_UNSIGNED (type
));
3126 if (itype
== NULL_TREE
3127 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype
)) != cmp_mode_size
)
3130 vecitype
= get_vectype_for_scalar_type (itype
);
3131 if (vecitype
== NULL_TREE
)
3134 if (!expand_vec_cond_expr_p (vecitype
, comp_vectype
, TREE_CODE (cond_expr
)))
3137 if (GET_MODE_BITSIZE (type_mode
) > cmp_mode_size
)
3139 if ((TREE_CODE (then_clause
) == INTEGER_CST
3140 && !int_fits_type_p (then_clause
, itype
))
3141 || (TREE_CODE (else_clause
) == INTEGER_CST
3142 && !int_fits_type_p (else_clause
, itype
)))
3146 def_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
3147 COND_EXPR
, unshare_expr (cond_expr
),
3148 fold_convert (itype
, then_clause
),
3149 fold_convert (itype
, else_clause
));
3150 pattern_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (type
, NULL
),
3151 NOP_EXPR
, gimple_assign_lhs (def_stmt
));
3153 new_pattern_def_seq (stmt_vinfo
, def_stmt
);
3154 def_stmt_info
= new_stmt_vec_info (def_stmt
, vinfo
);
3155 set_vinfo_for_stmt (def_stmt
, def_stmt_info
);
3156 STMT_VINFO_VECTYPE (def_stmt_info
) = vecitype
;
3157 *type_in
= vecitype
;
3158 *type_out
= vectype
;
3160 if (dump_enabled_p ())
3161 dump_printf_loc (MSG_NOTE
, vect_location
,
3162 "vect_recog_mixed_size_cond_pattern: detected:\n");
3164 return pattern_stmt
;
3168 /* Helper function of vect_recog_bool_pattern. Called recursively, return
3169 true if bool VAR can and should be optimized that way. Assume it shouldn't
3170 in case it's a result of a comparison which can be directly vectorized into
3171 a vector comparison. Fills in STMTS with all stmts visited during the
3175 check_bool_pattern (tree var
, vec_info
*vinfo
, hash_set
<gimple
*> &stmts
)
3178 enum tree_code rhs_code
;
3180 stmt_vec_info def_stmt_info
= vect_get_internal_def (vinfo
, var
);
3184 gassign
*def_stmt
= dyn_cast
<gassign
*> (def_stmt_info
->stmt
);
3188 if (stmts
.contains (def_stmt
))
3191 rhs1
= gimple_assign_rhs1 (def_stmt
);
3192 rhs_code
= gimple_assign_rhs_code (def_stmt
);
3196 if (! check_bool_pattern (rhs1
, vinfo
, stmts
))
3201 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1
)))
3203 if (! check_bool_pattern (rhs1
, vinfo
, stmts
))
3208 if (! check_bool_pattern (rhs1
, vinfo
, stmts
))
3215 if (! check_bool_pattern (rhs1
, vinfo
, stmts
)
3216 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt
), vinfo
, stmts
))
3221 if (TREE_CODE_CLASS (rhs_code
) == tcc_comparison
)
3223 tree vecitype
, comp_vectype
;
3225 /* If the comparison can throw, then is_gimple_condexpr will be
3226 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
3227 if (stmt_could_throw_p (def_stmt
))
3230 comp_vectype
= get_vectype_for_scalar_type (TREE_TYPE (rhs1
));
3231 if (comp_vectype
== NULL_TREE
)
3234 tree mask_type
= get_mask_type_for_scalar_type (TREE_TYPE (rhs1
));
3236 && expand_vec_cmp_expr_p (comp_vectype
, mask_type
, rhs_code
))
3239 if (TREE_CODE (TREE_TYPE (rhs1
)) != INTEGER_TYPE
)
3241 scalar_mode mode
= SCALAR_TYPE_MODE (TREE_TYPE (rhs1
));
3243 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode
), 1);
3244 vecitype
= get_vectype_for_scalar_type (itype
);
3245 if (vecitype
== NULL_TREE
)
3249 vecitype
= comp_vectype
;
3250 if (! expand_vec_cond_expr_p (vecitype
, comp_vectype
, rhs_code
))
3258 bool res
= stmts
.add (def_stmt
);
3259 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
3266 /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
3267 stmt (SSA_NAME_DEF_STMT of VAR) adding a cast to STMT_INFOs
3268 pattern sequence. */
3271 adjust_bool_pattern_cast (tree type
, tree var
, stmt_vec_info stmt_info
)
3273 gimple
*cast_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (type
, NULL
),
3275 stmt_vec_info patt_vinfo
= new_stmt_vec_info (cast_stmt
, stmt_info
->vinfo
);
3276 set_vinfo_for_stmt (cast_stmt
, patt_vinfo
);
3277 STMT_VINFO_VECTYPE (patt_vinfo
) = get_vectype_for_scalar_type (type
);
3278 append_pattern_def_seq (stmt_info
, cast_stmt
);
3279 return gimple_assign_lhs (cast_stmt
);
3282 /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
3283 VAR is an SSA_NAME that should be transformed from bool to a wider integer
3284 type, OUT_TYPE is the desired final integer type of the whole pattern.
3285 STMT_INFO is the info of the pattern root and is where pattern stmts should
3286 be associated with. DEFS is a map of pattern defs. */
3289 adjust_bool_pattern (tree var
, tree out_type
,
3290 stmt_vec_info stmt_info
, hash_map
<tree
, tree
> &defs
)
3292 gimple
*stmt
= SSA_NAME_DEF_STMT (var
);
3293 enum tree_code rhs_code
, def_rhs_code
;
3294 tree itype
, cond_expr
, rhs1
, rhs2
, irhs1
, irhs2
;
3296 gimple
*pattern_stmt
, *def_stmt
;
3297 tree trueval
= NULL_TREE
;
3299 rhs1
= gimple_assign_rhs1 (stmt
);
3300 rhs2
= gimple_assign_rhs2 (stmt
);
3301 rhs_code
= gimple_assign_rhs_code (stmt
);
3302 loc
= gimple_location (stmt
);
3307 irhs1
= *defs
.get (rhs1
);
3308 itype
= TREE_TYPE (irhs1
);
3310 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
3315 irhs1
= *defs
.get (rhs1
);
3316 itype
= TREE_TYPE (irhs1
);
3318 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
3319 BIT_XOR_EXPR
, irhs1
, build_int_cst (itype
, 1));
3323 /* Try to optimize x = y & (a < b ? 1 : 0); into
3324 x = (a < b ? y : 0);
3330 S1 a_b = x1 CMP1 y1;
3331 S2 b_b = x2 CMP2 y2;
3333 S4 d_T = (TYPE) c_b;
3335 we would normally emit:
3337 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3338 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3339 S3' c_T = a_T & b_T;
3342 but we can save one stmt by using the
3343 result of one of the COND_EXPRs in the other COND_EXPR and leave
3344 BIT_AND_EXPR stmt out:
3346 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3347 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3350 At least when VEC_COND_EXPR is implemented using masks
3351 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
3352 computes the comparison masks and ands it, in one case with
3353 all ones vector, in the other case with a vector register.
3354 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
3355 often more expensive. */
3356 def_stmt
= SSA_NAME_DEF_STMT (rhs2
);
3357 def_rhs_code
= gimple_assign_rhs_code (def_stmt
);
3358 if (TREE_CODE_CLASS (def_rhs_code
) == tcc_comparison
)
3360 irhs1
= *defs
.get (rhs1
);
3361 tree def_rhs1
= gimple_assign_rhs1 (def_stmt
);
3362 if (TYPE_PRECISION (TREE_TYPE (irhs1
))
3363 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1
))))
3365 rhs_code
= def_rhs_code
;
3367 rhs2
= gimple_assign_rhs2 (def_stmt
);
3372 irhs2
= *defs
.get (rhs2
);
3375 def_stmt
= SSA_NAME_DEF_STMT (rhs1
);
3376 def_rhs_code
= gimple_assign_rhs_code (def_stmt
);
3377 if (TREE_CODE_CLASS (def_rhs_code
) == tcc_comparison
)
3379 irhs2
= *defs
.get (rhs2
);
3380 tree def_rhs1
= gimple_assign_rhs1 (def_stmt
);
3381 if (TYPE_PRECISION (TREE_TYPE (irhs2
))
3382 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1
))))
3384 rhs_code
= def_rhs_code
;
3386 rhs2
= gimple_assign_rhs2 (def_stmt
);
3391 irhs1
= *defs
.get (rhs1
);
3397 irhs1
= *defs
.get (rhs1
);
3398 irhs2
= *defs
.get (rhs2
);
3400 if (TYPE_PRECISION (TREE_TYPE (irhs1
))
3401 != TYPE_PRECISION (TREE_TYPE (irhs2
)))
3403 int prec1
= TYPE_PRECISION (TREE_TYPE (irhs1
));
3404 int prec2
= TYPE_PRECISION (TREE_TYPE (irhs2
));
3405 int out_prec
= TYPE_PRECISION (out_type
);
3406 if (absu_hwi (out_prec
- prec1
) < absu_hwi (out_prec
- prec2
))
3407 irhs2
= adjust_bool_pattern_cast (TREE_TYPE (irhs1
), irhs2
,
3409 else if (absu_hwi (out_prec
- prec1
) > absu_hwi (out_prec
- prec2
))
3410 irhs1
= adjust_bool_pattern_cast (TREE_TYPE (irhs2
), irhs1
,
3414 irhs1
= adjust_bool_pattern_cast (out_type
, irhs1
, stmt_info
);
3415 irhs2
= adjust_bool_pattern_cast (out_type
, irhs2
, stmt_info
);
3418 itype
= TREE_TYPE (irhs1
);
3420 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
3421 rhs_code
, irhs1
, irhs2
);
3426 gcc_assert (TREE_CODE_CLASS (rhs_code
) == tcc_comparison
);
3427 if (TREE_CODE (TREE_TYPE (rhs1
)) != INTEGER_TYPE
3428 || !TYPE_UNSIGNED (TREE_TYPE (rhs1
))
3429 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1
)),
3430 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1
)))))
3432 scalar_mode mode
= SCALAR_TYPE_MODE (TREE_TYPE (rhs1
));
3434 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode
), 1);
3437 itype
= TREE_TYPE (rhs1
);
3438 cond_expr
= build2_loc (loc
, rhs_code
, itype
, rhs1
, rhs2
);
3439 if (trueval
== NULL_TREE
)
3440 trueval
= build_int_cst (itype
, 1);
3442 gcc_checking_assert (useless_type_conversion_p (itype
,
3443 TREE_TYPE (trueval
)));
3445 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
3446 COND_EXPR
, cond_expr
, trueval
,
3447 build_int_cst (itype
, 0));
3451 gimple_set_location (pattern_stmt
, loc
);
3452 /* ??? Why does vect_mark_pattern_stmts set the vector type on all
3453 pattern def seq stmts instead of just letting auto-detection do
3455 stmt_vec_info patt_vinfo
= new_stmt_vec_info (pattern_stmt
, stmt_info
->vinfo
);
3456 set_vinfo_for_stmt (pattern_stmt
, patt_vinfo
);
3457 STMT_VINFO_VECTYPE (patt_vinfo
) = get_vectype_for_scalar_type (itype
);
3458 append_pattern_def_seq (stmt_info
, pattern_stmt
);
3459 defs
.put (var
, gimple_assign_lhs (pattern_stmt
));
3462 /* Comparison function to qsort a vector of gimple stmts after UID. */
3465 sort_after_uid (const void *p1
, const void *p2
)
3467 const gimple
*stmt1
= *(const gimple
* const *)p1
;
3468 const gimple
*stmt2
= *(const gimple
* const *)p2
;
3469 return gimple_uid (stmt1
) - gimple_uid (stmt2
);
3472 /* Create pattern stmts for all stmts participating in the bool pattern
3473 specified by BOOL_STMT_SET and its root STMT with the desired type
3474 OUT_TYPE. Return the def of the pattern root. */
3477 adjust_bool_stmts (hash_set
<gimple
*> &bool_stmt_set
,
3478 tree out_type
, gimple
*stmt
)
3480 /* Gather original stmts in the bool pattern in their order of appearance
3482 auto_vec
<gimple
*> bool_stmts (bool_stmt_set
.elements ());
3483 for (hash_set
<gimple
*>::iterator i
= bool_stmt_set
.begin ();
3484 i
!= bool_stmt_set
.end (); ++i
)
3485 bool_stmts
.quick_push (*i
);
3486 bool_stmts
.qsort (sort_after_uid
);
3488 /* Now process them in that order, producing pattern stmts. */
3489 hash_map
<tree
, tree
> defs
;
3490 for (unsigned i
= 0; i
< bool_stmts
.length (); ++i
)
3491 adjust_bool_pattern (gimple_assign_lhs (bool_stmts
[i
]),
3492 out_type
, vinfo_for_stmt (stmt
), defs
);
3494 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
3495 gimple
*pattern_stmt
3496 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (stmt
)));
3497 return gimple_assign_lhs (pattern_stmt
);
3500 /* Helper for search_type_for_mask. */
3503 search_type_for_mask_1 (tree var
, vec_info
*vinfo
,
3504 hash_map
<gimple
*, tree
> &cache
)
3507 enum tree_code rhs_code
;
3508 tree res
= NULL_TREE
, res2
;
3510 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var
)))
3513 stmt_vec_info def_stmt_info
= vect_get_internal_def (vinfo
, var
);
3517 gassign
*def_stmt
= dyn_cast
<gassign
*> (def_stmt_info
->stmt
);
3521 tree
*c
= cache
.get (def_stmt
);
3525 rhs_code
= gimple_assign_rhs_code (def_stmt
);
3526 rhs1
= gimple_assign_rhs1 (def_stmt
);
3533 res
= search_type_for_mask_1 (rhs1
, vinfo
, cache
);
3539 res
= search_type_for_mask_1 (rhs1
, vinfo
, cache
);
3540 res2
= search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt
), vinfo
,
3542 if (!res
|| (res2
&& TYPE_PRECISION (res
) > TYPE_PRECISION (res2
)))
3547 if (TREE_CODE_CLASS (rhs_code
) == tcc_comparison
)
3549 tree comp_vectype
, mask_type
;
3551 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1
)))
3553 res
= search_type_for_mask_1 (rhs1
, vinfo
, cache
);
3554 res2
= search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt
),
3556 if (!res
|| (res2
&& TYPE_PRECISION (res
) > TYPE_PRECISION (res2
)))
3561 comp_vectype
= get_vectype_for_scalar_type (TREE_TYPE (rhs1
));
3562 if (comp_vectype
== NULL_TREE
)
3568 mask_type
= get_mask_type_for_scalar_type (TREE_TYPE (rhs1
));
3570 || !expand_vec_cmp_expr_p (comp_vectype
, mask_type
, rhs_code
))
3576 if (TREE_CODE (TREE_TYPE (rhs1
)) != INTEGER_TYPE
3577 || !TYPE_UNSIGNED (TREE_TYPE (rhs1
)))
3579 scalar_mode mode
= SCALAR_TYPE_MODE (TREE_TYPE (rhs1
));
3580 res
= build_nonstandard_integer_type (GET_MODE_BITSIZE (mode
), 1);
3583 res
= TREE_TYPE (rhs1
);
3587 cache
.put (def_stmt
, res
);
3591 /* Return the proper type for converting bool VAR into
3592 an integer value or NULL_TREE if no such type exists.
3593 The type is chosen so that converted value has the
3594 same number of elements as VAR's vector type. */
3597 search_type_for_mask (tree var
, vec_info
*vinfo
)
3599 hash_map
<gimple
*, tree
> cache
;
3600 return search_type_for_mask_1 (var
, vinfo
, cache
);
3603 /* Function vect_recog_bool_pattern
3605 Try to find pattern like following:
3607 bool a_b, b_b, c_b, d_b, e_b;
3610 S1 a_b = x1 CMP1 y1;
3611 S2 b_b = x2 CMP2 y2;
3613 S4 d_b = x3 CMP3 y3;
3615 S6 f_T = (TYPE) e_b;
3617 where type 'TYPE' is an integral type. Or a similar pattern
3620 S6 f_Y = e_b ? r_Y : s_Y;
3622 as results from if-conversion of a complex condition.
3626 * LAST_STMT: A stmt at the end from which the pattern
3627 search begins, i.e. cast of a bool to
3632 * TYPE_IN: The type of the input arguments to the pattern.
3634 * TYPE_OUT: The type of the output of this pattern.
3636 * Return value: A new stmt that will be used to replace the pattern.
3638 Assuming size of TYPE is the same as size of all comparisons
3639 (otherwise some casts would be added where needed), the above
3640 sequence we create related pattern stmts:
3641 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3642 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3643 S4' d_T = x3 CMP3 y3 ? 1 : 0;
3644 S5' e_T = c_T | d_T;
3647 Instead of the above S3' we could emit:
3648 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3649 S3' c_T = a_T | b_T;
3650 but the above is more efficient. */
3653 vect_recog_bool_pattern (vec
<gimple
*> *stmts
, tree
*type_in
,
3656 gimple
*last_stmt
= stmts
->pop ();
3657 enum tree_code rhs_code
;
3658 tree var
, lhs
, rhs
, vectype
;
3659 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
3660 stmt_vec_info new_stmt_info
;
3661 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
3662 gimple
*pattern_stmt
;
3664 if (!is_gimple_assign (last_stmt
))
3667 var
= gimple_assign_rhs1 (last_stmt
);
3668 lhs
= gimple_assign_lhs (last_stmt
);
3670 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var
)))
3673 hash_set
<gimple
*> bool_stmts
;
3675 rhs_code
= gimple_assign_rhs_code (last_stmt
);
3676 if (CONVERT_EXPR_CODE_P (rhs_code
))
3678 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
3679 || TYPE_PRECISION (TREE_TYPE (lhs
)) == 1)
3681 vectype
= get_vectype_for_scalar_type (TREE_TYPE (lhs
));
3682 if (vectype
== NULL_TREE
)
3685 if (check_bool_pattern (var
, vinfo
, bool_stmts
))
3687 rhs
= adjust_bool_stmts (bool_stmts
, TREE_TYPE (lhs
), last_stmt
);
3688 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
3689 if (useless_type_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (rhs
)))
3690 pattern_stmt
= gimple_build_assign (lhs
, SSA_NAME
, rhs
);
3693 = gimple_build_assign (lhs
, NOP_EXPR
, rhs
);
3697 tree type
= search_type_for_mask (var
, vinfo
);
3698 tree cst0
, cst1
, tmp
;
3703 /* We may directly use cond with narrowed type to avoid
3704 multiple cond exprs with following result packing and
3705 perform single cond with packed mask instead. In case
3706 of widening we better make cond first and then extract
3708 if (TYPE_MODE (type
) == TYPE_MODE (TREE_TYPE (lhs
)))
3709 type
= TREE_TYPE (lhs
);
3711 cst0
= build_int_cst (type
, 0);
3712 cst1
= build_int_cst (type
, 1);
3713 tmp
= vect_recog_temp_ssa_var (type
, NULL
);
3714 pattern_stmt
= gimple_build_assign (tmp
, COND_EXPR
, var
, cst1
, cst0
);
3716 if (!useless_type_conversion_p (type
, TREE_TYPE (lhs
)))
3718 tree new_vectype
= get_vectype_for_scalar_type (type
);
3719 new_stmt_info
= new_stmt_vec_info (pattern_stmt
, vinfo
);
3720 set_vinfo_for_stmt (pattern_stmt
, new_stmt_info
);
3721 STMT_VINFO_VECTYPE (new_stmt_info
) = new_vectype
;
3722 new_pattern_def_seq (stmt_vinfo
, pattern_stmt
);
3724 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
3725 pattern_stmt
= gimple_build_assign (lhs
, CONVERT_EXPR
, tmp
);
3729 *type_out
= vectype
;
3731 stmts
->safe_push (last_stmt
);
3732 if (dump_enabled_p ())
3733 dump_printf_loc (MSG_NOTE
, vect_location
,
3734 "vect_recog_bool_pattern: detected:\n");
3736 return pattern_stmt
;
3738 else if (rhs_code
== COND_EXPR
3739 && TREE_CODE (var
) == SSA_NAME
)
3741 vectype
= get_vectype_for_scalar_type (TREE_TYPE (lhs
));
3742 if (vectype
== NULL_TREE
)
3745 /* Build a scalar type for the boolean result that when
3746 vectorized matches the vector type of the result in
3747 size and number of elements. */
3749 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype
)),
3750 TYPE_VECTOR_SUBPARTS (vectype
));
3753 = build_nonstandard_integer_type (prec
,
3754 TYPE_UNSIGNED (TREE_TYPE (var
)));
3755 if (get_vectype_for_scalar_type (type
) == NULL_TREE
)
3758 if (!check_bool_pattern (var
, vinfo
, bool_stmts
))
3761 rhs
= adjust_bool_stmts (bool_stmts
, type
, last_stmt
);
3763 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
3765 = gimple_build_assign (lhs
, COND_EXPR
,
3766 build2 (NE_EXPR
, boolean_type_node
,
3767 rhs
, build_int_cst (type
, 0)),
3768 gimple_assign_rhs2 (last_stmt
),
3769 gimple_assign_rhs3 (last_stmt
));
3770 *type_out
= vectype
;
3772 stmts
->safe_push (last_stmt
);
3773 if (dump_enabled_p ())
3774 dump_printf_loc (MSG_NOTE
, vect_location
,
3775 "vect_recog_bool_pattern: detected:\n");
3777 return pattern_stmt
;
3779 else if (rhs_code
== SSA_NAME
3780 && STMT_VINFO_DATA_REF (stmt_vinfo
))
3782 stmt_vec_info pattern_stmt_info
;
3783 vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
3784 gcc_assert (vectype
!= NULL_TREE
);
3785 if (!VECTOR_MODE_P (TYPE_MODE (vectype
)))
3788 if (check_bool_pattern (var
, vinfo
, bool_stmts
))
3789 rhs
= adjust_bool_stmts (bool_stmts
, TREE_TYPE (vectype
), last_stmt
);
3792 tree type
= search_type_for_mask (var
, vinfo
);
3793 tree cst0
, cst1
, new_vectype
;
3798 if (TYPE_MODE (type
) == TYPE_MODE (TREE_TYPE (vectype
)))
3799 type
= TREE_TYPE (vectype
);
3801 cst0
= build_int_cst (type
, 0);
3802 cst1
= build_int_cst (type
, 1);
3803 new_vectype
= get_vectype_for_scalar_type (type
);
3805 rhs
= vect_recog_temp_ssa_var (type
, NULL
);
3806 pattern_stmt
= gimple_build_assign (rhs
, COND_EXPR
, var
, cst1
, cst0
);
3808 pattern_stmt_info
= new_stmt_vec_info (pattern_stmt
, vinfo
);
3809 set_vinfo_for_stmt (pattern_stmt
, pattern_stmt_info
);
3810 STMT_VINFO_VECTYPE (pattern_stmt_info
) = new_vectype
;
3811 append_pattern_def_seq (stmt_vinfo
, pattern_stmt
);
3814 lhs
= build1 (VIEW_CONVERT_EXPR
, TREE_TYPE (vectype
), lhs
);
3815 if (!useless_type_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (rhs
)))
3817 tree rhs2
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
3818 gimple
*cast_stmt
= gimple_build_assign (rhs2
, NOP_EXPR
, rhs
);
3819 append_pattern_def_seq (stmt_vinfo
, cast_stmt
);
3822 pattern_stmt
= gimple_build_assign (lhs
, SSA_NAME
, rhs
);
3823 pattern_stmt_info
= new_stmt_vec_info (pattern_stmt
, vinfo
);
3824 set_vinfo_for_stmt (pattern_stmt
, pattern_stmt_info
);
3825 STMT_VINFO_DATA_REF (pattern_stmt_info
)
3826 = STMT_VINFO_DATA_REF (stmt_vinfo
);
3827 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info
)
3828 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_vinfo
);
3829 *type_out
= vectype
;
3831 stmts
->safe_push (last_stmt
);
3832 if (dump_enabled_p ())
3833 dump_printf_loc (MSG_NOTE
, vect_location
,
3834 "vect_recog_bool_pattern: detected:\n");
3835 return pattern_stmt
;
3842 /* A helper for vect_recog_mask_conversion_pattern. Build
3843 conversion of MASK to a type suitable for masking VECTYPE.
3844 Built statement gets required vectype and is appended to
3845 a pattern sequence of STMT_VINFO.
3847 Return converted mask. */
3850 build_mask_conversion (tree mask
, tree vectype
, stmt_vec_info stmt_vinfo
,
3855 stmt_vec_info new_stmt_info
;
3857 masktype
= build_same_sized_truth_vector_type (vectype
);
3858 tmp
= vect_recog_temp_ssa_var (TREE_TYPE (masktype
), NULL
);
3859 stmt
= gimple_build_assign (tmp
, CONVERT_EXPR
, mask
);
3860 new_stmt_info
= new_stmt_vec_info (stmt
, vinfo
);
3861 set_vinfo_for_stmt (stmt
, new_stmt_info
);
3862 STMT_VINFO_VECTYPE (new_stmt_info
) = masktype
;
3863 append_pattern_def_seq (stmt_vinfo
, stmt
);
3869 /* Function vect_recog_mask_conversion_pattern
3871 Try to find statements which require boolean type
3872 converison. Additional conversion statements are
3873 added to handle such cases. For example:
3883 S4 c_1 = m_3 ? c_2 : c_3;
3885 Will be transformed into:
3889 S3'' m_2' = (_Bool[bitsize=32])m_2
3890 S3' m_3' = m_1 & m_2';
3891 S4'' m_3'' = (_Bool[bitsize=8])m_3'
3892 S4' c_1' = m_3'' ? c_2 : c_3; */
3895 vect_recog_mask_conversion_pattern (vec
<gimple
*> *stmts
, tree
*type_in
,
3898 gimple
*last_stmt
= stmts
->pop ();
3899 enum tree_code rhs_code
;
3900 tree lhs
= NULL_TREE
, rhs1
, rhs2
, tmp
, rhs1_type
, rhs2_type
;
3901 tree vectype1
, vectype2
;
3902 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
3903 stmt_vec_info pattern_stmt_info
;
3904 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
3906 /* Check for MASK_LOAD ans MASK_STORE calls requiring mask conversion. */
3907 if (is_gimple_call (last_stmt
)
3908 && gimple_call_internal_p (last_stmt
)
3909 && (gimple_call_internal_fn (last_stmt
) == IFN_MASK_STORE
3910 || gimple_call_internal_fn (last_stmt
) == IFN_MASK_LOAD
))
3912 gcall
*pattern_stmt
;
3913 bool load
= (gimple_call_internal_fn (last_stmt
) == IFN_MASK_LOAD
);
3917 lhs
= gimple_call_lhs (last_stmt
);
3918 vectype1
= get_vectype_for_scalar_type (TREE_TYPE (lhs
));
3922 rhs2
= gimple_call_arg (last_stmt
, 3);
3923 vectype1
= get_vectype_for_scalar_type (TREE_TYPE (rhs2
));
3926 rhs1
= gimple_call_arg (last_stmt
, 2);
3927 rhs1_type
= search_type_for_mask (rhs1
, vinfo
);
3930 vectype2
= get_mask_type_for_scalar_type (rhs1_type
);
3932 if (!vectype1
|| !vectype2
3933 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1
),
3934 TYPE_VECTOR_SUBPARTS (vectype2
)))
3937 tmp
= build_mask_conversion (rhs1
, vectype1
, stmt_vinfo
, vinfo
);
3941 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
3943 = gimple_build_call_internal (IFN_MASK_LOAD
, 3,
3944 gimple_call_arg (last_stmt
, 0),
3945 gimple_call_arg (last_stmt
, 1),
3947 gimple_call_set_lhs (pattern_stmt
, lhs
);
3951 = gimple_build_call_internal (IFN_MASK_STORE
, 4,
3952 gimple_call_arg (last_stmt
, 0),
3953 gimple_call_arg (last_stmt
, 1),
3955 gimple_call_arg (last_stmt
, 3));
3957 gimple_call_set_nothrow (pattern_stmt
, true);
3959 pattern_stmt_info
= new_stmt_vec_info (pattern_stmt
, vinfo
);
3960 set_vinfo_for_stmt (pattern_stmt
, pattern_stmt_info
);
3961 STMT_VINFO_DATA_REF (pattern_stmt_info
)
3962 = STMT_VINFO_DATA_REF (stmt_vinfo
);
3963 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info
)
3964 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_vinfo
);
3966 *type_out
= vectype1
;
3967 *type_in
= vectype1
;
3968 stmts
->safe_push (last_stmt
);
3969 if (dump_enabled_p ())
3970 dump_printf_loc (MSG_NOTE
, vect_location
,
3971 "vect_recog_mask_conversion_pattern: detected:\n");
3973 return pattern_stmt
;
3976 if (!is_gimple_assign (last_stmt
))
3979 gimple
*pattern_stmt
;
3980 lhs
= gimple_assign_lhs (last_stmt
);
3981 rhs1
= gimple_assign_rhs1 (last_stmt
);
3982 rhs_code
= gimple_assign_rhs_code (last_stmt
);
3984 /* Check for cond expression requiring mask conversion. */
3985 if (rhs_code
== COND_EXPR
)
3987 /* vect_recog_mixed_size_cond_pattern could apply.
3989 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
3992 vectype1
= get_vectype_for_scalar_type (TREE_TYPE (lhs
));
3994 if (TREE_CODE (rhs1
) == SSA_NAME
)
3996 rhs1_type
= search_type_for_mask (rhs1
, vinfo
);
4000 else if (COMPARISON_CLASS_P (rhs1
))
4002 /* Check whether we're comparing scalar booleans and (if so)
4003 whether a better mask type exists than the mask associated
4004 with boolean-sized elements. This avoids unnecessary packs
4005 and unpacks if the booleans are set from comparisons of
4006 wider types. E.g. in:
4008 int x1, x2, x3, x4, y1, y1;
4010 bool b1 = (x1 == x2);
4011 bool b2 = (x3 == x4);
4012 ... = b1 == b2 ? y1 : y2;
4014 it is better for b1 and b2 to use the mask type associated
4015 with int elements rather bool (byte) elements. */
4016 rhs1_type
= search_type_for_mask (TREE_OPERAND (rhs1
, 0), vinfo
);
4018 rhs1_type
= TREE_TYPE (TREE_OPERAND (rhs1
, 0));
4023 vectype2
= get_mask_type_for_scalar_type (rhs1_type
);
4025 if (!vectype1
|| !vectype2
)
4028 /* Continue if a conversion is needed. Also continue if we have
4029 a comparison whose vector type would normally be different from
4030 VECTYPE2 when considered in isolation. In that case we'll
4031 replace the comparison with an SSA name (so that we can record
4032 its vector type) and behave as though the comparison was an SSA
4033 name from the outset. */
4034 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1
),
4035 TYPE_VECTOR_SUBPARTS (vectype2
))
4036 && (TREE_CODE (rhs1
) == SSA_NAME
4037 || rhs1_type
== TREE_TYPE (TREE_OPERAND (rhs1
, 0))))
4040 /* If rhs1 is invariant and we can promote it leave the COND_EXPR
4041 in place, we can handle it in vectorizable_condition. This avoids
4042 unnecessary promotion stmts and increased vectorization factor. */
4043 if (COMPARISON_CLASS_P (rhs1
)
4044 && INTEGRAL_TYPE_P (rhs1_type
)
4045 && known_le (TYPE_VECTOR_SUBPARTS (vectype1
),
4046 TYPE_VECTOR_SUBPARTS (vectype2
)))
4049 enum vect_def_type dt
;
4050 if (vect_is_simple_use (TREE_OPERAND (rhs1
, 0), stmt_vinfo
->vinfo
,
4052 && dt
== vect_external_def
4053 && vect_is_simple_use (TREE_OPERAND (rhs1
, 1), stmt_vinfo
->vinfo
,
4055 && (dt
== vect_external_def
4056 || dt
== vect_constant_def
))
4058 tree wide_scalar_type
= build_nonstandard_integer_type
4059 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype1
))),
4060 TYPE_UNSIGNED (rhs1_type
));
4061 tree vectype3
= get_vectype_for_scalar_type (wide_scalar_type
);
4062 if (expand_vec_cond_expr_p (vectype1
, vectype3
, TREE_CODE (rhs1
)))
4067 /* If rhs1 is a comparison we need to move it into a
4068 separate statement. */
4069 if (TREE_CODE (rhs1
) != SSA_NAME
)
4071 tmp
= vect_recog_temp_ssa_var (TREE_TYPE (rhs1
), NULL
);
4072 pattern_stmt
= gimple_build_assign (tmp
, rhs1
);
4075 pattern_stmt_info
= new_stmt_vec_info (pattern_stmt
, vinfo
);
4076 set_vinfo_for_stmt (pattern_stmt
, pattern_stmt_info
);
4077 STMT_VINFO_VECTYPE (pattern_stmt_info
) = vectype2
;
4078 append_pattern_def_seq (stmt_vinfo
, pattern_stmt
);
4081 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
4082 TYPE_VECTOR_SUBPARTS (vectype2
)))
4083 tmp
= build_mask_conversion (rhs1
, vectype1
, stmt_vinfo
, vinfo
);
4087 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
4088 pattern_stmt
= gimple_build_assign (lhs
, COND_EXPR
, tmp
,
4089 gimple_assign_rhs2 (last_stmt
),
4090 gimple_assign_rhs3 (last_stmt
));
4092 *type_out
= vectype1
;
4093 *type_in
= vectype1
;
4094 stmts
->safe_push (last_stmt
);
4095 if (dump_enabled_p ())
4096 dump_printf_loc (MSG_NOTE
, vect_location
,
4097 "vect_recog_mask_conversion_pattern: detected:\n");
4099 return pattern_stmt
;
4102 /* Now check for binary boolean operations requiring conversion for
4104 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs
)))
4107 if (rhs_code
!= BIT_IOR_EXPR
4108 && rhs_code
!= BIT_XOR_EXPR
4109 && rhs_code
!= BIT_AND_EXPR
4110 && TREE_CODE_CLASS (rhs_code
) != tcc_comparison
)
4113 rhs2
= gimple_assign_rhs2 (last_stmt
);
4115 rhs1_type
= search_type_for_mask (rhs1
, vinfo
);
4116 rhs2_type
= search_type_for_mask (rhs2
, vinfo
);
4118 if (!rhs1_type
|| !rhs2_type
4119 || TYPE_PRECISION (rhs1_type
) == TYPE_PRECISION (rhs2_type
))
4122 if (TYPE_PRECISION (rhs1_type
) < TYPE_PRECISION (rhs2_type
))
4124 vectype1
= get_mask_type_for_scalar_type (rhs1_type
);
4127 rhs2
= build_mask_conversion (rhs2
, vectype1
, stmt_vinfo
, vinfo
);
4131 vectype1
= get_mask_type_for_scalar_type (rhs2_type
);
4134 rhs1
= build_mask_conversion (rhs1
, vectype1
, stmt_vinfo
, vinfo
);
4137 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
4138 pattern_stmt
= gimple_build_assign (lhs
, rhs_code
, rhs1
, rhs2
);
4140 *type_out
= vectype1
;
4141 *type_in
= vectype1
;
4142 stmts
->safe_push (last_stmt
);
4143 if (dump_enabled_p ())
4144 dump_printf_loc (MSG_NOTE
, vect_location
,
4145 "vect_recog_mask_conversion_pattern: detected:\n");
4147 return pattern_stmt
;
4150 /* STMT is a load or store. If the load or store is conditional, return
4151 the boolean condition under which it occurs, otherwise return null. */
4154 vect_get_load_store_mask (gimple
*stmt
)
4156 if (gassign
*def_assign
= dyn_cast
<gassign
*> (stmt
))
4158 gcc_assert (gimple_assign_single_p (def_assign
));
4162 if (gcall
*def_call
= dyn_cast
<gcall
*> (stmt
))
4164 internal_fn ifn
= gimple_call_internal_fn (def_call
);
4165 int mask_index
= internal_fn_mask_index (ifn
);
4166 return gimple_call_arg (def_call
, mask_index
);
4172 /* Return the scalar offset type that an internal gather/scatter function
4173 should use. GS_INFO describes the gather/scatter operation. */
4176 vect_get_gather_scatter_offset_type (gather_scatter_info
*gs_info
)
4178 tree offset_type
= TREE_TYPE (gs_info
->offset
);
4179 unsigned int element_bits
= tree_to_uhwi (TYPE_SIZE (gs_info
->element_type
));
4181 /* Enforced by vect_check_gather_scatter. */
4182 unsigned int offset_bits
= TYPE_PRECISION (offset_type
);
4183 gcc_assert (element_bits
>= offset_bits
);
4185 /* If the offset is narrower than the elements, extend it according
4187 if (element_bits
> offset_bits
)
4188 return build_nonstandard_integer_type (element_bits
,
4189 TYPE_UNSIGNED (offset_type
));
4194 /* Return MASK if MASK is suitable for masking an operation on vectors
4195 of type VECTYPE, otherwise convert it into such a form and return
4196 the result. Associate any conversion statements with STMT_INFO's
4200 vect_convert_mask_for_vectype (tree mask
, tree vectype
,
4201 stmt_vec_info stmt_info
, vec_info
*vinfo
)
4203 tree mask_type
= search_type_for_mask (mask
, vinfo
);
4206 tree mask_vectype
= get_mask_type_for_scalar_type (mask_type
);
4208 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype
),
4209 TYPE_VECTOR_SUBPARTS (mask_vectype
)))
4210 mask
= build_mask_conversion (mask
, vectype
, stmt_info
, vinfo
);
4215 /* Return the equivalent of:
4217 fold_convert (TYPE, VALUE)
4219 with the expectation that the operation will be vectorized.
4220 If new statements are needed, add them as pattern statements
4224 vect_add_conversion_to_patterm (tree type
, tree value
,
4225 stmt_vec_info stmt_info
,
4228 if (useless_type_conversion_p (type
, TREE_TYPE (value
)))
4231 tree new_value
= vect_recog_temp_ssa_var (type
, NULL
);
4232 gassign
*conversion
= gimple_build_assign (new_value
, CONVERT_EXPR
, value
);
4233 stmt_vec_info new_stmt_info
= new_stmt_vec_info (conversion
, vinfo
);
4234 set_vinfo_for_stmt (conversion
, new_stmt_info
);
4235 STMT_VINFO_VECTYPE (new_stmt_info
) = get_vectype_for_scalar_type (type
);
4236 append_pattern_def_seq (stmt_info
, conversion
);
4240 /* Try to convert STMT into a call to a gather load or scatter store
4241 internal function. Return the final statement on success and set
4242 *TYPE_IN and *TYPE_OUT to the vector type being loaded or stored.
4244 This function only handles gathers and scatters that were recognized
4245 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
4248 vect_try_gather_scatter_pattern (gimple
*stmt
, stmt_vec_info last_stmt_info
,
4249 tree
*type_in
, tree
*type_out
)
4251 /* Currently we only support this for loop vectorization. */
4252 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4253 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (stmt_info
->vinfo
);
4257 /* Make sure that we're looking at a gather load or scatter store. */
4258 data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
4259 if (!dr
|| !STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
4262 /* Get the boolean that controls whether the load or store happens.
4263 This is null if the operation is unconditional. */
4264 tree mask
= vect_get_load_store_mask (stmt
);
4266 /* Make sure that the target supports an appropriate internal
4267 function for the gather/scatter operation. */
4268 gather_scatter_info gs_info
;
4269 if (!vect_check_gather_scatter (stmt
, loop_vinfo
, &gs_info
)
4273 /* Convert the mask to the right form. */
4274 tree gs_vectype
= get_vectype_for_scalar_type (gs_info
.element_type
);
4276 mask
= vect_convert_mask_for_vectype (mask
, gs_vectype
, last_stmt_info
,
4279 /* Get the invariant base and non-invariant offset, converting the
4280 latter to the same width as the vector elements. */
4281 tree base
= gs_info
.base
;
4282 tree offset_type
= vect_get_gather_scatter_offset_type (&gs_info
);
4283 tree offset
= vect_add_conversion_to_patterm (offset_type
, gs_info
.offset
,
4284 last_stmt_info
, loop_vinfo
);
4286 /* Build the new pattern statement. */
4287 tree scale
= size_int (gs_info
.scale
);
4288 gcall
*pattern_stmt
;
4289 if (DR_IS_READ (dr
))
4292 pattern_stmt
= gimple_build_call_internal (gs_info
.ifn
, 4, base
,
4293 offset
, scale
, mask
);
4295 pattern_stmt
= gimple_build_call_internal (gs_info
.ifn
, 3, base
,
4297 tree load_lhs
= vect_recog_temp_ssa_var (gs_info
.element_type
, NULL
);
4298 gimple_call_set_lhs (pattern_stmt
, load_lhs
);
4302 tree rhs
= vect_get_store_rhs (stmt
);
4304 pattern_stmt
= gimple_build_call_internal (IFN_MASK_SCATTER_STORE
, 5,
4305 base
, offset
, scale
, rhs
,
4308 pattern_stmt
= gimple_build_call_internal (IFN_SCATTER_STORE
, 4,
4309 base
, offset
, scale
, rhs
);
4311 gimple_call_set_nothrow (pattern_stmt
, true);
4313 /* Copy across relevant vectorization info and associate DR with the
4314 new pattern statement instead of the original statement. */
4315 stmt_vec_info pattern_stmt_info
= new_stmt_vec_info (pattern_stmt
,
4317 set_vinfo_for_stmt (pattern_stmt
, pattern_stmt_info
);
4318 STMT_VINFO_DATA_REF (pattern_stmt_info
) = dr
;
4319 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info
)
4320 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info
);
4321 STMT_VINFO_GATHER_SCATTER_P (pattern_stmt_info
)
4322 = STMT_VINFO_GATHER_SCATTER_P (stmt_info
);
4324 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4325 *type_out
= vectype
;
4328 if (dump_enabled_p ())
4329 dump_printf_loc (MSG_NOTE
, vect_location
,
4330 "gather/scatter pattern detected:\n");
4332 return pattern_stmt
;
4335 /* Pattern wrapper around vect_try_gather_scatter_pattern. */
4338 vect_recog_gather_scatter_pattern (vec
<gimple
*> *stmts
, tree
*type_in
,
4341 gimple
*last_stmt
= stmts
->pop ();
4342 stmt_vec_info last_stmt_info
= vinfo_for_stmt (last_stmt
);
4343 gimple
*pattern_stmt
= vect_try_gather_scatter_pattern (last_stmt
,
4347 stmts
->safe_push (last_stmt
);
4348 return pattern_stmt
;
4351 /* Mark statements that are involved in a pattern. */
4354 vect_mark_pattern_stmts (gimple
*orig_stmt
, gimple
*pattern_stmt
,
4355 tree pattern_vectype
)
4357 stmt_vec_info pattern_stmt_info
, def_stmt_info
;
4358 stmt_vec_info orig_stmt_info
= vinfo_for_stmt (orig_stmt
);
4359 vec_info
*vinfo
= orig_stmt_info
->vinfo
;
4362 pattern_stmt_info
= vinfo_for_stmt (pattern_stmt
);
4363 if (pattern_stmt_info
== NULL
)
4365 pattern_stmt_info
= new_stmt_vec_info (pattern_stmt
, vinfo
);
4366 set_vinfo_for_stmt (pattern_stmt
, pattern_stmt_info
);
4368 gimple_set_bb (pattern_stmt
, gimple_bb (orig_stmt
));
4370 STMT_VINFO_RELATED_STMT (pattern_stmt_info
) = orig_stmt
;
4371 STMT_VINFO_DEF_TYPE (pattern_stmt_info
)
4372 = STMT_VINFO_DEF_TYPE (orig_stmt_info
);
4373 STMT_VINFO_VECTYPE (pattern_stmt_info
) = pattern_vectype
;
4374 STMT_VINFO_IN_PATTERN_P (orig_stmt_info
) = true;
4375 STMT_VINFO_RELATED_STMT (orig_stmt_info
) = pattern_stmt
;
4376 if (gimple
*def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info
))
4377 for (gimple_stmt_iterator si
= gsi_start (def_seq
);
4378 !gsi_end_p (si
); gsi_next (&si
))
4380 def_stmt
= gsi_stmt (si
);
4381 def_stmt_info
= vinfo_for_stmt (def_stmt
);
4382 if (def_stmt_info
== NULL
)
4384 def_stmt_info
= new_stmt_vec_info (def_stmt
, vinfo
);
4385 set_vinfo_for_stmt (def_stmt
, def_stmt_info
);
4387 gimple_set_bb (def_stmt
, gimple_bb (orig_stmt
));
4388 STMT_VINFO_RELATED_STMT (def_stmt_info
) = orig_stmt
;
4389 STMT_VINFO_DEF_TYPE (def_stmt_info
) = vect_internal_def
;
4390 if (STMT_VINFO_VECTYPE (def_stmt_info
) == NULL_TREE
)
4391 STMT_VINFO_VECTYPE (def_stmt_info
) = pattern_vectype
;
4395 /* Function vect_pattern_recog_1
4398 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
4399 computation pattern.
4400 STMT: A stmt from which the pattern search should start.
4402 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
4403 expression that computes the same functionality and can be used to
4404 replace the sequence of stmts that are involved in the pattern.
4407 This function checks if the expression returned by PATTERN_RECOG_FUNC is
4408 supported in vector form by the target. We use 'TYPE_IN' to obtain the
4409 relevant vector type. If 'TYPE_IN' is already a vector type, then this
4410 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
4411 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
4412 to the available target pattern.
4414 This function also does some bookkeeping, as explained in the documentation
4415 for vect_recog_pattern. */
4418 vect_pattern_recog_1 (vect_recog_func
*recog_func
,
4419 gimple_stmt_iterator si
,
4420 vec
<gimple
*> *stmts_to_replace
)
4422 gimple
*stmt
= gsi_stmt (si
), *pattern_stmt
;
4423 stmt_vec_info stmt_info
;
4424 loop_vec_info loop_vinfo
;
4425 tree pattern_vectype
;
4426 tree type_in
, type_out
;
4427 enum tree_code code
;
4430 stmts_to_replace
->truncate (0);
4431 stmts_to_replace
->quick_push (stmt
);
4432 pattern_stmt
= recog_func
->fn (stmts_to_replace
, &type_in
, &type_out
);
4435 /* Clear related stmt info that analysis might have noted for
4436 to be replaced stmts. */
4437 for (i
= 0; stmts_to_replace
->iterate (i
, &stmt
)
4438 && (unsigned) i
< stmts_to_replace
->length ();
4441 stmt_info
= vinfo_for_stmt (stmt
);
4442 STMT_VINFO_RELATED_STMT (stmt_info
) = NULL
;
4447 stmt
= stmts_to_replace
->last ();
4448 stmt_info
= vinfo_for_stmt (stmt
);
4449 loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4451 if (VECTOR_BOOLEAN_TYPE_P (type_in
)
4452 || VECTOR_TYPE_P (type_in
))
4454 /* No need to check target support (already checked by the pattern
4455 recognition function). */
4456 pattern_vectype
= type_out
? type_out
: type_in
;
4460 /* Check target support */
4461 type_in
= get_vectype_for_scalar_type (type_in
);
4465 type_out
= get_vectype_for_scalar_type (type_out
);
4470 pattern_vectype
= type_out
;
4472 if (is_gimple_assign (pattern_stmt
))
4474 enum insn_code icode
;
4475 code
= gimple_assign_rhs_code (pattern_stmt
);
4476 optab optab
= optab_for_tree_code (code
, type_in
, optab_default
);
4477 machine_mode vec_mode
= TYPE_MODE (type_in
);
4479 || (icode
= optab_handler (optab
, vec_mode
)) == CODE_FOR_nothing
4480 || (insn_data
[icode
].operand
[0].mode
!= TYPE_MODE (type_out
)))
4484 gcc_assert (is_gimple_call (pattern_stmt
));
4487 /* Found a vectorizable pattern. */
4488 if (dump_enabled_p ())
4490 dump_printf_loc (MSG_NOTE
, vect_location
,
4491 "%s pattern recognized: ", recog_func
->name
);
4492 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
4495 /* Mark the stmts that are involved in the pattern. */
4496 vect_mark_pattern_stmts (stmt
, pattern_stmt
, pattern_vectype
);
4498 /* Patterns cannot be vectorized using SLP, because they change the order of
4504 VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo
), ix
, ix2
,
4505 elem_ptr
, *elem_ptr
== stmt
);
4508 /* It is possible that additional pattern stmts are created and inserted in
4509 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
4510 relevant statements. */
4511 for (i
= 0; stmts_to_replace
->iterate (i
, &stmt
)
4512 && (unsigned) i
< (stmts_to_replace
->length () - 1);
4515 stmt_info
= vinfo_for_stmt (stmt
);
4516 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
4517 if (dump_enabled_p ())
4519 dump_printf_loc (MSG_NOTE
, vect_location
,
4520 "additional pattern stmt: ");
4521 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
4524 vect_mark_pattern_stmts (stmt
, pattern_stmt
, NULL_TREE
);
4531 /* Function vect_pattern_recog
4534 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
4537 Output - for each computation idiom that is detected we create a new stmt
4538 that provides the same functionality and that can be vectorized. We
4539 also record some information in the struct_stmt_info of the relevant
4540 stmts, as explained below:
4542 At the entry to this function we have the following stmts, with the
4543 following initial value in the STMT_VINFO fields:
4545 stmt in_pattern_p related_stmt vec_stmt
4546 S1: a_i = .... - - -
4547 S2: a_2 = ..use(a_i).. - - -
4548 S3: a_1 = ..use(a_2).. - - -
4549 S4: a_0 = ..use(a_1).. - - -
4550 S5: ... = ..use(a_0).. - - -
4552 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
4553 represented by a single stmt. We then:
4554 - create a new stmt S6 equivalent to the pattern (the stmt is not
4555 inserted into the code)
4556 - fill in the STMT_VINFO fields as follows:
4558 in_pattern_p related_stmt vec_stmt
4559 S1: a_i = .... - - -
4560 S2: a_2 = ..use(a_i).. - - -
4561 S3: a_1 = ..use(a_2).. - - -
4562 S4: a_0 = ..use(a_1).. true S6 -
4563 '---> S6: a_new = .... - S4 -
4564 S5: ... = ..use(a_0).. - - -
4566 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
4567 to each other through the RELATED_STMT field).
4569 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
4570 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
4571 remain irrelevant unless used by stmts other than S4.
4573 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
4574 (because they are marked as irrelevant). It will vectorize S6, and record
4575 a pointer to the new vector stmt VS6 from S6 (as usual).
4576 S4 will be skipped, and S5 will be vectorized as usual:
4578 in_pattern_p related_stmt vec_stmt
4579 S1: a_i = .... - - -
4580 S2: a_2 = ..use(a_i).. - - -
4581 S3: a_1 = ..use(a_2).. - - -
4582 > VS6: va_new = .... - - -
4583 S4: a_0 = ..use(a_1).. true S6 VS6
4584 '---> S6: a_new = .... - S4 VS6
4585 > VS5: ... = ..vuse(va_new).. - - -
4586 S5: ... = ..use(a_0).. - - -
4588 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
4589 elsewhere), and we'll end up with:
4592 VS5: ... = ..vuse(va_new)..
4594 In case of more than one pattern statements, e.g., widen-mult with
4598 S2 a_T = (TYPE) a_t;
4599 '--> S3: a_it = (interm_type) a_t;
4600 S4 prod_T = a_T * CONST;
4601 '--> S5: prod_T' = a_it w* CONST;
4603 there may be other users of a_T outside the pattern. In that case S2 will
4604 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
4605 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
4606 be recorded in S3. */
4609 vect_pattern_recog (vec_info
*vinfo
)
4614 gimple_stmt_iterator si
;
4616 auto_vec
<gimple
*, 1> stmts_to_replace
;
4618 DUMP_VECT_SCOPE ("vect_pattern_recog");
4620 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
4622 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
4623 bbs
= LOOP_VINFO_BBS (loop_vinfo
);
4624 nbbs
= loop
->num_nodes
;
4626 /* Scan through the loop stmts, applying the pattern recognition
4627 functions starting at each stmt visited: */
4628 for (i
= 0; i
< nbbs
; i
++)
4630 basic_block bb
= bbs
[i
];
4631 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
4633 gimple
*stmt
= gsi_stmt (si
);
4634 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4635 if (stmt_info
&& STMT_VINFO_IN_PATTERN_P (stmt_info
))
4637 /* Scan over all generic vect_recog_xxx_pattern functions. */
4638 for (j
= 0; j
< NUM_PATTERNS
; j
++)
4639 if (vect_pattern_recog_1 (&vect_vect_recog_func_ptrs
[j
], si
,
4647 bb_vec_info bb_vinfo
= as_a
<bb_vec_info
> (vinfo
);
4648 for (si
= bb_vinfo
->region_begin
;
4649 gsi_stmt (si
) != gsi_stmt (bb_vinfo
->region_end
); gsi_next (&si
))
4651 gimple
*stmt
= gsi_stmt (si
);
4652 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4654 && (!STMT_VINFO_VECTORIZABLE (stmt_info
)
4655 || STMT_VINFO_IN_PATTERN_P (stmt_info
)))
4658 /* Scan over all generic vect_recog_xxx_pattern functions. */
4659 for (j
= 0; j
< NUM_PATTERNS
; j
++)
4660 if (vect_pattern_recog_1 (&vect_vect_recog_func_ptrs
[j
], si
,