/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006-2013 Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-data-ref.h"
#include "tree-vectorizer.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "diagnostic-core.h"
/* Pattern recognition functions  */
static gimple vect_recog_widen_sum_pattern (vec<gimple> *, tree *,
					    tree *);
static gimple vect_recog_widen_mult_pattern (vec<gimple> *, tree *,
					     tree *);
static gimple vect_recog_dot_prod_pattern (vec<gimple> *, tree *,
					   tree *);
static gimple vect_recog_pow_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_over_widening_pattern (vec<gimple> *, tree *,
						tree *);
static gimple vect_recog_widen_shift_pattern (vec<gimple> *,
					      tree *, tree *);
static gimple vect_recog_rotate_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_vector_vector_shift_pattern (vec<gimple> *,
						      tree *, tree *);
static gimple vect_recog_divmod_pattern (vec<gimple> *,
					 tree *, tree *);
static gimple vect_recog_mixed_size_cond_pattern (vec<gimple> *,
						  tree *, tree *);
static gimple vect_recog_bool_pattern (vec<gimple> *, tree *, tree *);
static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
	vect_recog_widen_mult_pattern,
	vect_recog_widen_sum_pattern,
	vect_recog_dot_prod_pattern,
	vect_recog_pow_pattern,
	vect_recog_widen_shift_pattern,
	vect_recog_over_widening_pattern,
	vect_recog_rotate_pattern,
	vect_recog_vector_vector_shift_pattern,
	vect_recog_divmod_pattern,
	vect_recog_mixed_size_cond_pattern,
	vect_recog_bool_pattern};
static inline void
append_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
				      stmt);
}

static inline void
new_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
  append_pattern_def_seq (stmt_info, stmt);
}
/* Check whether STMT2 is in the same loop or basic block as STMT1.
   Which of the two applies depends on whether we're currently doing
   loop-based or basic-block-based vectorization, as determined by
   the vinfo_for_stmt for STMT1 (which must be defined).

   If this returns true, vinfo_for_stmt for STMT2 is guaranteed
   to be defined as well.  */

static bool
vect_same_loop_or_bb_p (gimple stmt1, gimple stmt2)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);

  if (!gimple_bb (stmt2))
    return false;

  if (loop_vinfo)
    {
      struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt2)))
	return false;
    }
  else
    {
      if (gimple_bb (stmt2) != BB_VINFO_BB (bb_vinfo)
	  || gimple_code (stmt2) == GIMPLE_PHI)
	return false;
    }

  gcc_assert (vinfo_for_stmt (stmt2));
  return true;
}
/* If the LHS of DEF_STMT has a single use, and that statement is
   in the same loop or basic block, return it.  */

static gimple
vect_single_imm_use (gimple def_stmt)
{
  tree lhs = gimple_assign_lhs (def_stmt);
  use_operand_p use_p;
  gimple use_stmt;

  if (!single_imm_use (lhs, &use_p, &use_stmt))
    return NULL;

  if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
    return NULL;

  return use_stmt;
}
/* Check whether NAME, an ssa-name used in USE_STMT,
   is a result of a type promotion or demotion, such that:
     DEF_STMT: NAME = NOP (name0)
   where the type of name0 (ORIG_TYPE) is smaller/bigger than the type of NAME.
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.  */

static bool
type_conversion_p (tree name, gimple use_stmt, bool check_sign,
		   tree *orig_type, gimple *def_stmt, bool *promotion)
{
  tree dummy;
  gimple dummy_gimple;
  loop_vec_info loop_vinfo;
  stmt_vec_info stmt_vinfo;
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;
  tree def;
  bb_vec_info bb_vinfo;

  stmt_vinfo = vinfo_for_stmt (use_stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  if (!vect_is_simple_use (name, use_stmt, loop_vinfo, bb_vinfo, def_stmt,
			   &def, &dt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (!*def_stmt)
    return false;

  if (!is_gimple_assign (*def_stmt))
    return false;

  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *orig_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
    return false;

  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
    *promotion = true;
  else if (TYPE_PRECISION (*orig_type) >= (TYPE_PRECISION (type) * 2))
    *promotion = false;
  else
    return false;

  if (!vect_is_simple_use (oprnd0, *def_stmt, loop_vinfo,
			   bb_vinfo, &dummy_gimple, &dummy, &dt))
    return false;

  return true;
}
/* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
   is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var.  */

static tree
vect_recog_temp_ssa_var (tree type, gimple stmt)
{
  return make_temp_ssa_name (type, stmt, "patt");
}
/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE1 prod;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
   same size of 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */
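
/* An illustrative source loop this recognizer is meant to catch (a
   hypothetical example, not taken from the testsuite), assuming 16-bit
   elements accumulated into a 32-bit sum:

     short a[N], b[N];
     int dot = 0;
     for (i = 0; i < N; i++)
       dot += a[i] * b[i];

   Here 'type' is short and 'TYPE1'/'TYPE2' are int; S3/S4 are the implicit
   promotions of a[i] and b[i], S5 the widened multiply, and S7 the
   accumulation that becomes DOT_PROD_EXPR <a[i], b[i], dot>.  */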
static gimple
vect_recog_dot_prod_pattern (vec<gimple> *stmts, tree *type_in,
			     tree *type_out)
{
  gimple stmt, last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  tree oprnd00, oprnd01;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  tree prod_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DPROD = DX * DY;
          DDPROD = (TYPE2) DPROD;
          sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type
     - sum is the same size of DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */
  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (oprnd0);
    }
  else
    {
      gimple def_stmt;

      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (last_stmt);
      oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), type)
	  || !types_compatible_p (TREE_TYPE (oprnd1), type))
        return NULL;
      stmt = last_stmt;

      if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
			     &promotion)
	  && promotion)
        {
          stmt = def_stmt;
          oprnd0 = gimple_assign_rhs1 (stmt);
        }
      else
	half_type = type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr  */
  if (TREE_CODE (oprnd0) != SSA_NAME)
    return NULL;

  prod_type = half_type;
  stmt = SSA_NAME_DEF_STMT (oprnd0);

  /* It could not be the dot_prod pattern if the stmt is outside the loop.  */
  if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is a phi
     inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (stmt))
    return NULL;
  stmt_vinfo = vinfo_for_stmt (stmt);
  gcc_assert (stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
    return NULL;
  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as a widening multiplication?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
        return NULL;
      stmt_vinfo = vinfo_for_stmt (stmt);
      gcc_assert (stmt_vinfo);
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
      oprnd00 = gimple_assign_rhs1 (stmt);
      oprnd01 = gimple_assign_rhs2 (stmt);
    }
  else
    {
      tree half_type0, half_type1;
      gimple def_stmt;
      tree oprnd0, oprnd1;

      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
          || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
        return NULL;
      if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
			      &promotion)
	  || !promotion)
        return NULL;
      oprnd00 = gimple_assign_rhs1 (def_stmt);
      if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
			      &promotion)
	  || !promotion)
        return NULL;
      oprnd01 = gimple_assign_rhs1 (def_stmt);
      if (!types_compatible_p (half_type0, half_type1))
        return NULL;
      if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
        return NULL;
    }

  half_type = TREE_TYPE (oprnd00);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (DOT_PROD_EXPR, var,
					       oprnd00, oprnd01, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_recog_dot_prod_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}
/* Handle widening operation by a constant.  At the moment we support
   MULT_EXPR and LSHIFT_EXPR.

   For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
   we check that CONST_OPRND is less than or equal to the size of HALF_TYPE.

   Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
   HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
   that satisfies the above restrictions, we can perform a widening operation
   from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
   with a_it = (interm_type) a_t;  */
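
/* Illustrative (hypothetical) example of the intermediate-type case:

     unsigned char a_t;
     unsigned int a_T = (unsigned int) a_t;
     unsigned int res = a_T << 12;

   TYPE (32 bits) is four times HALF_TYPE (8 bits) and the shift count 12
   exceeds the 8-bit precision, but fits the 16-bit intermediate type, so
   the promotion is rewritten as a_it = (unsigned short) a_t and the shift
   becomes a widening operation from the intermediate type.  */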
static bool
vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
		               tree const_oprnd, tree *oprnd,
   		               vec<gimple> *stmts, tree type,
			       tree *half_type, gimple def_stmt)
{
  tree new_type, new_oprnd;
  gimple new_stmt;

  if (code != MULT_EXPR && code != LSHIFT_EXPR)
    return false;

  if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
        || (code == LSHIFT_EXPR
            && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
	       != 1))
      && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
    {
      /* CONST_OPRND is a constant of HALF_TYPE.  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      return true;
    }

  if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
    return false;

  if (!vect_same_loop_or_bb_p (stmt, def_stmt))
    return false;

  /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
     a type 2 times bigger than HALF_TYPE.  */
  new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
                                             TYPE_UNSIGNED (type));
  if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
      || (code == LSHIFT_EXPR
          && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
    return false;

  /* Use NEW_TYPE for widening operation.  */
  if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
    {
      new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
      /* Check if the already created pattern stmt is what we need.  */
      if (!is_gimple_assign (new_stmt)
          || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
          || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
        return false;

      stmts->safe_push (def_stmt);
      *oprnd = gimple_assign_lhs (new_stmt);
    }
  else
    {
      /* Create a_T = (NEW_TYPE) a_t;  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      new_oprnd = make_ssa_name (new_type, NULL);
      new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd,
					       NULL_TREE);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
      stmts->safe_push (def_stmt);
      *oprnd = new_oprnd;
    }

  *half_type = new_type;
  return true;
}
/* Function vect_recog_widen_mult_pattern

   Try to find the following pattern:

     type a_t, b_t;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;

   where type 'TYPE' is at least double the size of type 'type'.

   Also detect unsigned cases:

     unsigned type a_t, b_t;
     unsigned TYPE u_prod_T;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;
     S6  u_prod_T = (unsigned TYPE) prod_T;

   and multiplication by constants:

     type a_t;
     TYPE a_T, prod_T;

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
     S5  prod_T = a_T * CONST;

   A special case of multiplication by constants is when 'TYPE' is 4 times
   bigger than 'type', but CONST fits an intermediate type 2 times smaller
   than 'TYPE'.  In that case we create an additional pattern stmt for S3
   to create a variable of the intermediate type, and perform widen-mult
   on the intermediate type as well:

     type a_t;
     interm_type a_it;
     TYPE a_T, prod_T, prod_T';

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
           '--> a_it = (interm_type) a_t;
     S5  prod_T = a_T * CONST;
           '--> prod_T' = a_it w* CONST;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
   is detected.  In case of unsigned widen-mult, the original stmt (S5) is
   replaced with S6 in STMTS.  In case of multiplication by a constant
   of an intermediate type (the last case above), STMTS also contains S3
   (inserted before S5).

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_MULT <a_t, b_t>  */
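
/* A minimal (hypothetical) source example that should match:

     short a[N], b[N];
     int c[N];
     for (i = 0; i < N; i++)
       c[i] = a[i] * b[i];

   Each multiply of promoted shorts is replaced by one widening multiply
   WIDEN_MULT_EXPR <a[i], b[i]>, letting the vectorizer use, e.g., paired
   low/high multiply instructions instead of unpacking to full ints
   first.  */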
static gimple
vect_recog_widen_mult_pattern (vec<gimple> *stmts,
			       tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  gimple def_stmt0, def_stmt1;
  tree oprnd0, oprnd1;
  tree type, half_type0, half_type1;
  gimple pattern_stmt;
  tree vectype, vectype_out = NULL_TREE;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  bool op1_ok;
  bool promotion;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* Check argument 0.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
			  &promotion)
      || !promotion)
    return NULL;
  /* Check argument 1.  */
  op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
			      &def_stmt1, &promotion);

  if (op1_ok && promotion)
    {
      oprnd0 = gimple_assign_rhs1 (def_stmt0);
      oprnd1 = gimple_assign_rhs1 (def_stmt1);
    }
  else
    {
      if (TREE_CODE (oprnd1) == INTEGER_CST
	  && TREE_CODE (half_type0) == INTEGER_TYPE
	  && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
					    &oprnd0, stmts, type,
					    &half_type0, def_stmt0))
	{
	  half_type1 = half_type0;
	  oprnd1 = fold_convert (half_type1, oprnd1);
	}
      else
	return NULL;
    }

  /* Handle unsigned case.  Look for
     S6  u_prod_T = (unsigned TYPE) prod_T;
     Use unsigned TYPE as the type for WIDEN_MULT_EXPR.  */
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
    {
      gimple use_stmt;
      tree use_lhs;
      tree use_type;

      if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
	return NULL;

      use_stmt = vect_single_imm_use (last_stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt)
	  || gimple_assign_rhs_code (use_stmt) != NOP_EXPR)
	return NULL;

      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      if (!INTEGRAL_TYPE_P (use_type)
	  || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
	  || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
	return NULL;

      type = use_type;
      last_stmt = use_stmt;
    }

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_recog_widen_mult_pattern: detected:\n");

  /* Check target support  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);
  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
					  vectype_out, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, var, oprnd0,
					       oprnd1);

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}
/* Function vect_recog_pow_pattern

   Try to find the following pattern:

     x = POW (y, N);

   with POW being one of pow, powf, powi, powif and N being
   either 2 or 0.5.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        x = x * x
   or
	x = sqrt (x)
*/
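
/* For instance (hypothetical input),

     for (i = 0; i < N; i++)
       out[i] = pow (in[i], 2.0);

   is rewritten as out[i] = in[i] * in[i], and pow (x, 0.5) becomes
   sqrt (x) when the target can vectorize sqrt; neither form requires a
   vectorized pow implementation.  */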
static gimple
vect_recog_pow_pattern (vec<gimple> *stmts, tree *type_in,
			tree *type_out)
{
  gimple last_stmt = (*stmts)[0];
  tree fn, base, exp = NULL;
  gimple stmt;
  tree var;

  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  fn = gimple_call_fndecl (last_stmt);
  if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
    return NULL;

  switch (DECL_FUNCTION_CODE (fn))
    {
    case BUILT_IN_POWIF:
    case BUILT_IN_POWI:
    case BUILT_IN_POWF:
    case BUILT_IN_POW:
      base = gimple_call_arg (last_stmt, 0);
      exp = gimple_call_arg (last_stmt, 1);
      if (TREE_CODE (exp) != REAL_CST
	  && TREE_CODE (exp) != INTEGER_CST)
	return NULL;
      break;

    default:
      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  *type_out = NULL_TREE;

  /* Catch squaring.  */
  if ((host_integerp (exp, 0)
       && tree_low_cst (exp, 0) == 2)
      || (TREE_CODE (exp) == REAL_CST
	  && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
    {
      *type_in = TREE_TYPE (base);

      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign_with_ops (MULT_EXPR, var, base, base);
      return stmt;
    }

  /* Catch square root.  */
  if (TREE_CODE (exp) == REAL_CST
      && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
    {
      tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
      *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
      if (*type_in)
	{
	  gimple stmt = gimple_build_call (newfn, 1, base);
	  if (vectorizable_function (stmt, *type_in, *type_in)
	      != NULL_TREE)
	    {
	      var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
	      gimple_call_set_lhs (stmt, var);
	      return stmt;
	    }
	}
    }

  return NULL;
}
/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

     type x_t;
     TYPE x_T, sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e - we're
   summing elements of type 'type' into an accumulator of type 'TYPE'.  This
   is a special case of a reduction computation.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.  In the example,
   when this function is called with S3, the pattern {S2,S3} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
	 vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
	 intermediate results) rather than all N results.  Therefore, we
	 cannot allow this pattern when we want to get all the results and in
	 the correct order (as is the case when this computation is in an
	 inner-loop nested in an outer-loop that is being vectorized).  */
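
/* Typical (hypothetical) input:

     unsigned char in[N];
     unsigned int sum = 0;
     for (i = 0; i < N; i++)
       sum += in[i];

   The accumulation becomes WIDEN_SUM_EXPR <in[i], sum>: the bytes are
   summed pairwise into wider lanes instead of first being zero-extended
   to full ints, reducing the number of vector operations.  */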
static gimple
vect_recog_widen_sum_pattern (vec<gimple> *stmts, tree *type_in,
			      tree *type_out)
{
  gimple stmt, last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE) X;
          sum_1 = DX + sum_0;
     In which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a cast from type 'type' to type
     'TYPE'.  */

  if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
			  &promotion)
      || !promotion)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (stmt);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_SUM_EXPR, var,
					       oprnd0, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_recog_widen_sum_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}
/* Return TRUE if the operation in STMT can be performed on a smaller type.

   Input:
   STMT - a statement to check.
   DEF - we support operations with two operands, one of which is constant.
         The other operand can be defined by a demotion operation, or by a
         previous statement in a sequence of over-promoted operations.  In the
         latter case DEF is used to replace that operand.  (It is defined by a
         pattern statement we created for the previous statement in the
         sequence).

   Input/output:
   NEW_TYPE - Output: a smaller type that we are trying to use.  Input: if not
         NULL, it's the type of DEF.
   STMTS - additional pattern statements.  If a pattern statement (type
         conversion) is created in this function, its original statement is
         added to STMTS.

   Output:
   OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
         operands to use in the new pattern statement for STMT (will be created
         in vect_recog_over_widening_pattern ()).
   NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
         statements for STMT: the first one is a type promotion and the second
         one is the operation itself.  We return the type promotion statement
	 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
         the second pattern statement.  */
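
/* As a (hypothetical) illustration, for

     unsigned char a;
     unsigned int b = ((unsigned int) a >> 3) & 7;

   both the shift and the mask can be evaluated in unsigned char (or, when
   the constants demand it, an intermediate unsigned short), because each
   constant fits the narrow type; this helper validates one such statement
   of the sequence at a time.  */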
static bool
vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
				  tree *op0, tree *op1, gimple *new_def_stmt,
				  vec<gimple> *stmts)
{
  enum tree_code code;
  tree const_oprnd, oprnd;
  tree interm_type = NULL_TREE, half_type, new_oprnd, type;
  gimple def_stmt, new_stmt;
  bool first = false;
  bool promotion;

  *op0 = NULL_TREE;
  *op1 = NULL_TREE;
  *new_def_stmt = NULL;

  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
      && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
    return false;

  oprnd = gimple_assign_rhs1 (stmt);
  const_oprnd = gimple_assign_rhs2 (stmt);
  type = gimple_expr_type (stmt);

  if (TREE_CODE (oprnd) != SSA_NAME
      || TREE_CODE (const_oprnd) != INTEGER_CST)
    return false;

  /* If oprnd has other uses besides that in stmt we cannot mark it
     as being part of a pattern only.  */
  if (!has_single_use (oprnd))
    return false;

  /* If we are in the middle of a sequence, we use DEF from a previous
     statement.  Otherwise, OPRND has to be a result of type promotion.  */
  if (*new_type)
    {
      half_type = *new_type;
      oprnd = def;
    }
  else
    {
      first = true;
      if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
			      &promotion)
	  || !promotion
	  || !vect_same_loop_or_bb_p (stmt, def_stmt))
	return false;
    }

  /* Can we perform the operation on a smaller type?  */
  switch (code)
    {
      case BIT_IOR_EXPR:
      case BIT_XOR_EXPR:
      case BIT_AND_EXPR:
        if (!int_fits_type_p (const_oprnd, half_type))
          {
            /* HALF_TYPE is not enough.  Try a bigger type if possible.  */
            if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
              return false;

            interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
            if (!int_fits_type_p (const_oprnd, interm_type))
              return false;
          }

        break;

      case LSHIFT_EXPR:
        /* Try intermediate type - HALF_TYPE is not enough for sure.  */
        if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
          return false;

        /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
          (e.g., if the original value was char, the shift amount is at most 8
           if we want to use short).  */
        if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
          return false;

        interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

        if (!vect_supportable_shift (code, interm_type))
          return false;

        break;

      case RSHIFT_EXPR:
        if (vect_supportable_shift (code, half_type))
          break;

        /* Try intermediate type - HALF_TYPE is not supported.  */
        if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
          return false;

        interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

        if (!vect_supportable_shift (code, interm_type))
          return false;

        break;

      default:
        gcc_unreachable ();
    }

  /* There are four possible cases:
     1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
        the first statement in the sequence)
     a. The original, HALF_TYPE, is not enough - we replace the promotion
        from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
     b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
        promotion.
     2. OPRND is defined by a pattern statement we created.
     a. Its type is not sufficient for the operation, we create a new stmt:
        a type conversion for OPRND from HALF_TYPE to INTERM_TYPE.  We store
        this statement in NEW_DEF_STMT, and it is later put in
	STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
     b. OPRND is good to use in the new statement.  */
  if (first)
    {
      if (interm_type)
        {
          /* Replace the original type conversion HALF_TYPE->TYPE with
             HALF_TYPE->INTERM_TYPE.  */
          if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
            {
              new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
              /* Check if the already created pattern stmt is what we need.  */
              if (!is_gimple_assign (new_stmt)
                  || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
                  || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
                return false;

	      stmts->safe_push (def_stmt);
              oprnd = gimple_assign_lhs (new_stmt);
            }
          else
            {
              /* Create NEW_OPRND = (INTERM_TYPE) OPRND.  */
              oprnd = gimple_assign_rhs1 (def_stmt);
	      new_oprnd = make_ssa_name (interm_type, NULL);
	      new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
						       oprnd, NULL_TREE);
              STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
              stmts->safe_push (def_stmt);
              oprnd = new_oprnd;
            }
        }
      else
        {
          /* Retrieve the operand before the type promotion.  */
          oprnd = gimple_assign_rhs1 (def_stmt);
        }
    }
  else
    {
      if (interm_type)
        {
          /* Create a type conversion HALF_TYPE->INTERM_TYPE.  */
	  new_oprnd = make_ssa_name (interm_type, NULL);
	  new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
						   oprnd, NULL_TREE);
          oprnd = new_oprnd;
          *new_def_stmt = new_stmt;
        }

      /* Otherwise, OPRND is already set.  */
    }

  if (interm_type)
    *new_type = interm_type;
  else
    *new_type = half_type;

  *op0 = oprnd;
  *op1 = fold_convert (*new_type, const_oprnd);

  return true;
}
/* Try to find a statement or a sequence of statements that can be performed
   on a smaller type:

     type x_t;
     TYPE x_T, res0_T, res1_T;
   loop:
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  res0_T = op (x_T, C0);
     S4  res1_T = op (res0_T, C1);
     S5  ... = () res1_T;  - type demotion

   where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
   constants.
   Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
   be 'type' or some intermediate type.  For now, we expect S5 to be a type
   demotion operation.  We also check that S3 and S4 have only one use.  */
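
/* For example (hypothetical),

     unsigned char a[N], b[N];
     for (i = 0; i < N; i++)
       b[i] = (a[i] << 2) | 3;

   is computed by C in int, but both the shift and the bitwise-or fit the
   original unsigned char, so the whole sequence is re-expressed on the
   narrow type and twice as many elements fit per vector.  */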
static gimple
vect_recog_over_widening_pattern (vec<gimple> *stmts,
                                  tree *type_in, tree *type_out)
{
  gimple stmt = stmts->pop ();
  gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
  tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
  tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
  bool first;
  tree type = NULL;

  first = true;
  while (1)
    {
      if (!vinfo_for_stmt (stmt)
          || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
        return NULL;

      new_def_stmt = NULL;
      if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
                                             &op0, &op1, &new_def_stmt,
                                             stmts))
        {
          if (first)
            return NULL;
          else
            break;
        }

      /* STMT can be performed on a smaller type.  Check its uses.  */
      use_stmt = vect_single_imm_use (stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt))
        return NULL;

      /* Create pattern statement for STMT.  */
      vectype = get_vectype_for_scalar_type (new_type);
      if (!vectype)
        return NULL;

      /* We want to collect all the statements for which we create pattern
         statements, except for the case when the last statement in the
         sequence doesn't have a corresponding pattern statement.  In such
         case we associate the last pattern statement with the last statement
         in the sequence.  Therefore, we only add the original statement to
         the list if we know that it is not the last.  */
      if (prev_stmt)
        stmts->safe_push (prev_stmt);

      var = vect_recog_temp_ssa_var (new_type, NULL);
      pattern_stmt
	= gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt), var,
					op0, op1);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
      new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "created pattern stmt: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
          dump_printf (MSG_NOTE, "\n");
        }

      type = gimple_expr_type (stmt);
      prev_stmt = stmt;
      stmt = use_stmt;

      first = false;
    }

  /* We got a sequence.  We expect it to end with a type demotion operation.
     Otherwise, we quit (for now).  There are three possible cases: the
     conversion is to NEW_TYPE (we don't do anything), the conversion is to
     a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
     NEW_TYPE differs (we create a new conversion statement).  */
  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
    {
      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      /* Support only type demotion or signedness change.  */
      if (!INTEGRAL_TYPE_P (use_type)
	  || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
        return NULL;

      /* Check that NEW_TYPE is not bigger than the conversion result.  */
      if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
	return NULL;

      if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
          || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
        {
          /* Create NEW_TYPE->USE_TYPE conversion.  */
	  new_oprnd = make_ssa_name (use_type, NULL);
	  pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
						       var, NULL_TREE);
          STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;

          *type_in = get_vectype_for_scalar_type (new_type);
          *type_out = get_vectype_for_scalar_type (use_type);

          /* We created a pattern statement for the last statement in the
             sequence, so we don't need to associate it with the pattern
             statement created for PREV_STMT.  Therefore, we add PREV_STMT
             to the list in order to mark it later in vect_pattern_recog_1.  */
          if (prev_stmt)
            stmts->safe_push (prev_stmt);
        }
      else
        {
          if (prev_stmt)
	    STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
	       = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));

          *type_in = vectype;
          *type_out = NULL_TREE;
        }

      stmts->safe_push (use_stmt);
    }
  else
    /* TODO: support general case, create a conversion to the correct type.  */
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_over_widening_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  return pattern_stmt;
}
/* Detect widening shift pattern:

   type a_t;
   TYPE a_T, res_T;

   S1 a_t = ;
   S2 a_T = (TYPE) a_t;
   S3 res_T = a_T << CONST;

  where type 'TYPE' is at least double the size of type 'type'.

  Also detect cases where the shift result is immediately converted
  to another type 'result_type' that is no larger in size than 'TYPE'.
  In those cases we perform a widen-shift that directly results in
  'result_type', to avoid a possible over-widening situation:

  type a_t;
  TYPE a_T, res_T;
  result_type res_result;

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
  S3 res_T = a_T << CONST;
  S4 res_result = (result_type) res_T;
      '--> res_result' = a_t w<< CONST;

  And a case when 'TYPE' is 4 times bigger than 'type'.  In that case we
  create an additional pattern stmt for S2 to create a variable of an
  intermediate type, and perform widen-shift on the intermediate type:

  type a_t;
  interm_type a_it;
  TYPE a_T, res_T, res_T';

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
      '--> a_it = (interm_type) a_t;
  S3 res_T = a_T << CONST;
      '--> res_T' = a_it <<* CONST;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins.
    In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
    in STMTS.  When an intermediate type is used and a pattern statement is
    created for S2, we also put S2 here (before S3).

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the sequence of
    stmts that constitute the pattern.  In this case it will be:
    WIDEN_LSHIFT_EXPR <a_t, CONST>.  */
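
/* A small (hypothetical) trigger:

     unsigned char a[N];
     unsigned short b[N];
     for (i = 0; i < N; i++)
       b[i] = a[i] << 7;

   The promotion of a[i] and the shift collapse into a single
   WIDEN_LSHIFT_EXPR <a[i], 7> on the narrow input, with the result
   produced directly in the 16-bit result type.  */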
static gimple
vect_recog_widen_shift_pattern (vec<gimple> *stmts,
				tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  gimple def_stmt0;
  tree oprnd0, oprnd1;
  tree type, half_type0;
  gimple pattern_stmt;
  tree vectype, vectype_out = NULL_TREE;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  gimple use_stmt;
  bool promotion;

  if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
    return NULL;

  if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
    return NULL;

  /* Check operand 0: it has to be defined by a type promotion.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
			  &promotion)
      || !promotion)
    return NULL;

  /* Check operand 1: has to be positive.  We check that it fits the type
     in vect_handle_widen_op_by_const ().  */
  if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (def_stmt0);
  type = gimple_expr_type (last_stmt);

  /* Check for subsequent conversion to another type.  */
  use_stmt = vect_single_imm_use (last_stmt);
  if (use_stmt && is_gimple_assign (use_stmt)
      && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
      && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
    {
      tree use_lhs = gimple_assign_lhs (use_stmt);
      tree use_type = TREE_TYPE (use_lhs);

      if (INTEGRAL_TYPE_P (use_type)
	  && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
	{
	  last_stmt = use_stmt;
	  type = use_type;
	}
    }

  /* Check if this is a widening operation.  */
  if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
				      &oprnd0, stmts,
				      type, &half_type0, def_stmt0))
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_recog_widen_shift_pattern: detected:\n");

  /* Check target support.  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);

  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
					  vectype_out, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt =
    gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR, var, oprnd0, oprnd1);

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}
/* Detect a rotate pattern that wouldn't be otherwise vectorized:

   type a_t, b_t, c_t;

   S0 a_t = b_t r<< c_t;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins,
    i.e. the shift/rotate stmt.  The original stmt (S0) is replaced
    with a sequence:

    S1 d_t = -c_t;
    S2 e_t = d_t & (B - 1);
    S3 f_t = b_t << c_t;
    S4 g_t = b_t >> e_t;
    S0 a_t = f_t | g_t;

    where B is element bitsize of type.

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the rotate
    S0 stmt.  */
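
/* Typical (hypothetical) source form, e.g. from a hash-mixing loop:

     unsigned int x[N];
     for (i = 0; i < N; i++)
       x[i] = (x[i] << 13) | (x[i] >> 19);

   GIMPLE folds this into a 32-bit rotate by 13; on targets without a
   vector rotate this recognizer expands it back into the shift/shift/or
   form above, which the vectorizer can handle.  */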
static gimple
vect_recog_rotate_pattern (vec<gimple> *stmts, tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
  gimple pattern_stmt, def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  enum vect_def_type dt;
  optab optab1, optab2;
  edge ext_def = NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  lhs = gimple_assign_lhs (last_stmt);
  oprnd0 = gimple_assign_rhs1 (last_stmt);
  type = TREE_TYPE (oprnd0);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
      || !INTEGRAL_TYPE_P (type)
      || !TYPE_UNSIGNED (type))
    return NULL;

  if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
			   &def, &dt))
    return NULL;

  if (dt != vect_internal_def
      && dt != vect_constant_def
      && dt != vect_external_def)
    return NULL;

  vectype = get_vectype_for_scalar_type (type);
  if (vectype == NULL_TREE)
    return NULL;

  /* If vector/vector or vector/scalar rotate is supported by the target,
     don't do anything here.  */
  optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
  if (optab1
      && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
    return NULL;

  if (bb_vinfo != NULL || dt != vect_internal_def)
    {
      optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
      if (optab2
	  && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
	return NULL;
    }

  /* If vector/vector or vector/scalar shifts aren't supported by the target,
     don't do anything here either.  */
  optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
  optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
  if (!optab1
      || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
      || !optab2
      || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      if (bb_vinfo == NULL && dt == vect_internal_def)
	return NULL;
      optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
      optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
      if (!optab1
	  || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
	  || !optab2
	  || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
	return NULL;
    }

  *type_in = vectype;
  *type_out = vectype;
  if (*type_in == NULL_TREE)
    return NULL;

  if (dt == vect_external_def
      && TREE_CODE (oprnd1) == SSA_NAME
      && loop_vinfo)
    {
      struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      ext_def = loop_preheader_edge (loop);
      if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
	{
	  basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
	  if (bb == NULL
	      || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
	    ext_def = NULL;
	}
    }

  def = NULL_TREE;
  if (TREE_CODE (oprnd1) == INTEGER_CST
      || TYPE_MODE (TREE_TYPE (oprnd1)) == TYPE_MODE (type))
    def = oprnd1;
  else if (def_stmt && gimple_assign_cast_p (def_stmt))
    {
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (type)
	  && TYPE_PRECISION (TREE_TYPE (rhs1))
	     == TYPE_PRECISION (type))
	def = rhs1;
    }

  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
  if (def == NULL_TREE)
    {
      def = vect_recog_temp_ssa_var (type, NULL);
      def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
					       NULL_TREE);
      if (ext_def)
	{
	  basic_block new_bb
	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
	  gcc_assert (!new_bb);
	}
      else
	append_pattern_def_seq (stmt_vinfo, def_stmt);
    }
  stype = TREE_TYPE (def);

  if (TREE_CODE (def) == INTEGER_CST)
    {
      if (!host_integerp (def, 1)
	  || (unsigned HOST_WIDE_INT) tree_low_cst (def, 1)
	     >= GET_MODE_PRECISION (TYPE_MODE (type))
	  || integer_zerop (def))
	return NULL;
      def2 = build_int_cst (stype,
			    GET_MODE_PRECISION (TYPE_MODE (type))
			    - tree_low_cst (def, 1));
    }
  else
    {
      tree vecstype = get_vectype_for_scalar_type (stype);
      stmt_vec_info def_stmt_vinfo;

      if (vecstype == NULL_TREE)
	return NULL;
      def2 = vect_recog_temp_ssa_var (stype, NULL);
      def_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, def2, def,
					       NULL_TREE);
      if (ext_def)
	{
	  basic_block new_bb
	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
	  gcc_assert (!new_bb);
	}
      else
	{
	  def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
	  set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
	  STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
	  append_pattern_def_seq (stmt_vinfo, def_stmt);
	}

      def2 = vect_recog_temp_ssa_var (stype, NULL);
      tree mask
	= build_int_cst (stype, GET_MODE_PRECISION (TYPE_MODE (stype)) - 1);
      def_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, def2,
					       gimple_assign_lhs (def_stmt),
					       mask);
      if (ext_def)
	{
	  basic_block new_bb
	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
	  gcc_assert (!new_bb);
	}
      else
	{
	  def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
	  set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
	  STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
	  append_pattern_def_seq (stmt_vinfo, def_stmt);
	}
    }

  var1 = vect_recog_temp_ssa_var (type, NULL);
  def_stmt = gimple_build_assign_with_ops (rhs_code == LROTATE_EXPR
					   ? LSHIFT_EXPR : RSHIFT_EXPR,
					   var1, oprnd0, def);
  append_pattern_def_seq (stmt_vinfo, def_stmt);

  var2 = vect_recog_temp_ssa_var (type, NULL);
  def_stmt = gimple_build_assign_with_ops (rhs_code == LROTATE_EXPR
					   ? RSHIFT_EXPR : LSHIFT_EXPR,
					   var2, oprnd0, def2);
  append_pattern_def_seq (stmt_vinfo, def_stmt);

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_recog_rotate_pattern: detected:\n");

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (BIT_IOR_EXPR, var, var1, var2);

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}
/* Detect a vector by vector shift pattern that wouldn't be otherwise
   vectorized:

   type a_t;
   TYPE b_T, res_T;

   S1 a_t = ;
   S2 b_T = ;
   S3 res_T = b_T op a_t;

  where type 'TYPE' is a type with different size than 'type',
  and op is <<, >> or rotate.

  Also detect cases:

   type a_t;
   TYPE b_T, c_T, res_T;

   S0 c_T = ;
   S1 a_t = (type) c_T;
   S2 b_T = ;
   S3 res_T = b_T op a_t;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins,
    i.e. the shift/rotate stmt.  The original stmt (S3) is replaced
    with a shift/rotate which has same type on both operands, in the
    second case just b_T op c_T, in the first case with added cast
    from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the shift/rotate
    S3 stmt.  */
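
/* Concrete (hypothetical) shape:

     int out[N];
     char sh[N];
     for (i = 0; i < N; i++)
       out[i] = out[i] << sh[i];

   The char shift count is widened so that both shift operands live in
   the same vector type, which is what vector-by-vector shift
   instructions require.  */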
static gimple
vect_recog_vector_vector_shift_pattern (vec<gimple> *stmts,
					tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  tree oprnd0, oprnd1, lhs, var;
  gimple pattern_stmt, def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  enum vect_def_type dt;
  tree def;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  lhs = gimple_assign_lhs (last_stmt);
  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TREE_CODE (oprnd1) != SSA_NAME
      || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
      || TYPE_PRECISION (TREE_TYPE (oprnd1))
	 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
      || TYPE_PRECISION (TREE_TYPE (lhs))
	 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
    return NULL;

  if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
			   &def, &dt))
    return NULL;

  if (dt != vect_internal_def)
    return NULL;

  *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
  *type_out = *type_in;
  if (*type_in == NULL_TREE)
    return NULL;

  def = NULL_TREE;
  if (gimple_assign_cast_p (def_stmt))
    {
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
	  && TYPE_PRECISION (TREE_TYPE (rhs1))
	     == TYPE_PRECISION (TREE_TYPE (oprnd0)))
	def = rhs1;
    }

  if (def == NULL_TREE)
    {
      def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
      def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
					       NULL_TREE);
      new_pattern_def_seq (stmt_vinfo, def_stmt);
    }

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_recog_vector_vector_shift_pattern: detected:\n");

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
  pattern_stmt = gimple_build_assign_with_ops (rhs_code, var, oprnd0, def);

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}
/* Detect a signed division by a constant that wouldn't be
   otherwise vectorized:

   type a_t, b_t;

   S1 a_t = b_t / N;

  where type 'type' is an integral type and N is a constant.

  Similarly handle modulo by a constant:

   S4 a_t = b_t % N;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins,
    i.e. the division stmt.  S1 is replaced, if N is a power
    of two constant and type is signed, by:
    S3  y_t = b_t < 0 ? N - 1 : 0;
    S2  x_t = b_t + y_t;
    S1' a_t = x_t >> log2 (N);

    S4 is replaced, if N is a power of two constant and
    type is signed, by (where *_T temporaries have unsigned type):
    S9  y_T = b_t < 0 ? -1U : 0U;
    S8  z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
    S7  z_t = (type) z_T;
    S6  w_t = b_t + z_t;
    S5  x_t = w_t & (N - 1);
    S4' a_t = x_t - z_t;

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the division
    S1 or modulo S4 stmt.  */
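
/* For example (hypothetical), with a signed power-of-two divisor:

     int a[N];
     for (i = 0; i < N; i++)
       a[i] = a[i] / 4;

   becomes a_t = (x_t + (x_t < 0 ? 3 : 0)) >> 2, and non-power-of-two
   divisors go through the multiply-highpart sequences built below, so no
   vector divide instruction is ever needed.  */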
1885 vect_recog_divmod_pattern (vec
<gimple
> *stmts
,
1886 tree
*type_in
, tree
*type_out
)
1888 gimple last_stmt
= stmts
->pop ();
1889 tree oprnd0
, oprnd1
, vectype
, itype
, cond
;
1890 gimple pattern_stmt
, def_stmt
;
1891 enum tree_code rhs_code
;
1892 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
1893 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1894 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
1897 int dummy_int
, prec
;
1898 stmt_vec_info def_stmt_vinfo
;
1900 if (!is_gimple_assign (last_stmt
))
1903 rhs_code
= gimple_assign_rhs_code (last_stmt
);
1906 case TRUNC_DIV_EXPR
:
1907 case TRUNC_MOD_EXPR
:
1913 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
1916 oprnd0
= gimple_assign_rhs1 (last_stmt
);
1917 oprnd1
= gimple_assign_rhs2 (last_stmt
);
1918 itype
= TREE_TYPE (oprnd0
);
1919 if (TREE_CODE (oprnd0
) != SSA_NAME
1920 || TREE_CODE (oprnd1
) != INTEGER_CST
1921 || TREE_CODE (itype
) != INTEGER_TYPE
1922 || TYPE_PRECISION (itype
) != GET_MODE_PRECISION (TYPE_MODE (itype
)))
1925 vectype
= get_vectype_for_scalar_type (itype
);
1926 if (vectype
== NULL_TREE
)
1929 /* If the target can handle vectorized division or modulo natively,
1930 don't attempt to optimize this. */
1931 optab
= optab_for_tree_code (rhs_code
, vectype
, optab_default
);
1932 if (optab
!= unknown_optab
)
1934 enum machine_mode vec_mode
= TYPE_MODE (vectype
);
1935 int icode
= (int) optab_handler (optab
, vec_mode
);
1936 if (icode
!= CODE_FOR_nothing
)
1940 prec
= TYPE_PRECISION (itype
);
1941 if (integer_pow2p (oprnd1
))
1943 if (TYPE_UNSIGNED (itype
) || tree_int_cst_sgn (oprnd1
) != 1)
1946 /* Pattern detected. */
1947 if (dump_enabled_p ())
1948 dump_printf_loc (MSG_NOTE
, vect_location
,
1949 "vect_recog_divmod_pattern: detected:\n");
1951 cond
= build2 (LT_EXPR
, boolean_type_node
, oprnd0
,
1952 build_int_cst (itype
, 0));
1953 if (rhs_code
== TRUNC_DIV_EXPR
)
1955 tree var
= vect_recog_temp_ssa_var (itype
, NULL
);
1958 = gimple_build_assign_with_ops (COND_EXPR
, var
, cond
,
1959 fold_build2 (MINUS_EXPR
, itype
,
1961 build_int_cst (itype
,
1963 build_int_cst (itype
, 0));
1964 new_pattern_def_seq (stmt_vinfo
, def_stmt
);
1965 var
= vect_recog_temp_ssa_var (itype
, NULL
);
1967 = gimple_build_assign_with_ops (PLUS_EXPR
, var
, oprnd0
,
1968 gimple_assign_lhs (def_stmt
));
1969 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
1971 shift
= build_int_cst (itype
, tree_log2 (oprnd1
));
1973 = gimple_build_assign_with_ops (RSHIFT_EXPR
,
1974 vect_recog_temp_ssa_var (itype
,
1981 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
) = NULL
;
1982 if (compare_tree_int (oprnd1
, 2) == 0)
1984 signmask
= vect_recog_temp_ssa_var (itype
, NULL
);
1986 = gimple_build_assign_with_ops (COND_EXPR
, signmask
, cond
,
1987 build_int_cst (itype
, 1),
1988 build_int_cst (itype
, 0));
1989 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
1994 = build_nonstandard_integer_type (prec
, 1);
1995 tree vecutype
= get_vectype_for_scalar_type (utype
);
1997 = build_int_cst (utype
, GET_MODE_BITSIZE (TYPE_MODE (itype
))
1998 - tree_log2 (oprnd1
));
1999 tree var
= vect_recog_temp_ssa_var (utype
, NULL
);
2002 = gimple_build_assign_with_ops (COND_EXPR
, var
, cond
,
2003 build_int_cst (utype
, -1),
2004 build_int_cst (utype
, 0));
2006 = new_stmt_vec_info (def_stmt
, loop_vinfo
, bb_vinfo
);
2007 set_vinfo_for_stmt (def_stmt
, def_stmt_vinfo
);
2008 STMT_VINFO_VECTYPE (def_stmt_vinfo
) = vecutype
;
2009 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2010 var
= vect_recog_temp_ssa_var (utype
, NULL
);
2012 = gimple_build_assign_with_ops (RSHIFT_EXPR
, var
,
2013 gimple_assign_lhs (def_stmt
),
2016 = new_stmt_vec_info (def_stmt
, loop_vinfo
, bb_vinfo
);
2017 set_vinfo_for_stmt (def_stmt
, def_stmt_vinfo
);
2018 STMT_VINFO_VECTYPE (def_stmt_vinfo
) = vecutype
;
2019 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2020 signmask
= vect_recog_temp_ssa_var (itype
, NULL
);
2022 = gimple_build_assign_with_ops (NOP_EXPR
, signmask
, var
,
2024 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2027 = gimple_build_assign_with_ops (PLUS_EXPR
,
2028 vect_recog_temp_ssa_var (itype
,
2031 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2033 = gimple_build_assign_with_ops (BIT_AND_EXPR
,
2034 vect_recog_temp_ssa_var (itype
,
2036 gimple_assign_lhs (def_stmt
),
2037 fold_build2 (MINUS_EXPR
, itype
,
2039 build_int_cst (itype
,
2041 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2044 = gimple_build_assign_with_ops (MINUS_EXPR
,
2045 vect_recog_temp_ssa_var (itype
,
2047 gimple_assign_lhs (def_stmt
),
2051 if (dump_enabled_p ())
2052 dump_gimple_stmt_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, pattern_stmt
,
2055 stmts
->safe_push (last_stmt
);
2058 *type_out
= vectype
;
2059 return pattern_stmt
;
2062 if (!host_integerp (oprnd1
, TYPE_UNSIGNED (itype
))
2063 || integer_zerop (oprnd1
)
2064 || prec
> HOST_BITS_PER_WIDE_INT
)
2067 if (!can_mult_highpart_p (TYPE_MODE (vectype
), TYPE_UNSIGNED (itype
)))
2070 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
) = NULL
;
2072 if (TYPE_UNSIGNED (itype
))
2074 unsigned HOST_WIDE_INT mh
, ml
;
2075 int pre_shift
, post_shift
;
2076 unsigned HOST_WIDE_INT d
= tree_low_cst (oprnd1
, 1)
2077 & GET_MODE_MASK (TYPE_MODE (itype
));
2078 tree t1
, t2
, t3
, t4
;
2080 if (d
>= ((unsigned HOST_WIDE_INT
) 1 << (prec
- 1)))
2081 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2084 /* Find a suitable multiplier and right shift count
2085 instead of multiplying with D. */
2086 mh
= choose_multiplier (d
, prec
, prec
, &ml
, &post_shift
, &dummy_int
);
2088 /* If the suggested multiplier is more than SIZE bits, we can do better
2089 for even divisors, using an initial right shift. */
2090 if (mh
!= 0 && (d
& 1) == 0)
2092 pre_shift
= floor_log2 (d
& -d
);
2093 mh
= choose_multiplier (d
>> pre_shift
, prec
, prec
- pre_shift
,
2094 &ml
, &post_shift
, &dummy_int
);
2102 if (post_shift
- 1 >= prec
)
2105 /* t1 = oprnd0 h* ml;
2109 q = t4 >> (post_shift - 1); */
2110 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
2112 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR
, t1
, oprnd0
,
2113 build_int_cst (itype
, ml
));
2114 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2116 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
2118 = gimple_build_assign_with_ops (MINUS_EXPR
, t2
, oprnd0
, t1
);
2119 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2121 t3
= vect_recog_temp_ssa_var (itype
, NULL
);
2123 = gimple_build_assign_with_ops (RSHIFT_EXPR
, t3
, t2
,
2125 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2127 t4
= vect_recog_temp_ssa_var (itype
, NULL
);
2129 = gimple_build_assign_with_ops (PLUS_EXPR
, t4
, t1
, t3
);
2131 if (post_shift
!= 1)
2133 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2135 q
= vect_recog_temp_ssa_var (itype
, NULL
);
2137 = gimple_build_assign_with_ops (RSHIFT_EXPR
, q
, t4
,
2138 build_int_cst (itype
,
2145 pattern_stmt
= def_stmt
;
2150 if (pre_shift
>= prec
|| post_shift
>= prec
)
2153 /* t1 = oprnd0 >> pre_shift;
2155 q = t2 >> post_shift; */
2158 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
2160 = gimple_build_assign_with_ops (RSHIFT_EXPR
, t1
, oprnd0
,
2161 build_int_cst (NULL
,
2163 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2168 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
2170 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR
, t2
, t1
,
2171 build_int_cst (itype
, ml
));
2175 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2177 q
= vect_recog_temp_ssa_var (itype
, NULL
);
2179 = gimple_build_assign_with_ops (RSHIFT_EXPR
, q
, t2
,
2180 build_int_cst (itype
,
2186 pattern_stmt
= def_stmt
;
  else
    {
      unsigned HOST_WIDE_INT ml;
      int post_shift;
      HOST_WIDE_INT d = tree_low_cst (oprnd1, 0);
      unsigned HOST_WIDE_INT abs_d;
      bool add = false;
      tree t1, t2, t3, t4;

      /* Give up for -1.  */
      if (d == -1)
	return NULL;

      /* Since d might be INT_MIN, we have to cast to
	 unsigned HOST_WIDE_INT before negating to avoid
	 undefined signed overflow.  */
      abs_d = (d >= 0
	       ? (unsigned HOST_WIDE_INT) d
	       : - (unsigned HOST_WIDE_INT) d);

      /* n rem d = n rem -d */
      if (rhs_code == TRUNC_MOD_EXPR && d < 0)
	{
	  d = abs_d;
	  oprnd1 = build_int_cst (itype, abs_d);
	}
      else if (HOST_BITS_PER_WIDE_INT >= prec
	       && abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
	/* This case is not handled correctly below.  */
	return NULL;

      choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
      if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
	{
	  add = true;
	  ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
	}
      if (post_shift >= prec)
	return NULL;

      /* t1 = oprnd1 h* ml;  */
      t1 = vect_recog_temp_ssa_var (itype, NULL);
      def_stmt
	= gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
					build_int_cst (itype, ml));
      append_pattern_def_seq (stmt_vinfo, def_stmt);

      if (add)
	{
	  /* t2 = t1 + oprnd0;  */
	  t2 = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt
	    = gimple_build_assign_with_ops (PLUS_EXPR, t2, t1, oprnd0);
	  append_pattern_def_seq (stmt_vinfo, def_stmt);
	}
      else
	t2 = t1;

      if (post_shift)
	{
	  /* t3 = t2 >> post_shift;  */
	  t3 = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt
	    = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
					    build_int_cst (itype, post_shift));
	  append_pattern_def_seq (stmt_vinfo, def_stmt);
	}
      else
	t3 = t2;

      /* t4 = oprnd0 >> (prec - 1);  */
      t4 = vect_recog_temp_ssa_var (itype, NULL);
      def_stmt
	= gimple_build_assign_with_ops (RSHIFT_EXPR, t4, oprnd0,
					build_int_cst (itype, prec - 1));
      append_pattern_def_seq (stmt_vinfo, def_stmt);

      /* q = t3 - t4;  or q = t4 - t3;  */
      q = vect_recog_temp_ssa_var (itype, NULL);
      pattern_stmt
	= gimple_build_assign_with_ops (MINUS_EXPR, q, d < 0 ? t4 : t3,
					d < 0 ? t3 : t4);
    }
  if (rhs_code == TRUNC_MOD_EXPR)
    {
      tree r, t1;

      /* We divided.  Now finish by:
	 t1 = q * oprnd1;
	 r = oprnd0 - t1;  */
      append_pattern_def_seq (stmt_vinfo, pattern_stmt);

      t1 = vect_recog_temp_ssa_var (itype, NULL);
      def_stmt
	= gimple_build_assign_with_ops (MULT_EXPR, t1, q, oprnd1);
      append_pattern_def_seq (stmt_vinfo, def_stmt);

      r = vect_recog_temp_ssa_var (itype, NULL);
      pattern_stmt
	= gimple_build_assign_with_ops (MINUS_EXPR, r, oprnd0, t1);
    }
  /* Pattern detected.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_recog_divmod_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  stmts->safe_push (last_stmt);

  *type_in = vectype;
  *type_out = vectype;
  return pattern_stmt;
}
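
/* As an illustration (a sketch added here, not part of the original
   sources): for 32-bit unsigned n / 7, choose_multiplier needs a
   multiplier wider than 32 bits (mh != 0), so the add-back variant is
   used.  With the familiar magic constant 0x24924925 and post_shift == 3,
   the pattern def sequence built above corresponds to:

     t1 = n h* 0x24924925;
     t2 = n - t1;
     t3 = t2 >> 1;
     t4 = t1 + t3;
     q  = t4 >> 2;

   which the vectorizer can then implement with MULT_HIGHPART_EXPR and
   shifts on whole vectors at once.  */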
/* Function vect_recog_mixed_size_cond_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE a_T, b_T, c_T;
   loop:
     S1  a_T = x_t CMP y_t ? b_T : c_T;

   where type 'TYPE' is an integral type which has different size
   from 'type'.  b_T and c_T are either constants (and if 'TYPE' is wider
   than 'type', the constants need to fit into an integer type
   with the same width as 'type') or results of conversion from 'type'.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the pattern.
	Additionally a def_stmt is added.

	a_it = x_t CMP y_t ? b_it : c_it;
	a_T = (TYPE) a_it;  */
static gimple
vect_recog_mixed_size_cond_pattern (vec<gimple> *stmts, tree *type_in,
				    tree *type_out)
{
  gimple last_stmt = (*stmts)[0];
  tree cond_expr, then_clause, else_clause;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
  tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
  enum machine_mode cmpmode;
  gimple pattern_stmt, def_stmt;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
  gimple def_stmt0 = NULL, def_stmt1 = NULL;
  bool promotion;
  tree comp_scalar_type;

  if (!is_gimple_assign (last_stmt)
      || gimple_assign_rhs_code (last_stmt) != COND_EXPR
      || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;

  cond_expr = gimple_assign_rhs1 (last_stmt);
  then_clause = gimple_assign_rhs2 (last_stmt);
  else_clause = gimple_assign_rhs3 (last_stmt);

  if (!COMPARISON_CLASS_P (cond_expr))
    return NULL;

  comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
  comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
  if (comp_vectype == NULL_TREE)
    return NULL;

  type = gimple_expr_type (last_stmt);
  if (types_compatible_p (type, comp_scalar_type)
      || ((TREE_CODE (then_clause) != INTEGER_CST
	   || TREE_CODE (else_clause) != INTEGER_CST)
	  && !INTEGRAL_TYPE_P (comp_scalar_type))
      || !INTEGRAL_TYPE_P (type))
    return NULL;

  if ((TREE_CODE (then_clause) != INTEGER_CST
       && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
			      &def_stmt0, &promotion))
      || (TREE_CODE (else_clause) != INTEGER_CST
	  && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
				 &def_stmt1, &promotion)))
    return NULL;

  if (orig_type0 && orig_type1
      && !types_compatible_p (orig_type0, orig_type1))
    return NULL;

  if (orig_type0)
    {
      if (!types_compatible_p (orig_type0, comp_scalar_type))
	return NULL;
      then_clause = gimple_assign_rhs1 (def_stmt0);
      itype = orig_type0;
    }

  if (orig_type1)
    {
      if (!types_compatible_p (orig_type1, comp_scalar_type))
	return NULL;
      else_clause = gimple_assign_rhs1 (def_stmt1);
      itype = orig_type1;
    }

  cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));

  if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
    return NULL;

  vectype = get_vectype_for_scalar_type (type);
  if (vectype == NULL_TREE)
    return NULL;

  if (expand_vec_cond_expr_p (vectype, comp_vectype))
    return NULL;

  if (itype == NULL_TREE)
    itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
					    TYPE_UNSIGNED (type));

  if (itype == NULL_TREE
      || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
    return NULL;

  vecitype = get_vectype_for_scalar_type (itype);
  if (vecitype == NULL_TREE)
    return NULL;

  if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
    return NULL;

  if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
    {
      if ((TREE_CODE (then_clause) == INTEGER_CST
	   && !int_fits_type_p (then_clause, itype))
	  || (TREE_CODE (else_clause) == INTEGER_CST
	      && !int_fits_type_p (else_clause, itype)))
	return NULL;
    }

  def_stmt
    = gimple_build_assign_with_ops (COND_EXPR,
				    vect_recog_temp_ssa_var (itype, NULL),
				    unshare_expr (cond_expr),
				    fold_convert (itype, then_clause),
				    fold_convert (itype, else_clause));
  pattern_stmt
    = gimple_build_assign_with_ops (NOP_EXPR,
				    vect_recog_temp_ssa_var (type, NULL),
				    gimple_assign_lhs (def_stmt), NULL_TREE);

  new_pattern_def_seq (stmt_vinfo, def_stmt);
  def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
  set_vinfo_for_stmt (def_stmt, def_stmt_info);
  STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
  *type_in = vecitype;
  *type_out = vectype;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_recog_mixed_size_cond_pattern: detected:\n");

  return pattern_stmt;
}
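
/* For example (an illustrative sketch, not from the original sources):
   with 32-bit int x, y, the statement

     short a = x < y ? 1 : 2;

   compares 32-bit values but produces a 16-bit result.  The pattern above
   rewrites it through a 32-bit temporary a_it so the VEC_COND_EXPR works
   on equally sized vector elements:

     int a_it = x < y ? 1 : 2;
     short a  = (short) a_it;  */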
/* Helper function of vect_recog_bool_pattern.  Called recursively, return
   true if bool VAR can be optimized that way.  */

static bool
check_bool_pattern (tree var, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  gimple def_stmt;
  enum vect_def_type dt;
  tree def, rhs1;
  enum tree_code rhs_code;

  if (!vect_is_simple_use (var, NULL, loop_vinfo, bb_vinfo, &def_stmt, &def,
			   &dt))
    return false;

  if (dt != vect_internal_def)
    return false;

  if (!is_gimple_assign (def_stmt))
    return false;

  if (!has_single_use (def))
    return false;

  rhs1 = gimple_assign_rhs1 (def_stmt);
  rhs_code = gimple_assign_rhs_code (def_stmt);
  switch (rhs_code)
    {
    case SSA_NAME:
      return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);

    CASE_CONVERT:
      if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
	   || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
	  && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
	return false;
      return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);

    case BIT_NOT_EXPR:
      return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);

    case BIT_AND_EXPR:
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
      if (!check_bool_pattern (rhs1, loop_vinfo, bb_vinfo))
	return false;
      return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo,
				 bb_vinfo);

    default:
      if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
	{
	  tree vecitype, comp_vectype;

	  /* If the comparison can throw, then is_gimple_condexpr will be
	     false and we can't make a COND_EXPR/VEC_COND_EXPR out of it.  */
	  if (stmt_could_throw_p (def_stmt))
	    return false;

	  comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
	  if (comp_vectype == NULL_TREE)
	    return false;

	  if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
	    {
	      enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
	      tree itype
		= build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
	      vecitype = get_vectype_for_scalar_type (itype);
	      if (vecitype == NULL_TREE)
		return false;
	    }
	  else
	    vecitype = comp_vectype;
	  return expand_vec_cond_expr_p (vecitype, comp_vectype);
	}
      return false;
    }
}
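
/* E.g. (illustrative only): given

     bool a_b = x < y;
     bool b_b = m < n;
     bool c_b = a_b & b_b;

   check_bool_pattern recurses from c_b through the BIT_AND_EXPR into both
   comparison defs, and returns true only if each def has a single use and
   the target can expand a VEC_COND_EXPR for the corresponding comparison
   vector type.  */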
/* Helper function of adjust_bool_pattern.  Add a cast to TYPE to a previous
   stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
   to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT.  */

static tree
adjust_bool_pattern_cast (tree type, tree var)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
  gimple cast_stmt, pattern_stmt;

  gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
  pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
  new_pattern_def_seq (stmt_vinfo, pattern_stmt);
  cast_stmt
    = gimple_build_assign_with_ops (NOP_EXPR,
				    vect_recog_temp_ssa_var (type, NULL),
				    gimple_assign_lhs (pattern_stmt),
				    NULL_TREE);
  STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
  return gimple_assign_lhs (cast_stmt);
}
/* Helper function of vect_recog_bool_pattern.  Do the actual transformations,
   recursively.  VAR is an SSA_NAME that should be transformed from bool
   to a wider integer type, OUT_TYPE is the desired final integer type of
   the whole pattern, TRUEVAL should be NULL unless optimizing
   BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
   in the then_clause, STMTS is where statements with added pattern stmts
   should be pushed to.  */

static tree
adjust_bool_pattern (tree var, tree out_type, tree trueval,
		     vec<gimple> *stmts)
{
  gimple stmt = SSA_NAME_DEF_STMT (var);
  enum tree_code rhs_code, def_rhs_code;
  tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
  location_t loc;
  gimple pattern_stmt, def_stmt;

  rhs1 = gimple_assign_rhs1 (stmt);
  rhs2 = gimple_assign_rhs2 (stmt);
  rhs_code = gimple_assign_rhs_code (stmt);
  loc = gimple_location (stmt);
  switch (rhs_code)
    {
    case SSA_NAME:
    CASE_CONVERT:
      irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
      itype = TREE_TYPE (irhs1);
      pattern_stmt
	= gimple_build_assign_with_ops (SSA_NAME,
					vect_recog_temp_ssa_var (itype, NULL),
					irhs1, NULL_TREE);
      break;

    case BIT_NOT_EXPR:
      irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
      itype = TREE_TYPE (irhs1);
      pattern_stmt
	= gimple_build_assign_with_ops (BIT_XOR_EXPR,
					vect_recog_temp_ssa_var (itype, NULL),
					irhs1, build_int_cst (itype, 1));
      break;

    case BIT_AND_EXPR:
      /* Try to optimize x = y & (a < b ? 1 : 0); into
	 x = (a < b ? y : 0);

	 E.g. for:
	   bool a_b, b_b, c_b;
	   TYPE d_T;

	   S1  a_b = x1 CMP1 y1;
	   S2  b_b = x2 CMP2 y2;
	   S3  c_b = a_b & b_b;
	   S4  d_T = (TYPE) c_b;

	 we would normally emit:

	   S1'  a_T = x1 CMP1 y1 ? 1 : 0;
	   S2'  b_T = x2 CMP2 y2 ? 1 : 0;
	   S3'  c_T = a_T & b_T;
	   S4'  d_T = c_T;

	 but we can save one stmt by using the
	 result of one of the COND_EXPRs in the other COND_EXPR and leave
	 BIT_AND_EXPR stmt out:

	   S1'  a_T = x1 CMP1 y1 ? 1 : 0;
	   S3'  c_T = x2 CMP2 y2 ? a_T : 0;

	 At least when VEC_COND_EXPR is implemented using masks
	 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
	 computes the comparison masks and ands it, in one case with
	 all ones vector, in the other case with a vector register.
	 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
	 often more expensive.  */
      def_stmt = SSA_NAME_DEF_STMT (rhs2);
      def_rhs_code = gimple_assign_rhs_code (def_stmt);
      if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
	{
	  tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
	  irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
	  if (TYPE_PRECISION (TREE_TYPE (irhs1))
	      == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
	    {
	      gimple tstmt;
	      stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
	      irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
	      tstmt = stmts->pop ();
	      gcc_assert (tstmt == def_stmt);
	      stmts->quick_push (stmt);
	      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
		= STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
	      gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
	      STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
	      return irhs2;
	    }
	  else
	    irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
	  goto and_ior_xor;
	}
      def_stmt = SSA_NAME_DEF_STMT (rhs1);
      def_rhs_code = gimple_assign_rhs_code (def_stmt);
      if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
	{
	  tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
	  irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
	  if (TYPE_PRECISION (TREE_TYPE (irhs2))
	      == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
	    {
	      gimple tstmt;
	      stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
	      irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
	      tstmt = stmts->pop ();
	      gcc_assert (tstmt == def_stmt);
	      stmts->quick_push (stmt);
	      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
		= STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
	      gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
	      STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
	      return irhs1;
	    }
	  else
	    irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
	  goto and_ior_xor;
	}
      /* FALLTHRU */

    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
      irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
      irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
    and_ior_xor:
      if (TYPE_PRECISION (TREE_TYPE (irhs1))
	  != TYPE_PRECISION (TREE_TYPE (irhs2)))
	{
	  int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
	  int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
	  int out_prec = TYPE_PRECISION (out_type);
	  if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
	    irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
	  else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
	    irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
	  else
	    {
	      irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
	      irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
	    }
	}
      itype = TREE_TYPE (irhs1);
      pattern_stmt
	= gimple_build_assign_with_ops (rhs_code,
					vect_recog_temp_ssa_var (itype, NULL),
					irhs1, irhs2);
      break;

    default:
      gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
      if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
	  || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
	  || (TYPE_PRECISION (TREE_TYPE (rhs1))
	      != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
	{
	  enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
	  itype
	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
	}
      else
	itype = TREE_TYPE (rhs1);
      cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
      if (trueval == NULL_TREE)
	trueval = build_int_cst (itype, 1);
      else
	gcc_checking_assert (useless_type_conversion_p (itype,
							TREE_TYPE (trueval)));
      pattern_stmt
	= gimple_build_assign_with_ops (COND_EXPR,
					vect_recog_temp_ssa_var (itype, NULL),
					cond_expr, trueval,
					build_int_cst (itype, 0));
      break;
    }

  stmts->safe_push (stmt);
  gimple_set_location (pattern_stmt, loc);
  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
  return gimple_assign_lhs (pattern_stmt);
}
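
/* A worked example of the precision balancing above (illustrative only):
   for c = a_b | b_b with a 32-bit OUT_TYPE, where a_b was turned into an
   8-bit COND_EXPR (prec1 == 8) and b_b into a 16-bit one (prec2 == 16),
   absu_hwi (32 - 8) > absu_hwi (32 - 16), so irhs1 is re-cast via
   adjust_bool_pattern_cast to the 16-bit type of irhs2 and the
   BIT_IOR_EXPR is emitted on two 16-bit operands.  */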
/* Function vect_recog_bool_pattern

   Try to find pattern like following:

     bool a_b, b_b, c_b, d_b, e_b;
     TYPE f_T;
   loop:
     S1  a_b = x1 CMP1 y1;
     S2  b_b = x2 CMP2 y2;
     S3  c_b = a_b & b_b;
     S4  d_b = x3 CMP3 y3;
     S5  e_b = c_b | d_b;
     S6  f_T = (TYPE) e_b;

   where type 'TYPE' is an integral type.

   Input:

   * LAST_STMT: A stmt at the end from which the pattern
		search begins, i.e. cast of a bool to
		an integer type.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the pattern.

	Assuming size of TYPE is the same as size of all comparisons
	(otherwise some casts would be added where needed), the above
	sequence we create related pattern stmts:
	S1'  a_T = x1 CMP1 y1 ? 1 : 0;
	S3'  c_T = x2 CMP2 y2 ? a_T : 0;
	S4'  d_T = x3 CMP3 y3 ? 1 : 0;
	S5'  e_T = c_T | d_T;
	S6'  f_T = e_T;

	Instead of the above S3' we could emit:
	S2'  b_T = x2 CMP2 y2 ? 1 : 0;
	S3'  c_T = a_T | b_T;
	but the above is more efficient.  */
static gimple
vect_recog_bool_pattern (vec<gimple> *stmts, tree *type_in,
			 tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  enum tree_code rhs_code;
  tree var, lhs, rhs, vectype;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  gimple pattern_stmt;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  var = gimple_assign_rhs1 (last_stmt);
  lhs = gimple_assign_lhs (last_stmt);

  if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
       || !TYPE_UNSIGNED (TREE_TYPE (var)))
      && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  if (CONVERT_EXPR_CODE_P (rhs_code))
    {
      if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
	  || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
	return NULL;
      vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
      if (vectype == NULL_TREE)
	return NULL;

      if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
	return NULL;

      rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
      if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
	pattern_stmt
	  = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
      else
	pattern_stmt
	  = gimple_build_assign_with_ops (NOP_EXPR, lhs, rhs, NULL_TREE);
      *type_out = vectype;
      *type_in = vectype;
      stmts->safe_push (last_stmt);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_recog_bool_pattern: detected:\n");

      return pattern_stmt;
    }
  else if (rhs_code == SSA_NAME
	   && STMT_VINFO_DATA_REF (stmt_vinfo))
    {
      stmt_vec_info pattern_stmt_info;
      vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      gcc_assert (vectype != NULL_TREE);
      if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
	return NULL;
      if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
	return NULL;

      rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
      lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
      if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
	{
	  tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
	  gimple cast_stmt
	    = gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs, NULL_TREE);
	  new_pattern_def_seq (stmt_vinfo, cast_stmt);
	  rhs = rhs2;
	}
      pattern_stmt
	= gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
      pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
					     bb_vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
      STMT_VINFO_DATA_REF (pattern_stmt_info)
	= STMT_VINFO_DATA_REF (stmt_vinfo);
      STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
	= STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
      STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
      STMT_VINFO_DR_OFFSET (pattern_stmt_info)
	= STMT_VINFO_DR_OFFSET (stmt_vinfo);
      STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
      STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
	= STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
      DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
      *type_out = vectype;
      *type_in = vectype;
      stmts->safe_push (last_stmt);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_recog_bool_pattern: detected:\n");
      return pattern_stmt;
    }
  else
    return NULL;
}
/* Mark statements that are involved in a pattern.  */

static inline void
vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
			 tree pattern_vectype)
{
  stmt_vec_info pattern_stmt_info, def_stmt_info;
  stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (orig_stmt_info);
  gimple def_stmt;

  pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
  if (pattern_stmt_info == NULL)
    {
      pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
					     bb_vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
    }
  gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));

  STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
  STMT_VINFO_DEF_TYPE (pattern_stmt_info)
    = STMT_VINFO_DEF_TYPE (orig_stmt_info);
  STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
  STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
  STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
  STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
    = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
  if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
    {
      gimple_stmt_iterator si;
      for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
	   !gsi_end_p (si); gsi_next (&si))
	{
	  def_stmt = gsi_stmt (si);
	  def_stmt_info = vinfo_for_stmt (def_stmt);
	  if (def_stmt_info == NULL)
	    {
	      def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo,
						 bb_vinfo);
	      set_vinfo_for_stmt (def_stmt, def_stmt_info);
	    }
	  gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
	  STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
	  STMT_VINFO_DEF_TYPE (def_stmt_info)
	    = STMT_VINFO_DEF_TYPE (orig_stmt_info);
	  if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
	    STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
	}
    }
}
/* Function vect_pattern_recog_1

   Input:
   PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
	computation pattern.
   STMT: A stmt from which the pattern search should start.

   If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
   expression that computes the same functionality and can be used to
   replace the sequence of stmts that are involved in the pattern.

   Output:
   This function checks if the expression returned by PATTERN_RECOG_FUNC is
   supported in vector form by the target.  We use 'TYPE_IN' to obtain the
   relevant vector type.  If 'TYPE_IN' is already a vector type, then this
   indicates that target support had already been checked by PATTERN_RECOG_FUNC.
   If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
   to the available target pattern.

   This function also does some bookkeeping, as explained in the documentation
   for vect_recog_pattern.  */
static void
vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
		      gimple_stmt_iterator si,
		      vec<gimple> *stmts_to_replace)
{
  gimple stmt = gsi_stmt (si), pattern_stmt;
  stmt_vec_info stmt_info;
  loop_vec_info loop_vinfo;
  tree pattern_vectype;
  tree type_in, type_out;
  enum tree_code code;
  int i;
  gimple next;

  stmts_to_replace->truncate (0);
  stmts_to_replace->quick_push (stmt);
  pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
  if (!pattern_stmt)
    return;

  stmt = stmts_to_replace->last ();
  stmt_info = vinfo_for_stmt (stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);

  if (VECTOR_MODE_P (TYPE_MODE (type_in)))
    {
      /* No need to check target support (already checked by the pattern
	 recognition function).  */
      pattern_vectype = type_out ? type_out : type_in;
    }
  else
    {
      enum machine_mode vec_mode;
      enum insn_code icode;
      optab optab;

      /* Check target support  */
      type_in = get_vectype_for_scalar_type (type_in);
      if (!type_in)
	return;
      if (type_out)
	type_out = get_vectype_for_scalar_type (type_out);
      else
	type_out = type_in;
      if (!type_out)
	return;
      pattern_vectype = type_out;

      if (is_gimple_assign (pattern_stmt))
	code = gimple_assign_rhs_code (pattern_stmt);
      else
	{
	  gcc_assert (is_gimple_call (pattern_stmt));
	  code = CALL_EXPR;
	}

      optab = optab_for_tree_code (code, type_in, optab_default);
      vec_mode = TYPE_MODE (type_in);
      if (!optab
	  || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
	  || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
	return;
    }

  /* Found a vectorizable pattern.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "pattern recognized: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  /* Mark the stmts that are involved in the pattern. */
  vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);

  /* Patterns cannot be vectorized using SLP, because they change the order of
     computation.  */
  if (loop_vinfo)
    FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
      if (next == stmt)
	LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);

  /* It is possible that additional pattern stmts are created and inserted in
     STMTS_TO_REPLACE.  We create a stmt_info for each of them, and mark the
     relevant statements.  */
  for (i = 0; stmts_to_replace->iterate (i, &stmt)
	      && (unsigned) i < (stmts_to_replace->length () - 1);
       i++)
    {
      stmt_info = vinfo_for_stmt (stmt);
      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "additional pattern stmt: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
	  dump_printf (MSG_NOTE, "\n");
	}

      vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
    }
}
/* Function vect_pattern_recog

   Input:
   LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
	computation idioms.

   Output - for each computation idiom that is detected we create a new stmt
	that provides the same functionality and that can be vectorized.  We
	also record some information in the struct_stmt_info of the relevant
	stmts, as explained below:

   At the entry to this function we have the following stmts, with the
   following initial value in the STMT_VINFO fields:

	 stmt                     in_pattern_p  related_stmt    vec_stmt
	 S1: a_i = ....                 -       -               -
	 S2: a_2 = ..use(a_i)..         -       -               -
	 S3: a_1 = ..use(a_2)..         -       -               -
	 S4: a_0 = ..use(a_1)..         -       -               -
	 S5: ... = ..use(a_0)..         -       -               -

   Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
   represented by a single stmt.  We then:
   - create a new stmt S6 equivalent to the pattern (the stmt is not
     inserted into the code)
   - fill in the STMT_VINFO fields as follows:

				  in_pattern_p  related_stmt    vec_stmt
	 S1: a_i = ....                 -       -               -
	 S2: a_2 = ..use(a_i)..         -       -               -
	 S3: a_1 = ..use(a_2)..         -       -               -
	 S4: a_0 = ..use(a_1)..         true    S6              -
	   '---> S6: a_new = ....       -       S4              -
	 S5: ... = ..use(a_0)..         -       -               -

   (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
   to each other through the RELATED_STMT field).

   S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
   of S4 because it will replace all its uses.  Stmts {S1,S2,S3} will
   remain irrelevant unless used by stmts other than S4.

   If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
   (because they are marked as irrelevant).  It will vectorize S6, and record
   a pointer to the new vector stmt VS6 from S6 (as usual).
   S4 will be skipped, and S5 will be vectorized as usual:

				  in_pattern_p  related_stmt    vec_stmt
	 S1: a_i = ....                 -       -               -
	 S2: a_2 = ..use(a_i)..         -       -               -
	 S3: a_1 = ..use(a_2)..         -       -               -
       > VS6: va_new = ....             -       -               -
	 S4: a_0 = ..use(a_1)..         true    S6              VS6
	   '---> S6: a_new = ....       -       S4              VS6
       > VS5: ... = ..vuse(va_new)..    -       -               -
	 S5: ... = ..use(a_0)..         -       -               -

   DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
   elsewhere), and we'll end up with:

	VS6: va_new = ....
	VS5: ... = ..vuse(va_new)..

   In case of more than one pattern statements, e.g., widen-mult with
   intermediate type:

     S1  a_t = ;
     S2  a_T = (TYPE) a_t;
	   '--> S3: a_it = (interm_type) a_t;
     S4  prod_T = a_T * CONST;
	   '--> S5: prod_T' = a_it w* CONST;

   there may be other users of a_T outside the pattern.  In that case S2 will
   be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
   and vectorized.  The vector stmt VS2 will be recorded in S2, and VS3 will
   be recorded in S3.  */
void
vect_pattern_recog (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  struct loop *loop;
  basic_block *bbs;
  unsigned int nbbs;
  gimple_stmt_iterator si;
  unsigned int i, j;
  vect_recog_func_ptr vect_recog_func;
  vec<gimple> stmts_to_replace;
  stmts_to_replace.create (1);
  gimple stmt;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "=== vect_pattern_recog ===\n");

  if (loop_vinfo)
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      bbs = LOOP_VINFO_BBS (loop_vinfo);
      nbbs = loop->num_nodes;
    }
  else
    {
      bbs = &BB_VINFO_BB (bb_vinfo);
      nbbs = 1;
    }

  /* Scan through the loop stmts, applying the pattern recognition
     functions starting at each stmt visited:  */
  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  if (bb_vinfo && (stmt = gsi_stmt (si))
	      && vinfo_for_stmt (stmt)
	      && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
	    continue;

	  /* Scan over all generic vect_recog_xxx_pattern functions.  */
	  for (j = 0; j < NUM_PATTERNS; j++)
	    {
	      vect_recog_func = vect_vect_recog_func_ptrs[j];
	      vect_pattern_recog_1 (vect_recog_func, si,
				    &stmts_to_replace);
	    }
	}
    }

  stmts_to_replace.release ();
}
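
/* Usage note (a sketch of the call sites as assumed here, not quoted from
   them): the loop vectorizer is expected to invoke this entry point as
   vect_pattern_recog (loop_vinfo, NULL) during loop analysis, and the
   basic-block (SLP) vectorizer as vect_pattern_recog (NULL, bb_vinfo), so
   exactly one of the two vinfo arguments is non-NULL.  */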