1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2014 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
26 #include "stor-layout.h"
28 #include "basic-block.h"
29 #include "gimple-pretty-print.h"
30 #include "tree-ssa-alias.h"
31 #include "internal-fn.h"
33 #include "gimple-expr.h"
37 #include "gimple-iterator.h"
38 #include "gimple-ssa.h"
39 #include "tree-phinodes.h"
40 #include "ssa-iterators.h"
41 #include "stringpool.h"
42 #include "tree-ssanames.h"
47 #include "tree-data-ref.h"
48 #include "tree-vectorizer.h"
49 #include "recog.h" /* FIXME: for insn_data */
50 #include "diagnostic-core.h"
53 /* Pattern recognition functions */
54 static gimple
vect_recog_widen_sum_pattern (vec
<gimple
> *, tree
*,
56 static gimple
vect_recog_widen_mult_pattern (vec
<gimple
> *, tree
*,
58 static gimple
vect_recog_dot_prod_pattern (vec
<gimple
> *, tree
*,
60 static gimple
vect_recog_pow_pattern (vec
<gimple
> *, tree
*, tree
*);
61 static gimple
vect_recog_over_widening_pattern (vec
<gimple
> *, tree
*,
63 static gimple
vect_recog_widen_shift_pattern (vec
<gimple
> *,
65 static gimple
vect_recog_rotate_pattern (vec
<gimple
> *, tree
*, tree
*);
66 static gimple
vect_recog_vector_vector_shift_pattern (vec
<gimple
> *,
68 static gimple
vect_recog_divmod_pattern (vec
<gimple
> *,
70 static gimple
vect_recog_mixed_size_cond_pattern (vec
<gimple
> *,
72 static gimple
vect_recog_bool_pattern (vec
<gimple
> *, tree
*, tree
*);
73 static vect_recog_func_ptr vect_vect_recog_func_ptrs
[NUM_PATTERNS
] = {
74 vect_recog_widen_mult_pattern
,
75 vect_recog_widen_sum_pattern
,
76 vect_recog_dot_prod_pattern
,
77 vect_recog_pow_pattern
,
78 vect_recog_widen_shift_pattern
,
79 vect_recog_over_widening_pattern
,
80 vect_recog_rotate_pattern
,
81 vect_recog_vector_vector_shift_pattern
,
82 vect_recog_divmod_pattern
,
83 vect_recog_mixed_size_cond_pattern
,
84 vect_recog_bool_pattern
};
87 append_pattern_def_seq (stmt_vec_info stmt_info
, gimple stmt
)
89 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
),
94 new_pattern_def_seq (stmt_vec_info stmt_info
, gimple stmt
)
96 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
) = NULL
;
97 append_pattern_def_seq (stmt_info
, stmt
);
100 /* Check whether STMT2 is in the same loop or basic block as STMT1.
101 Which of the two applies depends on whether we're currently doing
102 loop-based or basic-block-based vectorization, as determined by
103 the vinfo_for_stmt for STMT1 (which must be defined).
105 If this returns true, vinfo_for_stmt for STMT2 is guaranteed
106 to be defined as well. */
109 vect_same_loop_or_bb_p (gimple stmt1
, gimple stmt2
)
111 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt1
);
112 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
113 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
115 if (!gimple_bb (stmt2
))
120 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
121 if (!flow_bb_inside_loop_p (loop
, gimple_bb (stmt2
)))
126 if (gimple_bb (stmt2
) != BB_VINFO_BB (bb_vinfo
)
127 || gimple_code (stmt2
) == GIMPLE_PHI
)
131 gcc_assert (vinfo_for_stmt (stmt2
));
135 /* If the LHS of DEF_STMT has a single use, and that statement is
136 in the same loop or basic block, return it. */
139 vect_single_imm_use (gimple def_stmt
)
141 tree lhs
= gimple_assign_lhs (def_stmt
);
145 if (!single_imm_use (lhs
, &use_p
, &use_stmt
))
148 if (!vect_same_loop_or_bb_p (def_stmt
, use_stmt
))
154 /* Check whether NAME, an ssa-name used in USE_STMT,
155 is a result of a type promotion or demotion, such that:
156 DEF_STMT: NAME = NOP (name0)
157 where the type of name0 (ORIG_TYPE) is smaller/bigger than the type of NAME.
158 If CHECK_SIGN is TRUE, check that either both types are signed or both are
162 type_conversion_p (tree name
, gimple use_stmt
, bool check_sign
,
163 tree
*orig_type
, gimple
*def_stmt
, bool *promotion
)
167 loop_vec_info loop_vinfo
;
168 stmt_vec_info stmt_vinfo
;
169 tree type
= TREE_TYPE (name
);
171 enum vect_def_type dt
;
173 bb_vec_info bb_vinfo
;
175 stmt_vinfo
= vinfo_for_stmt (use_stmt
);
176 loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
177 bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
178 if (!vect_is_simple_use (name
, use_stmt
, loop_vinfo
, bb_vinfo
, def_stmt
,
182 if (dt
!= vect_internal_def
183 && dt
!= vect_external_def
&& dt
!= vect_constant_def
)
189 if (!is_gimple_assign (*def_stmt
))
192 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt
)))
195 oprnd0
= gimple_assign_rhs1 (*def_stmt
);
197 *orig_type
= TREE_TYPE (oprnd0
);
198 if (!INTEGRAL_TYPE_P (type
) || !INTEGRAL_TYPE_P (*orig_type
)
199 || ((TYPE_UNSIGNED (type
) != TYPE_UNSIGNED (*orig_type
)) && check_sign
))
202 if (TYPE_PRECISION (type
) >= (TYPE_PRECISION (*orig_type
) * 2))
204 else if (TYPE_PRECISION (*orig_type
) >= (TYPE_PRECISION (type
) * 2))
209 if (!vect_is_simple_use (oprnd0
, *def_stmt
, loop_vinfo
,
210 bb_vinfo
, &dummy_gimple
, &dummy
, &dt
))
216 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
217 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
220 vect_recog_temp_ssa_var (tree type
, gimple stmt
)
222 return make_temp_ssa_name (type
, stmt
, "patt");
225 /* Function vect_recog_dot_prod_pattern
227 Try to find the following pattern:
233 sum_0 = phi <init, sum_1>
236 S3 x_T = (TYPE1) x_t;
237 S4 y_T = (TYPE1) y_t;
239 [S6 prod = (TYPE2) prod; #optional]
240 S7 sum_1 = prod + sum_0;
242 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
243 same size of 'TYPE1' or bigger. This is a special case of a reduction
248 * STMTS: Contains a stmt from which the pattern search begins. In the
249 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
254 * TYPE_IN: The type of the input arguments to the pattern.
256 * TYPE_OUT: The type of the output of this pattern.
258 * Return value: A new stmt that will be used to replace the sequence of
259 stmts that constitute the pattern. In this case it will be:
260 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
262 Note: The dot-prod idiom is a widening reduction pattern that is
263 vectorized without preserving all the intermediate results. It
264 produces only N/2 (widened) results (by summing up pairs of
265 intermediate results) rather than all N results. Therefore, we
266 cannot allow this pattern when we want to get all the results and in
267 the correct order (as is the case when this computation is in an
268 inner-loop nested in an outer-loop that us being vectorized). */
271 vect_recog_dot_prod_pattern (vec
<gimple
> *stmts
, tree
*type_in
,
274 gimple stmt
, last_stmt
= (*stmts
)[0];
276 tree oprnd00
, oprnd01
;
277 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
278 tree type
, half_type
;
281 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
289 loop
= LOOP_VINFO_LOOP (loop_info
);
291 if (!is_gimple_assign (last_stmt
))
294 type
= gimple_expr_type (last_stmt
);
296 /* Look for the following pattern
300 DDPROD = (TYPE2) DPROD;
301 sum_1 = DDPROD + sum_0;
303 - DX is double the size of X
304 - DY is double the size of Y
305 - DX, DY, DPROD all have the same type
306 - sum is the same size of DPROD or bigger
307 - sum has been recognized as a reduction variable.
309 This is equivalent to:
310 DPROD = X w* Y; #widen mult
311 sum_1 = DPROD w+ sum_0; #widen summation
313 DPROD = X w* Y; #widen mult
314 sum_1 = DPROD + sum_0; #summation
317 /* Starting from LAST_STMT, follow the defs of its uses in search
318 of the above pattern. */
320 if (gimple_assign_rhs_code (last_stmt
) != PLUS_EXPR
)
323 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
325 /* Has been detected as widening-summation? */
327 stmt
= STMT_VINFO_RELATED_STMT (stmt_vinfo
);
328 type
= gimple_expr_type (stmt
);
329 if (gimple_assign_rhs_code (stmt
) != WIDEN_SUM_EXPR
)
331 oprnd0
= gimple_assign_rhs1 (stmt
);
332 oprnd1
= gimple_assign_rhs2 (stmt
);
333 half_type
= TREE_TYPE (oprnd0
);
339 if (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
)
341 oprnd0
= gimple_assign_rhs1 (last_stmt
);
342 oprnd1
= gimple_assign_rhs2 (last_stmt
);
343 if (!types_compatible_p (TREE_TYPE (oprnd0
), type
)
344 || !types_compatible_p (TREE_TYPE (oprnd1
), type
))
348 if (type_conversion_p (oprnd0
, stmt
, true, &half_type
, &def_stmt
,
353 oprnd0
= gimple_assign_rhs1 (stmt
);
359 /* So far so good. Since last_stmt was detected as a (summation) reduction,
360 we know that oprnd1 is the reduction variable (defined by a loop-header
361 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
362 Left to check that oprnd0 is defined by a (widen_)mult_expr */
363 if (TREE_CODE (oprnd0
) != SSA_NAME
)
366 prod_type
= half_type
;
367 stmt
= SSA_NAME_DEF_STMT (oprnd0
);
369 /* It could not be the dot_prod pattern if the stmt is outside the loop. */
370 if (!gimple_bb (stmt
) || !flow_bb_inside_loop_p (loop
, gimple_bb (stmt
)))
373 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
374 inside the loop (in case we are analyzing an outer-loop). */
375 if (!is_gimple_assign (stmt
))
377 stmt_vinfo
= vinfo_for_stmt (stmt
);
378 gcc_assert (stmt_vinfo
);
379 if (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_internal_def
)
381 if (gimple_assign_rhs_code (stmt
) != MULT_EXPR
)
383 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
385 /* Has been detected as a widening multiplication? */
387 stmt
= STMT_VINFO_RELATED_STMT (stmt_vinfo
);
388 if (gimple_assign_rhs_code (stmt
) != WIDEN_MULT_EXPR
)
390 stmt_vinfo
= vinfo_for_stmt (stmt
);
391 gcc_assert (stmt_vinfo
);
392 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_internal_def
);
393 oprnd00
= gimple_assign_rhs1 (stmt
);
394 oprnd01
= gimple_assign_rhs2 (stmt
);
398 tree half_type0
, half_type1
;
402 oprnd0
= gimple_assign_rhs1 (stmt
);
403 oprnd1
= gimple_assign_rhs2 (stmt
);
404 if (!types_compatible_p (TREE_TYPE (oprnd0
), prod_type
)
405 || !types_compatible_p (TREE_TYPE (oprnd1
), prod_type
))
407 if (!type_conversion_p (oprnd0
, stmt
, true, &half_type0
, &def_stmt
,
411 oprnd00
= gimple_assign_rhs1 (def_stmt
);
412 if (!type_conversion_p (oprnd1
, stmt
, true, &half_type1
, &def_stmt
,
416 oprnd01
= gimple_assign_rhs1 (def_stmt
);
417 if (!types_compatible_p (half_type0
, half_type1
))
419 if (TYPE_PRECISION (prod_type
) != TYPE_PRECISION (half_type0
) * 2)
423 half_type
= TREE_TYPE (oprnd00
);
424 *type_in
= half_type
;
427 /* Pattern detected. Create a stmt to be used to replace the pattern: */
428 var
= vect_recog_temp_ssa_var (type
, NULL
);
429 pattern_stmt
= gimple_build_assign_with_ops (DOT_PROD_EXPR
, var
,
430 oprnd00
, oprnd01
, oprnd1
);
432 if (dump_enabled_p ())
434 dump_printf_loc (MSG_NOTE
, vect_location
,
435 "vect_recog_dot_prod_pattern: detected: ");
436 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
437 dump_printf (MSG_NOTE
, "\n");
440 /* We don't allow changing the order of the computation in the inner-loop
441 when doing outer-loop vectorization. */
442 gcc_assert (!nested_in_vect_loop_p (loop
, last_stmt
));
448 /* Handle widening operation by a constant. At the moment we support MULT_EXPR
451 For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
452 we check that CONST_OPRND is less or equal to the size of HALF_TYPE.
454 Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
455 HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
456 that satisfies the above restrictions, we can perform a widening opeartion
457 from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
458 with a_it = (interm_type) a_t; */
461 vect_handle_widen_op_by_const (gimple stmt
, enum tree_code code
,
462 tree const_oprnd
, tree
*oprnd
,
463 vec
<gimple
> *stmts
, tree type
,
464 tree
*half_type
, gimple def_stmt
)
466 tree new_type
, new_oprnd
;
469 if (code
!= MULT_EXPR
&& code
!= LSHIFT_EXPR
)
472 if (((code
== MULT_EXPR
&& int_fits_type_p (const_oprnd
, *half_type
))
473 || (code
== LSHIFT_EXPR
474 && compare_tree_int (const_oprnd
, TYPE_PRECISION (*half_type
))
476 && TYPE_PRECISION (type
) == (TYPE_PRECISION (*half_type
) * 2))
478 /* CONST_OPRND is a constant of HALF_TYPE. */
479 *oprnd
= gimple_assign_rhs1 (def_stmt
);
483 if (TYPE_PRECISION (type
) < (TYPE_PRECISION (*half_type
) * 4))
486 if (!vect_same_loop_or_bb_p (stmt
, def_stmt
))
489 /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
490 a type 2 times bigger than HALF_TYPE. */
491 new_type
= build_nonstandard_integer_type (TYPE_PRECISION (type
) / 2,
492 TYPE_UNSIGNED (type
));
493 if ((code
== MULT_EXPR
&& !int_fits_type_p (const_oprnd
, new_type
))
494 || (code
== LSHIFT_EXPR
495 && compare_tree_int (const_oprnd
, TYPE_PRECISION (new_type
)) == 1))
498 /* Use NEW_TYPE for widening operation. */
499 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
)))
501 new_stmt
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
));
502 /* Check if the already created pattern stmt is what we need. */
503 if (!is_gimple_assign (new_stmt
)
504 || gimple_assign_rhs_code (new_stmt
) != NOP_EXPR
505 || TREE_TYPE (gimple_assign_lhs (new_stmt
)) != new_type
)
508 stmts
->safe_push (def_stmt
);
509 *oprnd
= gimple_assign_lhs (new_stmt
);
513 /* Create a_T = (NEW_TYPE) a_t; */
514 *oprnd
= gimple_assign_rhs1 (def_stmt
);
515 new_oprnd
= make_ssa_name (new_type
, NULL
);
516 new_stmt
= gimple_build_assign_with_ops (NOP_EXPR
, new_oprnd
, *oprnd
,
518 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
)) = new_stmt
;
519 stmts
->safe_push (def_stmt
);
523 *half_type
= new_type
;
528 /* Function vect_recog_widen_mult_pattern
530 Try to find the following pattern:
533 TYPE a_T, b_T, prod_T;
539 S5 prod_T = a_T * b_T;
541 where type 'TYPE' is at least double the size of type 'type'.
543 Also detect unsigned cases:
545 unsigned type a_t, b_t;
546 unsigned TYPE u_prod_T;
547 TYPE a_T, b_T, prod_T;
553 S5 prod_T = a_T * b_T;
554 S6 u_prod_T = (unsigned TYPE) prod_T;
556 and multiplication by constants:
563 S5 prod_T = a_T * CONST;
565 A special case of multiplication by constants is when 'TYPE' is 4 times
566 bigger than 'type', but CONST fits an intermediate type 2 times smaller
567 than 'TYPE'. In that case we create an additional pattern stmt for S3
568 to create a variable of the intermediate type, and perform widen-mult
569 on the intermediate type as well:
573 TYPE a_T, prod_T, prod_T';
577 '--> a_it = (interm_type) a_t;
578 S5 prod_T = a_T * CONST;
579 '--> prod_T' = a_it w* CONST;
583 * STMTS: Contains a stmt from which the pattern search begins. In the
584 example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
585 is detected. In case of unsigned widen-mult, the original stmt (S5) is
586 replaced with S6 in STMTS. In case of multiplication by a constant
587 of an intermediate type (the last case above), STMTS also contains S3
588 (inserted before S5).
592 * TYPE_IN: The type of the input arguments to the pattern.
594 * TYPE_OUT: The type of the output of this pattern.
596 * Return value: A new stmt that will be used to replace the sequence of
597 stmts that constitute the pattern. In this case it will be:
598 WIDEN_MULT <a_t, b_t>
602 vect_recog_widen_mult_pattern (vec
<gimple
> *stmts
,
603 tree
*type_in
, tree
*type_out
)
605 gimple last_stmt
= stmts
->pop ();
606 gimple def_stmt0
, def_stmt1
;
608 tree type
, half_type0
, half_type1
;
610 tree vectype
, vectype_out
= NULL_TREE
;
612 enum tree_code dummy_code
;
618 if (!is_gimple_assign (last_stmt
))
621 type
= gimple_expr_type (last_stmt
);
623 /* Starting from LAST_STMT, follow the defs of its uses in search
624 of the above pattern. */
626 if (gimple_assign_rhs_code (last_stmt
) != MULT_EXPR
)
629 oprnd0
= gimple_assign_rhs1 (last_stmt
);
630 oprnd1
= gimple_assign_rhs2 (last_stmt
);
631 if (!types_compatible_p (TREE_TYPE (oprnd0
), type
)
632 || !types_compatible_p (TREE_TYPE (oprnd1
), type
))
635 /* Check argument 0. */
636 if (!type_conversion_p (oprnd0
, last_stmt
, false, &half_type0
, &def_stmt0
,
640 /* Check argument 1. */
641 op1_ok
= type_conversion_p (oprnd1
, last_stmt
, false, &half_type1
,
642 &def_stmt1
, &promotion
);
644 if (op1_ok
&& promotion
)
646 oprnd0
= gimple_assign_rhs1 (def_stmt0
);
647 oprnd1
= gimple_assign_rhs1 (def_stmt1
);
651 if (TREE_CODE (oprnd1
) == INTEGER_CST
652 && TREE_CODE (half_type0
) == INTEGER_TYPE
653 && vect_handle_widen_op_by_const (last_stmt
, MULT_EXPR
, oprnd1
,
654 &oprnd0
, stmts
, type
,
655 &half_type0
, def_stmt0
))
657 half_type1
= half_type0
;
658 oprnd1
= fold_convert (half_type1
, oprnd1
);
664 /* Handle unsigned case. Look for
665 S6 u_prod_T = (unsigned TYPE) prod_T;
666 Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */
667 if (TYPE_UNSIGNED (type
) != TYPE_UNSIGNED (half_type0
))
673 if (TYPE_UNSIGNED (type
) == TYPE_UNSIGNED (half_type1
))
676 use_stmt
= vect_single_imm_use (last_stmt
);
677 if (!use_stmt
|| !is_gimple_assign (use_stmt
)
678 || gimple_assign_rhs_code (use_stmt
) != NOP_EXPR
)
681 use_lhs
= gimple_assign_lhs (use_stmt
);
682 use_type
= TREE_TYPE (use_lhs
);
683 if (!INTEGRAL_TYPE_P (use_type
)
684 || (TYPE_UNSIGNED (type
) == TYPE_UNSIGNED (use_type
))
685 || (TYPE_PRECISION (type
) != TYPE_PRECISION (use_type
)))
689 last_stmt
= use_stmt
;
692 if (!types_compatible_p (half_type0
, half_type1
))
695 /* Pattern detected. */
696 if (dump_enabled_p ())
697 dump_printf_loc (MSG_NOTE
, vect_location
,
698 "vect_recog_widen_mult_pattern: detected:\n");
700 /* Check target support */
701 vectype
= get_vectype_for_scalar_type (half_type0
);
702 vectype_out
= get_vectype_for_scalar_type (type
);
705 || !supportable_widening_operation (WIDEN_MULT_EXPR
, last_stmt
,
706 vectype_out
, vectype
,
707 &dummy_code
, &dummy_code
,
708 &dummy_int
, &dummy_vec
))
712 *type_out
= vectype_out
;
714 /* Pattern supported. Create a stmt to be used to replace the pattern: */
715 var
= vect_recog_temp_ssa_var (type
, NULL
);
716 pattern_stmt
= gimple_build_assign_with_ops (WIDEN_MULT_EXPR
, var
, oprnd0
,
719 if (dump_enabled_p ())
720 dump_gimple_stmt_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, pattern_stmt
, 0);
722 stmts
->safe_push (last_stmt
);
727 /* Function vect_recog_pow_pattern
729 Try to find the following pattern:
733 with POW being one of pow, powf, powi, powif and N being
738 * LAST_STMT: A stmt from which the pattern search begins.
742 * TYPE_IN: The type of the input arguments to the pattern.
744 * TYPE_OUT: The type of the output of this pattern.
746 * Return value: A new stmt that will be used to replace the sequence of
747 stmts that constitute the pattern. In this case it will be:
754 vect_recog_pow_pattern (vec
<gimple
> *stmts
, tree
*type_in
,
757 gimple last_stmt
= (*stmts
)[0];
758 tree fn
, base
, exp
= NULL
;
762 if (!is_gimple_call (last_stmt
) || gimple_call_lhs (last_stmt
) == NULL
)
765 fn
= gimple_call_fndecl (last_stmt
);
766 if (fn
== NULL_TREE
|| DECL_BUILT_IN_CLASS (fn
) != BUILT_IN_NORMAL
)
769 switch (DECL_FUNCTION_CODE (fn
))
775 base
= gimple_call_arg (last_stmt
, 0);
776 exp
= gimple_call_arg (last_stmt
, 1);
777 if (TREE_CODE (exp
) != REAL_CST
778 && TREE_CODE (exp
) != INTEGER_CST
)
786 /* We now have a pow or powi builtin function call with a constant
789 *type_out
= NULL_TREE
;
791 /* Catch squaring. */
792 if ((tree_fits_shwi_p (exp
)
793 && tree_to_shwi (exp
) == 2)
794 || (TREE_CODE (exp
) == REAL_CST
795 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp
), dconst2
)))
797 *type_in
= TREE_TYPE (base
);
799 var
= vect_recog_temp_ssa_var (TREE_TYPE (base
), NULL
);
800 stmt
= gimple_build_assign_with_ops (MULT_EXPR
, var
, base
, base
);
804 /* Catch square root. */
805 if (TREE_CODE (exp
) == REAL_CST
806 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp
), dconsthalf
))
808 tree newfn
= mathfn_built_in (TREE_TYPE (base
), BUILT_IN_SQRT
);
809 *type_in
= get_vectype_for_scalar_type (TREE_TYPE (base
));
812 gimple stmt
= gimple_build_call (newfn
, 1, base
);
813 if (vectorizable_function (stmt
, *type_in
, *type_in
)
816 var
= vect_recog_temp_ssa_var (TREE_TYPE (base
), stmt
);
817 gimple_call_set_lhs (stmt
, var
);
827 /* Function vect_recog_widen_sum_pattern
829 Try to find the following pattern:
832 TYPE x_T, sum = init;
834 sum_0 = phi <init, sum_1>
837 S3 sum_1 = x_T + sum_0;
839 where type 'TYPE' is at least double the size of type 'type', i.e - we're
840 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
841 a special case of a reduction computation.
845 * LAST_STMT: A stmt from which the pattern search begins. In the example,
846 when this function is called with S3, the pattern {S2,S3} will be detected.
850 * TYPE_IN: The type of the input arguments to the pattern.
852 * TYPE_OUT: The type of the output of this pattern.
854 * Return value: A new stmt that will be used to replace the sequence of
855 stmts that constitute the pattern. In this case it will be:
856 WIDEN_SUM <x_t, sum_0>
858 Note: The widening-sum idiom is a widening reduction pattern that is
859 vectorized without preserving all the intermediate results. It
860 produces only N/2 (widened) results (by summing up pairs of
861 intermediate results) rather than all N results. Therefore, we
862 cannot allow this pattern when we want to get all the results and in
863 the correct order (as is the case when this computation is in an
864 inner-loop nested in an outer-loop that us being vectorized). */
867 vect_recog_widen_sum_pattern (vec
<gimple
> *stmts
, tree
*type_in
,
870 gimple stmt
, last_stmt
= (*stmts
)[0];
872 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
873 tree type
, half_type
;
875 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
883 loop
= LOOP_VINFO_LOOP (loop_info
);
885 if (!is_gimple_assign (last_stmt
))
888 type
= gimple_expr_type (last_stmt
);
890 /* Look for the following pattern
893 In which DX is at least double the size of X, and sum_1 has been
894 recognized as a reduction variable.
897 /* Starting from LAST_STMT, follow the defs of its uses in search
898 of the above pattern. */
900 if (gimple_assign_rhs_code (last_stmt
) != PLUS_EXPR
)
903 if (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
)
906 oprnd0
= gimple_assign_rhs1 (last_stmt
);
907 oprnd1
= gimple_assign_rhs2 (last_stmt
);
908 if (!types_compatible_p (TREE_TYPE (oprnd0
), type
)
909 || !types_compatible_p (TREE_TYPE (oprnd1
), type
))
912 /* So far so good. Since last_stmt was detected as a (summation) reduction,
913 we know that oprnd1 is the reduction variable (defined by a loop-header
914 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
915 Left to check that oprnd0 is defined by a cast from type 'type' to type
918 if (!type_conversion_p (oprnd0
, last_stmt
, true, &half_type
, &stmt
,
923 oprnd0
= gimple_assign_rhs1 (stmt
);
924 *type_in
= half_type
;
927 /* Pattern detected. Create a stmt to be used to replace the pattern: */
928 var
= vect_recog_temp_ssa_var (type
, NULL
);
929 pattern_stmt
= gimple_build_assign_with_ops (WIDEN_SUM_EXPR
, var
,
932 if (dump_enabled_p ())
934 dump_printf_loc (MSG_NOTE
, vect_location
,
935 "vect_recog_widen_sum_pattern: detected: ");
936 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
937 dump_printf (MSG_NOTE
, "\n");
940 /* We don't allow changing the order of the computation in the inner-loop
941 when doing outer-loop vectorization. */
942 gcc_assert (!nested_in_vect_loop_p (loop
, last_stmt
));
948 /* Return TRUE if the operation in STMT can be performed on a smaller type.
951 STMT - a statement to check.
952 DEF - we support operations with two operands, one of which is constant.
953 The other operand can be defined by a demotion operation, or by a
954 previous statement in a sequence of over-promoted operations. In the
955 later case DEF is used to replace that operand. (It is defined by a
956 pattern statement we created for the previous statement in the
960 NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not
961 NULL, it's the type of DEF.
962 STMTS - additional pattern statements. If a pattern statement (type
963 conversion) is created in this function, its original statement is
967 OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
968 operands to use in the new pattern statement for STMT (will be created
969 in vect_recog_over_widening_pattern ()).
970 NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
971 statements for STMT: the first one is a type promotion and the second
972 one is the operation itself. We return the type promotion statement
973 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
974 the second pattern statement. */
977 vect_operation_fits_smaller_type (gimple stmt
, tree def
, tree
*new_type
,
978 tree
*op0
, tree
*op1
, gimple
*new_def_stmt
,
982 tree const_oprnd
, oprnd
;
983 tree interm_type
= NULL_TREE
, half_type
, new_oprnd
, type
;
984 gimple def_stmt
, new_stmt
;
990 *new_def_stmt
= NULL
;
992 if (!is_gimple_assign (stmt
))
995 code
= gimple_assign_rhs_code (stmt
);
996 if (code
!= LSHIFT_EXPR
&& code
!= RSHIFT_EXPR
997 && code
!= BIT_IOR_EXPR
&& code
!= BIT_XOR_EXPR
&& code
!= BIT_AND_EXPR
)
1000 oprnd
= gimple_assign_rhs1 (stmt
);
1001 const_oprnd
= gimple_assign_rhs2 (stmt
);
1002 type
= gimple_expr_type (stmt
);
1004 if (TREE_CODE (oprnd
) != SSA_NAME
1005 || TREE_CODE (const_oprnd
) != INTEGER_CST
)
1008 /* If oprnd has other uses besides that in stmt we cannot mark it
1009 as being part of a pattern only. */
1010 if (!has_single_use (oprnd
))
1013 /* If we are in the middle of a sequence, we use DEF from a previous
1014 statement. Otherwise, OPRND has to be a result of type promotion. */
1017 half_type
= *new_type
;
1023 if (!type_conversion_p (oprnd
, stmt
, false, &half_type
, &def_stmt
,
1026 || !vect_same_loop_or_bb_p (stmt
, def_stmt
))
1030 /* Can we perform the operation on a smaller type? */
1036 if (!int_fits_type_p (const_oprnd
, half_type
))
1038 /* HALF_TYPE is not enough. Try a bigger type if possible. */
1039 if (TYPE_PRECISION (type
) < (TYPE_PRECISION (half_type
) * 4))
1042 interm_type
= build_nonstandard_integer_type (
1043 TYPE_PRECISION (half_type
) * 2, TYPE_UNSIGNED (type
));
1044 if (!int_fits_type_p (const_oprnd
, interm_type
))
1051 /* Try intermediate type - HALF_TYPE is not enough for sure. */
1052 if (TYPE_PRECISION (type
) < (TYPE_PRECISION (half_type
) * 4))
1055 /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
1056 (e.g., if the original value was char, the shift amount is at most 8
1057 if we want to use short). */
1058 if (compare_tree_int (const_oprnd
, TYPE_PRECISION (half_type
)) == 1)
1061 interm_type
= build_nonstandard_integer_type (
1062 TYPE_PRECISION (half_type
) * 2, TYPE_UNSIGNED (type
));
1064 if (!vect_supportable_shift (code
, interm_type
))
1070 if (vect_supportable_shift (code
, half_type
))
1073 /* Try intermediate type - HALF_TYPE is not supported. */
1074 if (TYPE_PRECISION (type
) < (TYPE_PRECISION (half_type
) * 4))
1077 interm_type
= build_nonstandard_integer_type (
1078 TYPE_PRECISION (half_type
) * 2, TYPE_UNSIGNED (type
));
1080 if (!vect_supportable_shift (code
, interm_type
))
1089 /* There are four possible cases:
1090 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
1091 the first statement in the sequence)
1092 a. The original, HALF_TYPE, is not enough - we replace the promotion
1093 from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
1094 b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
1096 2. OPRND is defined by a pattern statement we created.
1097 a. Its type is not sufficient for the operation, we create a new stmt:
1098 a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store
1099 this statement in NEW_DEF_STMT, and it is later put in
1100 STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
1101 b. OPRND is good to use in the new statement. */
1106 /* Replace the original type conversion HALF_TYPE->TYPE with
1107 HALF_TYPE->INTERM_TYPE. */
1108 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
)))
1110 new_stmt
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
));
1111 /* Check if the already created pattern stmt is what we need. */
1112 if (!is_gimple_assign (new_stmt
)
1113 || gimple_assign_rhs_code (new_stmt
) != NOP_EXPR
1114 || TREE_TYPE (gimple_assign_lhs (new_stmt
)) != interm_type
)
1117 stmts
->safe_push (def_stmt
);
1118 oprnd
= gimple_assign_lhs (new_stmt
);
1122 /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */
1123 oprnd
= gimple_assign_rhs1 (def_stmt
);
1124 new_oprnd
= make_ssa_name (interm_type
, NULL
);
1125 new_stmt
= gimple_build_assign_with_ops (NOP_EXPR
, new_oprnd
,
1127 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
)) = new_stmt
;
1128 stmts
->safe_push (def_stmt
);
1134 /* Retrieve the operand before the type promotion. */
1135 oprnd
= gimple_assign_rhs1 (def_stmt
);
1142 /* Create a type conversion HALF_TYPE->INTERM_TYPE. */
1143 new_oprnd
= make_ssa_name (interm_type
, NULL
);
1144 new_stmt
= gimple_build_assign_with_ops (NOP_EXPR
, new_oprnd
,
1147 *new_def_stmt
= new_stmt
;
1150 /* Otherwise, OPRND is already set. */
1154 *new_type
= interm_type
;
1156 *new_type
= half_type
;
1159 *op1
= fold_convert (*new_type
, const_oprnd
);
1165 /* Try to find a statement or a sequence of statements that can be performed
1169 TYPE x_T, res0_T, res1_T;
1172 S2 x_T = (TYPE) x_t;
1173 S3 res0_T = op (x_T, C0);
1174 S4 res1_T = op (res0_T, C1);
1175 S5 ... = () res1_T; - type demotion
1177 where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
1179 Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
1180 be 'type' or some intermediate type. For now, we expect S5 to be a type
1181 demotion operation. We also check that S3 and S4 have only one use. */
1184 vect_recog_over_widening_pattern (vec
<gimple
> *stmts
,
1185 tree
*type_in
, tree
*type_out
)
1187 gimple stmt
= stmts
->pop ();
1188 gimple pattern_stmt
= NULL
, new_def_stmt
, prev_stmt
= NULL
, use_stmt
= NULL
;
1189 tree op0
, op1
, vectype
= NULL_TREE
, use_lhs
, use_type
;
1190 tree var
= NULL_TREE
, new_type
= NULL_TREE
, new_oprnd
;
1197 if (!vinfo_for_stmt (stmt
)
1198 || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt
)))
1201 new_def_stmt
= NULL
;
1202 if (!vect_operation_fits_smaller_type (stmt
, var
, &new_type
,
1203 &op0
, &op1
, &new_def_stmt
,
1212 /* STMT can be performed on a smaller type. Check its uses. */
1213 use_stmt
= vect_single_imm_use (stmt
);
1214 if (!use_stmt
|| !is_gimple_assign (use_stmt
))
1217 /* Create pattern statement for STMT. */
1218 vectype
= get_vectype_for_scalar_type (new_type
);
1222 /* We want to collect all the statements for which we create pattern
1223 statetments, except for the case when the last statement in the
1224 sequence doesn't have a corresponding pattern statement. In such
1225 case we associate the last pattern statement with the last statement
1226 in the sequence. Therefore, we only add the original statement to
1227 the list if we know that it is not the last. */
1229 stmts
->safe_push (prev_stmt
);
1231 var
= vect_recog_temp_ssa_var (new_type
, NULL
);
1233 = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt
), var
,
1235 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt
)) = pattern_stmt
;
1236 new_pattern_def_seq (vinfo_for_stmt (stmt
), new_def_stmt
);
1238 if (dump_enabled_p ())
1240 dump_printf_loc (MSG_NOTE
, vect_location
,
1241 "created pattern stmt: ");
1242 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
1243 dump_printf (MSG_NOTE
, "\n");
1246 type
= gimple_expr_type (stmt
);
1253 /* We got a sequence. We expect it to end with a type demotion operation.
1254 Otherwise, we quit (for now). There are three possible cases: the
1255 conversion is to NEW_TYPE (we don't do anything), the conversion is to
1256 a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
1257 NEW_TYPE differs (we create a new conversion statement). */
1258 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt
)))
1260 use_lhs
= gimple_assign_lhs (use_stmt
);
1261 use_type
= TREE_TYPE (use_lhs
);
1262 /* Support only type demotion or signedess change. */
1263 if (!INTEGRAL_TYPE_P (use_type
)
1264 || TYPE_PRECISION (type
) <= TYPE_PRECISION (use_type
))
1267 /* Check that NEW_TYPE is not bigger than the conversion result. */
1268 if (TYPE_PRECISION (new_type
) > TYPE_PRECISION (use_type
))
1271 if (TYPE_UNSIGNED (new_type
) != TYPE_UNSIGNED (use_type
)
1272 || TYPE_PRECISION (new_type
) != TYPE_PRECISION (use_type
))
1274 /* Create NEW_TYPE->USE_TYPE conversion. */
1275 new_oprnd
= make_ssa_name (use_type
, NULL
);
1276 pattern_stmt
= gimple_build_assign_with_ops (NOP_EXPR
, new_oprnd
,
1278 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt
)) = pattern_stmt
;
1280 *type_in
= get_vectype_for_scalar_type (new_type
);
1281 *type_out
= get_vectype_for_scalar_type (use_type
);
1283 /* We created a pattern statement for the last statement in the
1284 sequence, so we don't need to associate it with the pattern
1285 statement created for PREV_STMT. Therefore, we add PREV_STMT
1286 to the list in order to mark it later in vect_pattern_recog_1. */
1288 stmts
->safe_push (prev_stmt
);
1293 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt
))
1294 = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt
));
1297 *type_out
= NULL_TREE
;
1300 stmts
->safe_push (use_stmt
);
1303 /* TODO: support general case, create a conversion to the correct type. */
1306 /* Pattern detected. */
1307 if (dump_enabled_p ())
1309 dump_printf_loc (MSG_NOTE
, vect_location
,
1310 "vect_recog_over_widening_pattern: detected: ");
1311 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
1312 dump_printf (MSG_NOTE
, "\n");
1315 return pattern_stmt
;
1318 /* Detect widening shift pattern:
1324 S2 a_T = (TYPE) a_t;
1325 S3 res_T = a_T << CONST;
1327 where type 'TYPE' is at least double the size of type 'type'.
1329 Also detect cases where the shift result is immediately converted
1330 to another type 'result_type' that is no larger in size than 'TYPE'.
1331 In those cases we perform a widen-shift that directly results in
1332 'result_type', to avoid a possible over-widening situation:
1336 result_type res_result;
1339 S2 a_T = (TYPE) a_t;
1340 S3 res_T = a_T << CONST;
1341 S4 res_result = (result_type) res_T;
1342 '--> res_result' = a_t w<< CONST;
1344 And a case when 'TYPE' is 4 times bigger than 'type'. In that case we
1345 create an additional pattern stmt for S2 to create a variable of an
1346 intermediate type, and perform widen-shift on the intermediate type:
1350 TYPE a_T, res_T, res_T';
1353 S2 a_T = (TYPE) a_t;
1354 '--> a_it = (interm_type) a_t;
1355 S3 res_T = a_T << CONST;
1356 '--> res_T' = a_it <<* CONST;
1360 * STMTS: Contains a stmt from which the pattern search begins.
1361 In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
1362 in STMTS. When an intermediate type is used and a pattern statement is
1363 created for S2, we also put S2 here (before S3).
1367 * TYPE_IN: The type of the input arguments to the pattern.
1369 * TYPE_OUT: The type of the output of this pattern.
1371 * Return value: A new stmt that will be used to replace the sequence of
1372 stmts that constitute the pattern. In this case it will be:
1373 WIDEN_LSHIFT_EXPR <a_t, CONST>. */
1376 vect_recog_widen_shift_pattern (vec
<gimple
> *stmts
,
1377 tree
*type_in
, tree
*type_out
)
1379 gimple last_stmt
= stmts
->pop ();
1381 tree oprnd0
, oprnd1
;
1382 tree type
, half_type0
;
1383 gimple pattern_stmt
;
1384 tree vectype
, vectype_out
= NULL_TREE
;
1386 enum tree_code dummy_code
;
1388 vec
<tree
> dummy_vec
;
1392 if (!is_gimple_assign (last_stmt
) || !vinfo_for_stmt (last_stmt
))
1395 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt
)))
1398 if (gimple_assign_rhs_code (last_stmt
) != LSHIFT_EXPR
)
1401 oprnd0
= gimple_assign_rhs1 (last_stmt
);
1402 oprnd1
= gimple_assign_rhs2 (last_stmt
);
1403 if (TREE_CODE (oprnd0
) != SSA_NAME
|| TREE_CODE (oprnd1
) != INTEGER_CST
)
1406 /* Check operand 0: it has to be defined by a type promotion. */
1407 if (!type_conversion_p (oprnd0
, last_stmt
, false, &half_type0
, &def_stmt0
,
1412 /* Check operand 1: has to be positive. We check that it fits the type
1413 in vect_handle_widen_op_by_const (). */
1414 if (tree_int_cst_compare (oprnd1
, size_zero_node
) <= 0)
1417 oprnd0
= gimple_assign_rhs1 (def_stmt0
);
1418 type
= gimple_expr_type (last_stmt
);
1420 /* Check for subsequent conversion to another type. */
1421 use_stmt
= vect_single_imm_use (last_stmt
);
1422 if (use_stmt
&& is_gimple_assign (use_stmt
)
1423 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt
))
1424 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt
)))
1426 tree use_lhs
= gimple_assign_lhs (use_stmt
);
1427 tree use_type
= TREE_TYPE (use_lhs
);
1429 if (INTEGRAL_TYPE_P (use_type
)
1430 && TYPE_PRECISION (use_type
) <= TYPE_PRECISION (type
))
1432 last_stmt
= use_stmt
;
1437 /* Check if this a widening operation. */
1438 if (!vect_handle_widen_op_by_const (last_stmt
, LSHIFT_EXPR
, oprnd1
,
1440 type
, &half_type0
, def_stmt0
))
1443 /* Pattern detected. */
1444 if (dump_enabled_p ())
1445 dump_printf_loc (MSG_NOTE
, vect_location
,
1446 "vect_recog_widen_shift_pattern: detected:\n");
1448 /* Check target support. */
1449 vectype
= get_vectype_for_scalar_type (half_type0
);
1450 vectype_out
= get_vectype_for_scalar_type (type
);
1454 || !supportable_widening_operation (WIDEN_LSHIFT_EXPR
, last_stmt
,
1455 vectype_out
, vectype
,
1456 &dummy_code
, &dummy_code
,
1457 &dummy_int
, &dummy_vec
))
1461 *type_out
= vectype_out
;
1463 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1464 var
= vect_recog_temp_ssa_var (type
, NULL
);
1466 gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR
, var
, oprnd0
, oprnd1
);
1468 if (dump_enabled_p ())
1469 dump_gimple_stmt_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, pattern_stmt
, 0);
1471 stmts
->safe_push (last_stmt
);
1472 return pattern_stmt
;
1475 /* Detect a rotate pattern wouldn't be otherwise vectorized:
1479 S0 a_t = b_t r<< c_t;
1483 * STMTS: Contains a stmt from which the pattern search begins,
1484 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1488 S2 e_t = d_t & (B - 1);
1489 S3 f_t = b_t << c_t;
1490 S4 g_t = b_t >> e_t;
1493 where B is element bitsize of type.
1497 * TYPE_IN: The type of the input arguments to the pattern.
1499 * TYPE_OUT: The type of the output of this pattern.
1501 * Return value: A new stmt that will be used to replace the rotate
1505 vect_recog_rotate_pattern (vec
<gimple
> *stmts
, tree
*type_in
, tree
*type_out
)
1507 gimple last_stmt
= stmts
->pop ();
1508 tree oprnd0
, oprnd1
, lhs
, var
, var1
, var2
, vectype
, type
, stype
, def
, def2
;
1509 gimple pattern_stmt
, def_stmt
;
1510 enum tree_code rhs_code
;
1511 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
1512 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1513 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
1514 enum vect_def_type dt
;
1515 optab optab1
, optab2
;
1516 edge ext_def
= NULL
;
1518 if (!is_gimple_assign (last_stmt
))
1521 rhs_code
= gimple_assign_rhs_code (last_stmt
);
1531 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
1534 lhs
= gimple_assign_lhs (last_stmt
);
1535 oprnd0
= gimple_assign_rhs1 (last_stmt
);
1536 type
= TREE_TYPE (oprnd0
);
1537 oprnd1
= gimple_assign_rhs2 (last_stmt
);
1538 if (TREE_CODE (oprnd0
) != SSA_NAME
1539 || TYPE_PRECISION (TREE_TYPE (lhs
)) != TYPE_PRECISION (type
)
1540 || !INTEGRAL_TYPE_P (type
)
1541 || !TYPE_UNSIGNED (type
))
1544 if (!vect_is_simple_use (oprnd1
, last_stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
1548 if (dt
!= vect_internal_def
1549 && dt
!= vect_constant_def
1550 && dt
!= vect_external_def
)
1553 vectype
= get_vectype_for_scalar_type (type
);
1554 if (vectype
== NULL_TREE
)
1557 /* If vector/vector or vector/scalar rotate is supported by the target,
1558 don't do anything here. */
1559 optab1
= optab_for_tree_code (rhs_code
, vectype
, optab_vector
);
1561 && optab_handler (optab1
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
1564 if (bb_vinfo
!= NULL
|| dt
!= vect_internal_def
)
1566 optab2
= optab_for_tree_code (rhs_code
, vectype
, optab_scalar
);
1568 && optab_handler (optab2
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
1572 /* If vector/vector or vector/scalar shifts aren't supported by the target,
1573 don't do anything here either. */
1574 optab1
= optab_for_tree_code (LSHIFT_EXPR
, vectype
, optab_vector
);
1575 optab2
= optab_for_tree_code (RSHIFT_EXPR
, vectype
, optab_vector
);
1577 || optab_handler (optab1
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
1579 || optab_handler (optab2
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
1581 if (bb_vinfo
== NULL
&& dt
== vect_internal_def
)
1583 optab1
= optab_for_tree_code (LSHIFT_EXPR
, vectype
, optab_scalar
);
1584 optab2
= optab_for_tree_code (RSHIFT_EXPR
, vectype
, optab_scalar
);
1586 || optab_handler (optab1
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
1588 || optab_handler (optab2
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
1593 *type_out
= vectype
;
1594 if (*type_in
== NULL_TREE
)
1597 if (dt
== vect_external_def
1598 && TREE_CODE (oprnd1
) == SSA_NAME
1601 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1602 ext_def
= loop_preheader_edge (loop
);
1603 if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1
))
1605 basic_block bb
= gimple_bb (SSA_NAME_DEF_STMT (oprnd1
));
1607 || !dominated_by_p (CDI_DOMINATORS
, ext_def
->dest
, bb
))
1613 if (TREE_CODE (oprnd1
) == INTEGER_CST
1614 || TYPE_MODE (TREE_TYPE (oprnd1
)) == TYPE_MODE (type
))
1616 else if (def_stmt
&& gimple_assign_cast_p (def_stmt
))
1618 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
1619 if (TYPE_MODE (TREE_TYPE (rhs1
)) == TYPE_MODE (type
)
1620 && TYPE_PRECISION (TREE_TYPE (rhs1
))
1621 == TYPE_PRECISION (type
))
1625 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
) = NULL
;
1626 if (def
== NULL_TREE
)
1628 def
= vect_recog_temp_ssa_var (type
, NULL
);
1629 def_stmt
= gimple_build_assign_with_ops (NOP_EXPR
, def
, oprnd1
,
1634 = gsi_insert_on_edge_immediate (ext_def
, def_stmt
);
1635 gcc_assert (!new_bb
);
1638 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
1640 stype
= TREE_TYPE (def
);
1642 if (TREE_CODE (def
) == INTEGER_CST
)
1644 if (!tree_fits_uhwi_p (def
)
1645 || tree_to_uhwi (def
) >= GET_MODE_PRECISION (TYPE_MODE (type
))
1646 || integer_zerop (def
))
1648 def2
= build_int_cst (stype
,
1649 GET_MODE_PRECISION (TYPE_MODE (type
))
1650 - tree_to_uhwi (def
));
1654 tree vecstype
= get_vectype_for_scalar_type (stype
);
1655 stmt_vec_info def_stmt_vinfo
;
1657 if (vecstype
== NULL_TREE
)
1659 def2
= vect_recog_temp_ssa_var (stype
, NULL
);
1660 def_stmt
= gimple_build_assign_with_ops (NEGATE_EXPR
, def2
, def
,
1665 = gsi_insert_on_edge_immediate (ext_def
, def_stmt
);
1666 gcc_assert (!new_bb
);
1670 def_stmt_vinfo
= new_stmt_vec_info (def_stmt
, loop_vinfo
, bb_vinfo
);
1671 set_vinfo_for_stmt (def_stmt
, def_stmt_vinfo
);
1672 STMT_VINFO_VECTYPE (def_stmt_vinfo
) = vecstype
;
1673 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
1676 def2
= vect_recog_temp_ssa_var (stype
, NULL
);
1678 = build_int_cst (stype
, GET_MODE_PRECISION (TYPE_MODE (stype
)) - 1);
1679 def_stmt
= gimple_build_assign_with_ops (BIT_AND_EXPR
, def2
,
1680 gimple_assign_lhs (def_stmt
),
1685 = gsi_insert_on_edge_immediate (ext_def
, def_stmt
);
1686 gcc_assert (!new_bb
);
1690 def_stmt_vinfo
= new_stmt_vec_info (def_stmt
, loop_vinfo
, bb_vinfo
);
1691 set_vinfo_for_stmt (def_stmt
, def_stmt_vinfo
);
1692 STMT_VINFO_VECTYPE (def_stmt_vinfo
) = vecstype
;
1693 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
1697 var1
= vect_recog_temp_ssa_var (type
, NULL
);
1698 def_stmt
= gimple_build_assign_with_ops (rhs_code
== LROTATE_EXPR
1699 ? LSHIFT_EXPR
: RSHIFT_EXPR
,
1701 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
1703 var2
= vect_recog_temp_ssa_var (type
, NULL
);
1704 def_stmt
= gimple_build_assign_with_ops (rhs_code
== LROTATE_EXPR
1705 ? RSHIFT_EXPR
: LSHIFT_EXPR
,
1706 var2
, oprnd0
, def2
);
1707 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
1709 /* Pattern detected. */
1710 if (dump_enabled_p ())
1711 dump_printf_loc (MSG_NOTE
, vect_location
,
1712 "vect_recog_rotate_pattern: detected:\n");
1714 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1715 var
= vect_recog_temp_ssa_var (type
, NULL
);
1716 pattern_stmt
= gimple_build_assign_with_ops (BIT_IOR_EXPR
, var
, var1
, var2
);
1718 if (dump_enabled_p ())
1719 dump_gimple_stmt_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, pattern_stmt
, 0);
1721 stmts
->safe_push (last_stmt
);
1722 return pattern_stmt
;
1725 /* Detect a vector by vector shift pattern that wouldn't be otherwise
1733 S3 res_T = b_T op a_t;
1735 where type 'TYPE' is a type with different size than 'type',
1736 and op is <<, >> or rotate.
1741 TYPE b_T, c_T, res_T;
1744 S1 a_t = (type) c_T;
1746 S3 res_T = b_T op a_t;
1750 * STMTS: Contains a stmt from which the pattern search begins,
1751 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
1752 with a shift/rotate which has same type on both operands, in the
1753 second case just b_T op c_T, in the first case with added cast
1754 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
1758 * TYPE_IN: The type of the input arguments to the pattern.
1760 * TYPE_OUT: The type of the output of this pattern.
1762 * Return value: A new stmt that will be used to replace the shift/rotate
1766 vect_recog_vector_vector_shift_pattern (vec
<gimple
> *stmts
,
1767 tree
*type_in
, tree
*type_out
)
1769 gimple last_stmt
= stmts
->pop ();
1770 tree oprnd0
, oprnd1
, lhs
, var
;
1771 gimple pattern_stmt
, def_stmt
;
1772 enum tree_code rhs_code
;
1773 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
1774 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1775 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
1776 enum vect_def_type dt
;
1779 if (!is_gimple_assign (last_stmt
))
1782 rhs_code
= gimple_assign_rhs_code (last_stmt
);
1794 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
1797 lhs
= gimple_assign_lhs (last_stmt
);
1798 oprnd0
= gimple_assign_rhs1 (last_stmt
);
1799 oprnd1
= gimple_assign_rhs2 (last_stmt
);
1800 if (TREE_CODE (oprnd0
) != SSA_NAME
1801 || TREE_CODE (oprnd1
) != SSA_NAME
1802 || TYPE_MODE (TREE_TYPE (oprnd0
)) == TYPE_MODE (TREE_TYPE (oprnd1
))
1803 || TYPE_PRECISION (TREE_TYPE (oprnd1
))
1804 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1
)))
1805 || TYPE_PRECISION (TREE_TYPE (lhs
))
1806 != TYPE_PRECISION (TREE_TYPE (oprnd0
)))
1809 if (!vect_is_simple_use (oprnd1
, last_stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
1813 if (dt
!= vect_internal_def
)
1816 *type_in
= get_vectype_for_scalar_type (TREE_TYPE (oprnd0
));
1817 *type_out
= *type_in
;
1818 if (*type_in
== NULL_TREE
)
1822 if (gimple_assign_cast_p (def_stmt
))
1824 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
1825 if (TYPE_MODE (TREE_TYPE (rhs1
)) == TYPE_MODE (TREE_TYPE (oprnd0
))
1826 && TYPE_PRECISION (TREE_TYPE (rhs1
))
1827 == TYPE_PRECISION (TREE_TYPE (oprnd0
)))
1831 if (def
== NULL_TREE
)
1833 def
= vect_recog_temp_ssa_var (TREE_TYPE (oprnd0
), NULL
);
1834 def_stmt
= gimple_build_assign_with_ops (NOP_EXPR
, def
, oprnd1
,
1836 new_pattern_def_seq (stmt_vinfo
, def_stmt
);
1839 /* Pattern detected. */
1840 if (dump_enabled_p ())
1841 dump_printf_loc (MSG_NOTE
, vect_location
,
1842 "vect_recog_vector_vector_shift_pattern: detected:\n");
1844 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1845 var
= vect_recog_temp_ssa_var (TREE_TYPE (oprnd0
), NULL
);
1846 pattern_stmt
= gimple_build_assign_with_ops (rhs_code
, var
, oprnd0
, def
);
1848 if (dump_enabled_p ())
1849 dump_gimple_stmt_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, pattern_stmt
, 0);
1851 stmts
->safe_push (last_stmt
);
1852 return pattern_stmt
;
1855 /* Detect a signed division by a constant that wouldn't be
1856 otherwise vectorized:
1862 where type 'type' is an integral type and N is a constant.
1864 Similarly handle modulo by a constant:
1870 * STMTS: Contains a stmt from which the pattern search begins,
1871 i.e. the division stmt. S1 is replaced by if N is a power
1872 of two constant and type is signed:
1873 S3 y_t = b_t < 0 ? N - 1 : 0;
1875 S1' a_t = x_t >> log2 (N);
1877 S4 is replaced if N is a power of two constant and
1878 type is signed by (where *_T temporaries have unsigned type):
1879 S9 y_T = b_t < 0 ? -1U : 0U;
1880 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
1881 S7 z_t = (type) z_T;
1883 S5 x_t = w_t & (N - 1);
1884 S4' a_t = x_t - z_t;
1888 * TYPE_IN: The type of the input arguments to the pattern.
1890 * TYPE_OUT: The type of the output of this pattern.
1892 * Return value: A new stmt that will be used to replace the division
1893 S1 or modulo S4 stmt. */
1896 vect_recog_divmod_pattern (vec
<gimple
> *stmts
,
1897 tree
*type_in
, tree
*type_out
)
1899 gimple last_stmt
= stmts
->pop ();
1900 tree oprnd0
, oprnd1
, vectype
, itype
, cond
;
1901 gimple pattern_stmt
, def_stmt
;
1902 enum tree_code rhs_code
;
1903 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
1904 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1905 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
1908 int dummy_int
, prec
;
1909 stmt_vec_info def_stmt_vinfo
;
1911 if (!is_gimple_assign (last_stmt
))
1914 rhs_code
= gimple_assign_rhs_code (last_stmt
);
1917 case TRUNC_DIV_EXPR
:
1918 case TRUNC_MOD_EXPR
:
1924 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
1927 oprnd0
= gimple_assign_rhs1 (last_stmt
);
1928 oprnd1
= gimple_assign_rhs2 (last_stmt
);
1929 itype
= TREE_TYPE (oprnd0
);
1930 if (TREE_CODE (oprnd0
) != SSA_NAME
1931 || TREE_CODE (oprnd1
) != INTEGER_CST
1932 || TREE_CODE (itype
) != INTEGER_TYPE
1933 || TYPE_PRECISION (itype
) != GET_MODE_PRECISION (TYPE_MODE (itype
)))
1936 vectype
= get_vectype_for_scalar_type (itype
);
1937 if (vectype
== NULL_TREE
)
1940 /* If the target can handle vectorized division or modulo natively,
1941 don't attempt to optimize this. */
1942 optab
= optab_for_tree_code (rhs_code
, vectype
, optab_default
);
1943 if (optab
!= unknown_optab
)
1945 enum machine_mode vec_mode
= TYPE_MODE (vectype
);
1946 int icode
= (int) optab_handler (optab
, vec_mode
);
1947 if (icode
!= CODE_FOR_nothing
)
1951 prec
= TYPE_PRECISION (itype
);
1952 if (integer_pow2p (oprnd1
))
1954 if (TYPE_UNSIGNED (itype
) || tree_int_cst_sgn (oprnd1
) != 1)
1957 /* Pattern detected. */
1958 if (dump_enabled_p ())
1959 dump_printf_loc (MSG_NOTE
, vect_location
,
1960 "vect_recog_divmod_pattern: detected:\n");
1962 cond
= build2 (LT_EXPR
, boolean_type_node
, oprnd0
,
1963 build_int_cst (itype
, 0));
1964 if (rhs_code
== TRUNC_DIV_EXPR
)
1966 tree var
= vect_recog_temp_ssa_var (itype
, NULL
);
1969 = gimple_build_assign_with_ops (COND_EXPR
, var
, cond
,
1970 fold_build2 (MINUS_EXPR
, itype
,
1972 build_int_cst (itype
,
1974 build_int_cst (itype
, 0));
1975 new_pattern_def_seq (stmt_vinfo
, def_stmt
);
1976 var
= vect_recog_temp_ssa_var (itype
, NULL
);
1978 = gimple_build_assign_with_ops (PLUS_EXPR
, var
, oprnd0
,
1979 gimple_assign_lhs (def_stmt
));
1980 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
1982 shift
= build_int_cst (itype
, tree_log2 (oprnd1
));
1984 = gimple_build_assign_with_ops (RSHIFT_EXPR
,
1985 vect_recog_temp_ssa_var (itype
,
1992 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
) = NULL
;
1993 if (compare_tree_int (oprnd1
, 2) == 0)
1995 signmask
= vect_recog_temp_ssa_var (itype
, NULL
);
1997 = gimple_build_assign_with_ops (COND_EXPR
, signmask
, cond
,
1998 build_int_cst (itype
, 1),
1999 build_int_cst (itype
, 0));
2000 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2005 = build_nonstandard_integer_type (prec
, 1);
2006 tree vecutype
= get_vectype_for_scalar_type (utype
);
2008 = build_int_cst (utype
, GET_MODE_BITSIZE (TYPE_MODE (itype
))
2009 - tree_log2 (oprnd1
));
2010 tree var
= vect_recog_temp_ssa_var (utype
, NULL
);
2013 = gimple_build_assign_with_ops (COND_EXPR
, var
, cond
,
2014 build_int_cst (utype
, -1),
2015 build_int_cst (utype
, 0));
2017 = new_stmt_vec_info (def_stmt
, loop_vinfo
, bb_vinfo
);
2018 set_vinfo_for_stmt (def_stmt
, def_stmt_vinfo
);
2019 STMT_VINFO_VECTYPE (def_stmt_vinfo
) = vecutype
;
2020 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2021 var
= vect_recog_temp_ssa_var (utype
, NULL
);
2023 = gimple_build_assign_with_ops (RSHIFT_EXPR
, var
,
2024 gimple_assign_lhs (def_stmt
),
2027 = new_stmt_vec_info (def_stmt
, loop_vinfo
, bb_vinfo
);
2028 set_vinfo_for_stmt (def_stmt
, def_stmt_vinfo
);
2029 STMT_VINFO_VECTYPE (def_stmt_vinfo
) = vecutype
;
2030 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2031 signmask
= vect_recog_temp_ssa_var (itype
, NULL
);
2033 = gimple_build_assign_with_ops (NOP_EXPR
, signmask
, var
,
2035 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2038 = gimple_build_assign_with_ops (PLUS_EXPR
,
2039 vect_recog_temp_ssa_var (itype
,
2042 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2044 = gimple_build_assign_with_ops (BIT_AND_EXPR
,
2045 vect_recog_temp_ssa_var (itype
,
2047 gimple_assign_lhs (def_stmt
),
2048 fold_build2 (MINUS_EXPR
, itype
,
2050 build_int_cst (itype
,
2052 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2055 = gimple_build_assign_with_ops (MINUS_EXPR
,
2056 vect_recog_temp_ssa_var (itype
,
2058 gimple_assign_lhs (def_stmt
),
2062 if (dump_enabled_p ())
2063 dump_gimple_stmt_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, pattern_stmt
,
2066 stmts
->safe_push (last_stmt
);
2069 *type_out
= vectype
;
2070 return pattern_stmt
;
2073 if (prec
> HOST_BITS_PER_WIDE_INT
2074 || integer_zerop (oprnd1
))
2077 if (!can_mult_highpart_p (TYPE_MODE (vectype
), TYPE_UNSIGNED (itype
)))
2080 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
) = NULL
;
2082 if (TYPE_UNSIGNED (itype
))
2084 unsigned HOST_WIDE_INT mh
, ml
;
2085 int pre_shift
, post_shift
;
2086 unsigned HOST_WIDE_INT d
= (TREE_INT_CST_LOW (oprnd1
)
2087 & GET_MODE_MASK (TYPE_MODE (itype
)));
2088 tree t1
, t2
, t3
, t4
;
2090 if (d
>= ((unsigned HOST_WIDE_INT
) 1 << (prec
- 1)))
2091 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2094 /* Find a suitable multiplier and right shift count
2095 instead of multiplying with D. */
2096 mh
= choose_multiplier (d
, prec
, prec
, &ml
, &post_shift
, &dummy_int
);
2098 /* If the suggested multiplier is more than SIZE bits, we can do better
2099 for even divisors, using an initial right shift. */
2100 if (mh
!= 0 && (d
& 1) == 0)
2102 pre_shift
= floor_log2 (d
& -d
);
2103 mh
= choose_multiplier (d
>> pre_shift
, prec
, prec
- pre_shift
,
2104 &ml
, &post_shift
, &dummy_int
);
2112 if (post_shift
- 1 >= prec
)
2115 /* t1 = oprnd0 h* ml;
2119 q = t4 >> (post_shift - 1); */
2120 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
2122 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR
, t1
, oprnd0
,
2123 build_int_cst (itype
, ml
));
2124 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2126 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
2128 = gimple_build_assign_with_ops (MINUS_EXPR
, t2
, oprnd0
, t1
);
2129 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2131 t3
= vect_recog_temp_ssa_var (itype
, NULL
);
2133 = gimple_build_assign_with_ops (RSHIFT_EXPR
, t3
, t2
,
2135 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2137 t4
= vect_recog_temp_ssa_var (itype
, NULL
);
2139 = gimple_build_assign_with_ops (PLUS_EXPR
, t4
, t1
, t3
);
2141 if (post_shift
!= 1)
2143 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2145 q
= vect_recog_temp_ssa_var (itype
, NULL
);
2147 = gimple_build_assign_with_ops (RSHIFT_EXPR
, q
, t4
,
2148 build_int_cst (itype
,
2155 pattern_stmt
= def_stmt
;
2160 if (pre_shift
>= prec
|| post_shift
>= prec
)
2163 /* t1 = oprnd0 >> pre_shift;
2165 q = t2 >> post_shift; */
2168 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
2170 = gimple_build_assign_with_ops (RSHIFT_EXPR
, t1
, oprnd0
,
2171 build_int_cst (NULL
,
2173 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2178 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
2180 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR
, t2
, t1
,
2181 build_int_cst (itype
, ml
));
2185 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2187 q
= vect_recog_temp_ssa_var (itype
, NULL
);
2189 = gimple_build_assign_with_ops (RSHIFT_EXPR
, q
, t2
,
2190 build_int_cst (itype
,
2196 pattern_stmt
= def_stmt
;
2201 unsigned HOST_WIDE_INT ml
;
2203 HOST_WIDE_INT d
= TREE_INT_CST_LOW (oprnd1
);
2204 unsigned HOST_WIDE_INT abs_d
;
2206 tree t1
, t2
, t3
, t4
;
2208 /* Give up for -1. */
2212 /* Since d might be INT_MIN, we have to cast to
2213 unsigned HOST_WIDE_INT before negating to avoid
2214 undefined signed overflow. */
2216 ? (unsigned HOST_WIDE_INT
) d
2217 : - (unsigned HOST_WIDE_INT
) d
);
2219 /* n rem d = n rem -d */
2220 if (rhs_code
== TRUNC_MOD_EXPR
&& d
< 0)
2223 oprnd1
= build_int_cst (itype
, abs_d
);
2225 else if (HOST_BITS_PER_WIDE_INT
>= prec
2226 && abs_d
== (unsigned HOST_WIDE_INT
) 1 << (prec
- 1))
2227 /* This case is not handled correctly below. */
2230 choose_multiplier (abs_d
, prec
, prec
- 1, &ml
, &post_shift
, &dummy_int
);
2231 if (ml
>= (unsigned HOST_WIDE_INT
) 1 << (prec
- 1))
2234 ml
|= (~(unsigned HOST_WIDE_INT
) 0) << (prec
- 1);
2236 if (post_shift
>= prec
)
2239 /* t1 = oprnd0 h* ml; */
2240 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
2242 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR
, t1
, oprnd0
,
2243 build_int_cst (itype
, ml
));
2247 /* t2 = t1 + oprnd0; */
2248 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2249 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
2251 = gimple_build_assign_with_ops (PLUS_EXPR
, t2
, t1
, oprnd0
);
2258 /* t3 = t2 >> post_shift; */
2259 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2260 t3
= vect_recog_temp_ssa_var (itype
, NULL
);
2262 = gimple_build_assign_with_ops (RSHIFT_EXPR
, t3
, t2
,
2263 build_int_cst (itype
, post_shift
));
2268 double_int oprnd0_min
, oprnd0_max
;
2270 if (get_range_info (oprnd0
, &oprnd0_min
, &oprnd0_max
) == VR_RANGE
)
2272 if (!oprnd0_min
.is_negative ())
2274 else if (oprnd0_max
.is_negative ())
2278 if (msb
== 0 && d
>= 0)
2282 pattern_stmt
= def_stmt
;
2286 /* t4 = oprnd0 >> (prec - 1);
2287 or if we know from VRP that oprnd0 >= 0
2289 or if we know from VRP that oprnd0 < 0
2291 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2292 t4
= vect_recog_temp_ssa_var (itype
, NULL
);
2295 = gimple_build_assign_with_ops (INTEGER_CST
,
2296 t4
, build_int_cst (itype
, msb
),
2300 = gimple_build_assign_with_ops (RSHIFT_EXPR
, t4
, oprnd0
,
2301 build_int_cst (itype
, prec
- 1));
2302 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2304 /* q = t3 - t4; or q = t4 - t3; */
2305 q
= vect_recog_temp_ssa_var (itype
, NULL
);
2307 = gimple_build_assign_with_ops (MINUS_EXPR
, q
, d
< 0 ? t4
: t3
,
2312 if (rhs_code
== TRUNC_MOD_EXPR
)
2316 /* We divided. Now finish by:
2319 append_pattern_def_seq (stmt_vinfo
, pattern_stmt
);
2321 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
2323 = gimple_build_assign_with_ops (MULT_EXPR
, t1
, q
, oprnd1
);
2324 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2326 r
= vect_recog_temp_ssa_var (itype
, NULL
);
2328 = gimple_build_assign_with_ops (MINUS_EXPR
, r
, oprnd0
, t1
);
2331 /* Pattern detected. */
2332 if (dump_enabled_p ())
2334 dump_printf_loc (MSG_NOTE
, vect_location
,
2335 "vect_recog_divmod_pattern: detected: ");
2336 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
2337 dump_printf (MSG_NOTE
, "\n");
2340 stmts
->safe_push (last_stmt
);
2343 *type_out
= vectype
;
2344 return pattern_stmt
;
2347 /* Function vect_recog_mixed_size_cond_pattern
2349 Try to find the following pattern:
2354 S1 a_T = x_t CMP y_t ? b_T : c_T;
2356 where type 'TYPE' is an integral type which has different size
2357 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
2358 than 'type', the constants need to fit into an integer type
2359 with the same width as 'type') or results of conversion from 'type'.
2363 * LAST_STMT: A stmt from which the pattern search begins.
2367 * TYPE_IN: The type of the input arguments to the pattern.
2369 * TYPE_OUT: The type of the output of this pattern.
2371 * Return value: A new stmt that will be used to replace the pattern.
2372 Additionally a def_stmt is added.
2374 a_it = x_t CMP y_t ? b_it : c_it;
2375 a_T = (TYPE) a_it; */
2378 vect_recog_mixed_size_cond_pattern (vec
<gimple
> *stmts
, tree
*type_in
,
2381 gimple last_stmt
= (*stmts
)[0];
2382 tree cond_expr
, then_clause
, else_clause
;
2383 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
), def_stmt_info
;
2384 tree type
, vectype
, comp_vectype
, itype
= NULL_TREE
, vecitype
;
2385 enum machine_mode cmpmode
;
2386 gimple pattern_stmt
, def_stmt
;
2387 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
2388 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
2389 tree orig_type0
= NULL_TREE
, orig_type1
= NULL_TREE
;
2390 gimple def_stmt0
= NULL
, def_stmt1
= NULL
;
2392 tree comp_scalar_type
;
2394 if (!is_gimple_assign (last_stmt
)
2395 || gimple_assign_rhs_code (last_stmt
) != COND_EXPR
2396 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_internal_def
)
2399 cond_expr
= gimple_assign_rhs1 (last_stmt
);
2400 then_clause
= gimple_assign_rhs2 (last_stmt
);
2401 else_clause
= gimple_assign_rhs3 (last_stmt
);
2403 if (!COMPARISON_CLASS_P (cond_expr
))
2406 comp_scalar_type
= TREE_TYPE (TREE_OPERAND (cond_expr
, 0));
2407 comp_vectype
= get_vectype_for_scalar_type (comp_scalar_type
);
2408 if (comp_vectype
== NULL_TREE
)
2411 type
= gimple_expr_type (last_stmt
);
2412 if (types_compatible_p (type
, comp_scalar_type
)
2413 || ((TREE_CODE (then_clause
) != INTEGER_CST
2414 || TREE_CODE (else_clause
) != INTEGER_CST
)
2415 && !INTEGRAL_TYPE_P (comp_scalar_type
))
2416 || !INTEGRAL_TYPE_P (type
))
2419 if ((TREE_CODE (then_clause
) != INTEGER_CST
2420 && !type_conversion_p (then_clause
, last_stmt
, false, &orig_type0
,
2421 &def_stmt0
, &promotion
))
2422 || (TREE_CODE (else_clause
) != INTEGER_CST
2423 && !type_conversion_p (else_clause
, last_stmt
, false, &orig_type1
,
2424 &def_stmt1
, &promotion
)))
2427 if (orig_type0
&& orig_type1
2428 && !types_compatible_p (orig_type0
, orig_type1
))
2433 if (!types_compatible_p (orig_type0
, comp_scalar_type
))
2435 then_clause
= gimple_assign_rhs1 (def_stmt0
);
2441 if (!types_compatible_p (orig_type1
, comp_scalar_type
))
2443 else_clause
= gimple_assign_rhs1 (def_stmt1
);
2447 cmpmode
= GET_MODE_INNER (TYPE_MODE (comp_vectype
));
2449 if (GET_MODE_BITSIZE (TYPE_MODE (type
)) == GET_MODE_BITSIZE (cmpmode
))
2452 vectype
= get_vectype_for_scalar_type (type
);
2453 if (vectype
== NULL_TREE
)
2456 if (expand_vec_cond_expr_p (vectype
, comp_vectype
))
2459 if (itype
== NULL_TREE
)
2460 itype
= build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode
),
2461 TYPE_UNSIGNED (type
));
2463 if (itype
== NULL_TREE
2464 || GET_MODE_BITSIZE (TYPE_MODE (itype
)) != GET_MODE_BITSIZE (cmpmode
))
2467 vecitype
= get_vectype_for_scalar_type (itype
);
2468 if (vecitype
== NULL_TREE
)
2471 if (!expand_vec_cond_expr_p (vecitype
, comp_vectype
))
2474 if (GET_MODE_BITSIZE (TYPE_MODE (type
)) > GET_MODE_BITSIZE (cmpmode
))
2476 if ((TREE_CODE (then_clause
) == INTEGER_CST
2477 && !int_fits_type_p (then_clause
, itype
))
2478 || (TREE_CODE (else_clause
) == INTEGER_CST
2479 && !int_fits_type_p (else_clause
, itype
)))
2484 = gimple_build_assign_with_ops (COND_EXPR
,
2485 vect_recog_temp_ssa_var (itype
, NULL
),
2486 unshare_expr (cond_expr
),
2487 fold_convert (itype
, then_clause
),
2488 fold_convert (itype
, else_clause
));
2490 = gimple_build_assign_with_ops (NOP_EXPR
,
2491 vect_recog_temp_ssa_var (type
, NULL
),
2492 gimple_assign_lhs (def_stmt
), NULL_TREE
);
2494 new_pattern_def_seq (stmt_vinfo
, def_stmt
);
2495 def_stmt_info
= new_stmt_vec_info (def_stmt
, loop_vinfo
, bb_vinfo
);
2496 set_vinfo_for_stmt (def_stmt
, def_stmt_info
);
2497 STMT_VINFO_VECTYPE (def_stmt_info
) = vecitype
;
2498 *type_in
= vecitype
;
2499 *type_out
= vectype
;
2501 if (dump_enabled_p ())
2502 dump_printf_loc (MSG_NOTE
, vect_location
,
2503 "vect_recog_mixed_size_cond_pattern: detected:\n");
2505 return pattern_stmt
;
2509 /* Helper function of vect_recog_bool_pattern. Called recursively, return
2510 true if bool VAR can be optimized that way. */
2513 check_bool_pattern (tree var
, loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
)
2516 enum vect_def_type dt
;
2518 enum tree_code rhs_code
;
2520 if (!vect_is_simple_use (var
, NULL
, loop_vinfo
, bb_vinfo
, &def_stmt
, &def
,
2524 if (dt
!= vect_internal_def
)
2527 if (!is_gimple_assign (def_stmt
))
2530 if (!has_single_use (def
))
2533 rhs1
= gimple_assign_rhs1 (def_stmt
);
2534 rhs_code
= gimple_assign_rhs_code (def_stmt
);
2538 return check_bool_pattern (rhs1
, loop_vinfo
, bb_vinfo
);
2541 if ((TYPE_PRECISION (TREE_TYPE (rhs1
)) != 1
2542 || !TYPE_UNSIGNED (TREE_TYPE (rhs1
)))
2543 && TREE_CODE (TREE_TYPE (rhs1
)) != BOOLEAN_TYPE
)
2545 return check_bool_pattern (rhs1
, loop_vinfo
, bb_vinfo
);
2548 return check_bool_pattern (rhs1
, loop_vinfo
, bb_vinfo
);
2553 if (!check_bool_pattern (rhs1
, loop_vinfo
, bb_vinfo
))
2555 return check_bool_pattern (gimple_assign_rhs2 (def_stmt
), loop_vinfo
,
2559 if (TREE_CODE_CLASS (rhs_code
) == tcc_comparison
)
2561 tree vecitype
, comp_vectype
;
2563 /* If the comparison can throw, then is_gimple_condexpr will be
2564 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
2565 if (stmt_could_throw_p (def_stmt
))
2568 comp_vectype
= get_vectype_for_scalar_type (TREE_TYPE (rhs1
));
2569 if (comp_vectype
== NULL_TREE
)
2572 if (TREE_CODE (TREE_TYPE (rhs1
)) != INTEGER_TYPE
)
2574 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (rhs1
));
2576 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode
), 1);
2577 vecitype
= get_vectype_for_scalar_type (itype
);
2578 if (vecitype
== NULL_TREE
)
2582 vecitype
= comp_vectype
;
2583 return expand_vec_cond_expr_p (vecitype
, comp_vectype
);
2590 /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
2591 stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
2592 to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT. */
2595 adjust_bool_pattern_cast (tree type
, tree var
)
2597 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (SSA_NAME_DEF_STMT (var
));
2598 gimple cast_stmt
, pattern_stmt
;
2600 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
));
2601 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_vinfo
);
2602 new_pattern_def_seq (stmt_vinfo
, pattern_stmt
);
2604 = gimple_build_assign_with_ops (NOP_EXPR
,
2605 vect_recog_temp_ssa_var (type
, NULL
),
2606 gimple_assign_lhs (pattern_stmt
),
2608 STMT_VINFO_RELATED_STMT (stmt_vinfo
) = cast_stmt
;
2609 return gimple_assign_lhs (cast_stmt
);
2613 /* Helper function of vect_recog_bool_pattern. Do the actual transformations,
2614 recursively. VAR is an SSA_NAME that should be transformed from bool
2615 to a wider integer type, OUT_TYPE is the desired final integer type of
2616 the whole pattern, TRUEVAL should be NULL unless optimizing
2617 BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
2618 in the then_clause, STMTS is where statements with added pattern stmts
2619 should be pushed to. */
2622 adjust_bool_pattern (tree var
, tree out_type
, tree trueval
,
2625 gimple stmt
= SSA_NAME_DEF_STMT (var
);
2626 enum tree_code rhs_code
, def_rhs_code
;
2627 tree itype
, cond_expr
, rhs1
, rhs2
, irhs1
, irhs2
;
2629 gimple pattern_stmt
, def_stmt
;
2631 rhs1
= gimple_assign_rhs1 (stmt
);
2632 rhs2
= gimple_assign_rhs2 (stmt
);
2633 rhs_code
= gimple_assign_rhs_code (stmt
);
2634 loc
= gimple_location (stmt
);
2639 irhs1
= adjust_bool_pattern (rhs1
, out_type
, NULL_TREE
, stmts
);
2640 itype
= TREE_TYPE (irhs1
);
2642 = gimple_build_assign_with_ops (SSA_NAME
,
2643 vect_recog_temp_ssa_var (itype
, NULL
),
2648 irhs1
= adjust_bool_pattern (rhs1
, out_type
, NULL_TREE
, stmts
);
2649 itype
= TREE_TYPE (irhs1
);
2651 = gimple_build_assign_with_ops (BIT_XOR_EXPR
,
2652 vect_recog_temp_ssa_var (itype
, NULL
),
2653 irhs1
, build_int_cst (itype
, 1));
2657 /* Try to optimize x = y & (a < b ? 1 : 0); into
2658 x = (a < b ? y : 0);
2664 S1 a_b = x1 CMP1 y1;
2665 S2 b_b = x2 CMP2 y2;
2667 S4 d_T = (TYPE) c_b;
2669 we would normally emit:
2671 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2672 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2673 S3' c_T = a_T & b_T;
2676 but we can save one stmt by using the
2677 result of one of the COND_EXPRs in the other COND_EXPR and leave
2678 BIT_AND_EXPR stmt out:
2680 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2681 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2684 At least when VEC_COND_EXPR is implemented using masks
2685 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
2686 computes the comparison masks and ands it, in one case with
2687 all ones vector, in the other case with a vector register.
2688 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
2689 often more expensive. */
2690 def_stmt
= SSA_NAME_DEF_STMT (rhs2
);
2691 def_rhs_code
= gimple_assign_rhs_code (def_stmt
);
2692 if (TREE_CODE_CLASS (def_rhs_code
) == tcc_comparison
)
2694 tree def_rhs1
= gimple_assign_rhs1 (def_stmt
);
2695 irhs1
= adjust_bool_pattern (rhs1
, out_type
, NULL_TREE
, stmts
);
2696 if (TYPE_PRECISION (TREE_TYPE (irhs1
))
2697 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1
))))
2700 stmt_vec_info stmt_def_vinfo
= vinfo_for_stmt (def_stmt
);
2701 irhs2
= adjust_bool_pattern (rhs2
, out_type
, irhs1
, stmts
);
2702 tstmt
= stmts
->pop ();
2703 gcc_assert (tstmt
== def_stmt
);
2704 stmts
->quick_push (stmt
);
2705 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt
))
2706 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo
);
2707 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo
));
2708 STMT_VINFO_RELATED_STMT (stmt_def_vinfo
) = NULL
;
2712 irhs2
= adjust_bool_pattern (rhs2
, out_type
, NULL_TREE
, stmts
);
2715 def_stmt
= SSA_NAME_DEF_STMT (rhs1
);
2716 def_rhs_code
= gimple_assign_rhs_code (def_stmt
);
2717 if (TREE_CODE_CLASS (def_rhs_code
) == tcc_comparison
)
2719 tree def_rhs1
= gimple_assign_rhs1 (def_stmt
);
2720 irhs2
= adjust_bool_pattern (rhs2
, out_type
, NULL_TREE
, stmts
);
2721 if (TYPE_PRECISION (TREE_TYPE (irhs2
))
2722 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1
))))
2725 stmt_vec_info stmt_def_vinfo
= vinfo_for_stmt (def_stmt
);
2726 irhs1
= adjust_bool_pattern (rhs1
, out_type
, irhs2
, stmts
);
2727 tstmt
= stmts
->pop ();
2728 gcc_assert (tstmt
== def_stmt
);
2729 stmts
->quick_push (stmt
);
2730 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt
))
2731 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo
);
2732 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo
));
2733 STMT_VINFO_RELATED_STMT (stmt_def_vinfo
) = NULL
;
2737 irhs1
= adjust_bool_pattern (rhs1
, out_type
, NULL_TREE
, stmts
);
2743 irhs1
= adjust_bool_pattern (rhs1
, out_type
, NULL_TREE
, stmts
);
2744 irhs2
= adjust_bool_pattern (rhs2
, out_type
, NULL_TREE
, stmts
);
2746 if (TYPE_PRECISION (TREE_TYPE (irhs1
))
2747 != TYPE_PRECISION (TREE_TYPE (irhs2
)))
2749 int prec1
= TYPE_PRECISION (TREE_TYPE (irhs1
));
2750 int prec2
= TYPE_PRECISION (TREE_TYPE (irhs2
));
2751 int out_prec
= TYPE_PRECISION (out_type
);
2752 if (absu_hwi (out_prec
- prec1
) < absu_hwi (out_prec
- prec2
))
2753 irhs2
= adjust_bool_pattern_cast (TREE_TYPE (irhs1
), rhs2
);
2754 else if (absu_hwi (out_prec
- prec1
) > absu_hwi (out_prec
- prec2
))
2755 irhs1
= adjust_bool_pattern_cast (TREE_TYPE (irhs2
), rhs1
);
2758 irhs1
= adjust_bool_pattern_cast (out_type
, rhs1
);
2759 irhs2
= adjust_bool_pattern_cast (out_type
, rhs2
);
2762 itype
= TREE_TYPE (irhs1
);
2764 = gimple_build_assign_with_ops (rhs_code
,
2765 vect_recog_temp_ssa_var (itype
, NULL
),
2770 gcc_assert (TREE_CODE_CLASS (rhs_code
) == tcc_comparison
);
2771 if (TREE_CODE (TREE_TYPE (rhs1
)) != INTEGER_TYPE
2772 || !TYPE_UNSIGNED (TREE_TYPE (rhs1
))
2773 || (TYPE_PRECISION (TREE_TYPE (rhs1
))
2774 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1
)))))
2776 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (rhs1
));
2778 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode
), 1);
2781 itype
= TREE_TYPE (rhs1
);
2782 cond_expr
= build2_loc (loc
, rhs_code
, itype
, rhs1
, rhs2
);
2783 if (trueval
== NULL_TREE
)
2784 trueval
= build_int_cst (itype
, 1);
2786 gcc_checking_assert (useless_type_conversion_p (itype
,
2787 TREE_TYPE (trueval
)));
2789 = gimple_build_assign_with_ops (COND_EXPR
,
2790 vect_recog_temp_ssa_var (itype
, NULL
),
2792 build_int_cst (itype
, 0));
2796 stmts
->safe_push (stmt
);
2797 gimple_set_location (pattern_stmt
, loc
);
2798 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt
)) = pattern_stmt
;
2799 return gimple_assign_lhs (pattern_stmt
);
2803 /* Function vect_recog_bool_pattern
2805 Try to find pattern like following:
2807 bool a_b, b_b, c_b, d_b, e_b;
2810 S1 a_b = x1 CMP1 y1;
2811 S2 b_b = x2 CMP2 y2;
2813 S4 d_b = x3 CMP3 y3;
2815 S6 f_T = (TYPE) e_b;
2817 where type 'TYPE' is an integral type.
2821 * LAST_STMT: A stmt at the end from which the pattern
2822 search begins, i.e. cast of a bool to
2827 * TYPE_IN: The type of the input arguments to the pattern.
2829 * TYPE_OUT: The type of the output of this pattern.
2831 * Return value: A new stmt that will be used to replace the pattern.
2833 Assuming size of TYPE is the same as size of all comparisons
2834 (otherwise some casts would be added where needed), the above
2835 sequence we create related pattern stmts:
2836 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2837 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2838 S4' d_T = x3 CMP3 y3 ? 1 : 0;
2839 S5' e_T = c_T | d_T;
2842 Instead of the above S3' we could emit:
2843 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2844 S3' c_T = a_T | b_T;
2845 but the above is more efficient. */
2848 vect_recog_bool_pattern (vec
<gimple
> *stmts
, tree
*type_in
,
2851 gimple last_stmt
= stmts
->pop ();
2852 enum tree_code rhs_code
;
2853 tree var
, lhs
, rhs
, vectype
;
2854 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
2855 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
2856 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
2857 gimple pattern_stmt
;
2859 if (!is_gimple_assign (last_stmt
))
2862 var
= gimple_assign_rhs1 (last_stmt
);
2863 lhs
= gimple_assign_lhs (last_stmt
);
2865 if ((TYPE_PRECISION (TREE_TYPE (var
)) != 1
2866 || !TYPE_UNSIGNED (TREE_TYPE (var
)))
2867 && TREE_CODE (TREE_TYPE (var
)) != BOOLEAN_TYPE
)
2870 rhs_code
= gimple_assign_rhs_code (last_stmt
);
2871 if (CONVERT_EXPR_CODE_P (rhs_code
))
2873 if (TREE_CODE (TREE_TYPE (lhs
)) != INTEGER_TYPE
2874 || TYPE_PRECISION (TREE_TYPE (lhs
)) == 1)
2876 vectype
= get_vectype_for_scalar_type (TREE_TYPE (lhs
));
2877 if (vectype
== NULL_TREE
)
2880 if (!check_bool_pattern (var
, loop_vinfo
, bb_vinfo
))
2883 rhs
= adjust_bool_pattern (var
, TREE_TYPE (lhs
), NULL_TREE
, stmts
);
2884 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
2885 if (useless_type_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (rhs
)))
2887 = gimple_build_assign_with_ops (SSA_NAME
, lhs
, rhs
, NULL_TREE
);
2890 = gimple_build_assign_with_ops (NOP_EXPR
, lhs
, rhs
, NULL_TREE
);
2891 *type_out
= vectype
;
2893 stmts
->safe_push (last_stmt
);
2894 if (dump_enabled_p ())
2895 dump_printf_loc (MSG_NOTE
, vect_location
,
2896 "vect_recog_bool_pattern: detected:\n");
2898 return pattern_stmt
;
2900 else if (rhs_code
== SSA_NAME
2901 && STMT_VINFO_DATA_REF (stmt_vinfo
))
2903 stmt_vec_info pattern_stmt_info
;
2904 vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
2905 gcc_assert (vectype
!= NULL_TREE
);
2906 if (!VECTOR_MODE_P (TYPE_MODE (vectype
)))
2908 if (!check_bool_pattern (var
, loop_vinfo
, bb_vinfo
))
2911 rhs
= adjust_bool_pattern (var
, TREE_TYPE (vectype
), NULL_TREE
, stmts
);
2912 lhs
= build1 (VIEW_CONVERT_EXPR
, TREE_TYPE (vectype
), lhs
);
2913 if (!useless_type_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (rhs
)))
2915 tree rhs2
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
2917 = gimple_build_assign_with_ops (NOP_EXPR
, rhs2
, rhs
, NULL_TREE
);
2918 new_pattern_def_seq (stmt_vinfo
, cast_stmt
);
2922 = gimple_build_assign_with_ops (SSA_NAME
, lhs
, rhs
, NULL_TREE
);
2923 pattern_stmt_info
= new_stmt_vec_info (pattern_stmt
, loop_vinfo
,
2925 set_vinfo_for_stmt (pattern_stmt
, pattern_stmt_info
);
2926 STMT_VINFO_DATA_REF (pattern_stmt_info
)
2927 = STMT_VINFO_DATA_REF (stmt_vinfo
);
2928 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info
)
2929 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo
);
2930 STMT_VINFO_DR_INIT (pattern_stmt_info
) = STMT_VINFO_DR_INIT (stmt_vinfo
);
2931 STMT_VINFO_DR_OFFSET (pattern_stmt_info
)
2932 = STMT_VINFO_DR_OFFSET (stmt_vinfo
);
2933 STMT_VINFO_DR_STEP (pattern_stmt_info
) = STMT_VINFO_DR_STEP (stmt_vinfo
);
2934 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info
)
2935 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo
);
2936 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo
)) = pattern_stmt
;
2937 *type_out
= vectype
;
2939 stmts
->safe_push (last_stmt
);
2940 if (dump_enabled_p ())
2941 dump_printf_loc (MSG_NOTE
, vect_location
,
2942 "vect_recog_bool_pattern: detected:\n");
2943 return pattern_stmt
;
2950 /* Mark statements that are involved in a pattern. */
2953 vect_mark_pattern_stmts (gimple orig_stmt
, gimple pattern_stmt
,
2954 tree pattern_vectype
)
2956 stmt_vec_info pattern_stmt_info
, def_stmt_info
;
2957 stmt_vec_info orig_stmt_info
= vinfo_for_stmt (orig_stmt
);
2958 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (orig_stmt_info
);
2959 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (orig_stmt_info
);
2962 pattern_stmt_info
= vinfo_for_stmt (pattern_stmt
);
2963 if (pattern_stmt_info
== NULL
)
2965 pattern_stmt_info
= new_stmt_vec_info (pattern_stmt
, loop_vinfo
,
2967 set_vinfo_for_stmt (pattern_stmt
, pattern_stmt_info
);
2969 gimple_set_bb (pattern_stmt
, gimple_bb (orig_stmt
));
2971 STMT_VINFO_RELATED_STMT (pattern_stmt_info
) = orig_stmt
;
2972 STMT_VINFO_DEF_TYPE (pattern_stmt_info
)
2973 = STMT_VINFO_DEF_TYPE (orig_stmt_info
);
2974 STMT_VINFO_VECTYPE (pattern_stmt_info
) = pattern_vectype
;
2975 STMT_VINFO_IN_PATTERN_P (orig_stmt_info
) = true;
2976 STMT_VINFO_RELATED_STMT (orig_stmt_info
) = pattern_stmt
;
2977 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info
)
2978 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info
);
2979 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info
))
2981 gimple_stmt_iterator si
;
2982 for (si
= gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info
));
2983 !gsi_end_p (si
); gsi_next (&si
))
2985 def_stmt
= gsi_stmt (si
);
2986 def_stmt_info
= vinfo_for_stmt (def_stmt
);
2987 if (def_stmt_info
== NULL
)
2989 def_stmt_info
= new_stmt_vec_info (def_stmt
, loop_vinfo
,
2991 set_vinfo_for_stmt (def_stmt
, def_stmt_info
);
2993 gimple_set_bb (def_stmt
, gimple_bb (orig_stmt
));
2994 STMT_VINFO_RELATED_STMT (def_stmt_info
) = orig_stmt
;
2995 STMT_VINFO_DEF_TYPE (def_stmt_info
)
2996 = STMT_VINFO_DEF_TYPE (orig_stmt_info
);
2997 if (STMT_VINFO_VECTYPE (def_stmt_info
) == NULL_TREE
)
2998 STMT_VINFO_VECTYPE (def_stmt_info
) = pattern_vectype
;
3003 /* Function vect_pattern_recog_1
3006 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
3007 computation pattern.
3008 STMT: A stmt from which the pattern search should start.
3010 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
3011 expression that computes the same functionality and can be used to
3012 replace the sequence of stmts that are involved in the pattern.
3015 This function checks if the expression returned by PATTERN_RECOG_FUNC is
3016 supported in vector form by the target. We use 'TYPE_IN' to obtain the
3017 relevant vector type. If 'TYPE_IN' is already a vector type, then this
3018 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
3019 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
3020 to the available target pattern.
3022 This function also does some bookkeeping, as explained in the documentation
3023 for vect_recog_pattern. */
3026 vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func
,
3027 gimple_stmt_iterator si
,
3028 vec
<gimple
> *stmts_to_replace
)
3030 gimple stmt
= gsi_stmt (si
), pattern_stmt
;
3031 stmt_vec_info stmt_info
;
3032 loop_vec_info loop_vinfo
;
3033 tree pattern_vectype
;
3034 tree type_in
, type_out
;
3035 enum tree_code code
;
3039 stmts_to_replace
->truncate (0);
3040 stmts_to_replace
->quick_push (stmt
);
3041 pattern_stmt
= (* vect_recog_func
) (stmts_to_replace
, &type_in
, &type_out
);
3045 stmt
= stmts_to_replace
->last ();
3046 stmt_info
= vinfo_for_stmt (stmt
);
3047 loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3049 if (VECTOR_MODE_P (TYPE_MODE (type_in
)))
3051 /* No need to check target support (already checked by the pattern
3052 recognition function). */
3053 pattern_vectype
= type_out
? type_out
: type_in
;
3057 enum machine_mode vec_mode
;
3058 enum insn_code icode
;
3061 /* Check target support */
3062 type_in
= get_vectype_for_scalar_type (type_in
);
3066 type_out
= get_vectype_for_scalar_type (type_out
);
3071 pattern_vectype
= type_out
;
3073 if (is_gimple_assign (pattern_stmt
))
3074 code
= gimple_assign_rhs_code (pattern_stmt
);
3077 gcc_assert (is_gimple_call (pattern_stmt
));
3081 optab
= optab_for_tree_code (code
, type_in
, optab_default
);
3082 vec_mode
= TYPE_MODE (type_in
);
3084 || (icode
= optab_handler (optab
, vec_mode
)) == CODE_FOR_nothing
3085 || (insn_data
[icode
].operand
[0].mode
!= TYPE_MODE (type_out
)))
3089 /* Found a vectorizable pattern. */
3090 if (dump_enabled_p ())
3092 dump_printf_loc (MSG_NOTE
, vect_location
,
3093 "pattern recognized: ");
3094 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
3095 dump_printf (MSG_NOTE
, "\n");
3098 /* Mark the stmts that are involved in the pattern. */
3099 vect_mark_pattern_stmts (stmt
, pattern_stmt
, pattern_vectype
);
3101 /* Patterns cannot be vectorized using SLP, because they change the order of
3104 FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo
), i
, next
)
3106 LOOP_VINFO_REDUCTIONS (loop_vinfo
).ordered_remove (i
);
3108 /* It is possible that additional pattern stmts are created and inserted in
3109 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
3110 relevant statements. */
3111 for (i
= 0; stmts_to_replace
->iterate (i
, &stmt
)
3112 && (unsigned) i
< (stmts_to_replace
->length () - 1);
3115 stmt_info
= vinfo_for_stmt (stmt
);
3116 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
3117 if (dump_enabled_p ())
3119 dump_printf_loc (MSG_NOTE
, vect_location
,
3120 "additional pattern stmt: ");
3121 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
3122 dump_printf (MSG_NOTE
, "\n");
3125 vect_mark_pattern_stmts (stmt
, pattern_stmt
, NULL_TREE
);
3130 /* Function vect_pattern_recog
3133 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
3136 Output - for each computation idiom that is detected we create a new stmt
3137 that provides the same functionality and that can be vectorized. We
3138 also record some information in the struct_stmt_info of the relevant
3139 stmts, as explained below:
3141 At the entry to this function we have the following stmts, with the
3142 following initial value in the STMT_VINFO fields:
3144 stmt in_pattern_p related_stmt vec_stmt
3145 S1: a_i = .... - - -
3146 S2: a_2 = ..use(a_i).. - - -
3147 S3: a_1 = ..use(a_2).. - - -
3148 S4: a_0 = ..use(a_1).. - - -
3149 S5: ... = ..use(a_0).. - - -
3151 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
3152 represented by a single stmt. We then:
3153 - create a new stmt S6 equivalent to the pattern (the stmt is not
3154 inserted into the code)
3155 - fill in the STMT_VINFO fields as follows:
3157 in_pattern_p related_stmt vec_stmt
3158 S1: a_i = .... - - -
3159 S2: a_2 = ..use(a_i).. - - -
3160 S3: a_1 = ..use(a_2).. - - -
3161 S4: a_0 = ..use(a_1).. true S6 -
3162 '---> S6: a_new = .... - S4 -
3163 S5: ... = ..use(a_0).. - - -
3165 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
3166 to each other through the RELATED_STMT field).
3168 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
3169 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
3170 remain irrelevant unless used by stmts other than S4.
3172 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
3173 (because they are marked as irrelevant). It will vectorize S6, and record
3174 a pointer to the new vector stmt VS6 from S6 (as usual).
3175 S4 will be skipped, and S5 will be vectorized as usual:
3177 in_pattern_p related_stmt vec_stmt
3178 S1: a_i = .... - - -
3179 S2: a_2 = ..use(a_i).. - - -
3180 S3: a_1 = ..use(a_2).. - - -
3181 > VS6: va_new = .... - - -
3182 S4: a_0 = ..use(a_1).. true S6 VS6
3183 '---> S6: a_new = .... - S4 VS6
3184 > VS5: ... = ..vuse(va_new).. - - -
3185 S5: ... = ..use(a_0).. - - -
3187 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
3188 elsewhere), and we'll end up with:
3191 VS5: ... = ..vuse(va_new)..
3193 In case of more than one pattern statements, e.g., widen-mult with
3197 S2 a_T = (TYPE) a_t;
3198 '--> S3: a_it = (interm_type) a_t;
3199 S4 prod_T = a_T * CONST;
3200 '--> S5: prod_T' = a_it w* CONST;
3202 there may be other users of a_T outside the pattern. In that case S2 will
3203 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
3204 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
3205 be recorded in S3. */
3208 vect_pattern_recog (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
)
3213 gimple_stmt_iterator si
;
3215 vect_recog_func_ptr vect_recog_func
;
3216 auto_vec
<gimple
, 1> stmts_to_replace
;
3219 if (dump_enabled_p ())
3220 dump_printf_loc (MSG_NOTE
, vect_location
,
3221 "=== vect_pattern_recog ===\n");
3225 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3226 bbs
= LOOP_VINFO_BBS (loop_vinfo
);
3227 nbbs
= loop
->num_nodes
;
3231 bbs
= &BB_VINFO_BB (bb_vinfo
);
3235 /* Scan through the loop stmts, applying the pattern recognition
3236 functions starting at each stmt visited: */
3237 for (i
= 0; i
< nbbs
; i
++)
3239 basic_block bb
= bbs
[i
];
3240 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
3242 if (bb_vinfo
&& (stmt
= gsi_stmt (si
))
3243 && vinfo_for_stmt (stmt
)
3244 && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt
)))
3247 /* Scan over all generic vect_recog_xxx_pattern functions. */
3248 for (j
= 0; j
< NUM_PATTERNS
; j
++)
3250 vect_recog_func
= vect_vect_recog_func_ptrs
[j
];
3251 vect_pattern_recog_1 (vect_recog_func
, si
,