1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2014 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
26 #include "stor-layout.h"
28 #include "basic-block.h"
29 #include "gimple-pretty-print.h"
30 #include "tree-ssa-alias.h"
31 #include "internal-fn.h"
33 #include "gimple-expr.h"
37 #include "gimple-iterator.h"
38 #include "gimple-ssa.h"
39 #include "tree-phinodes.h"
40 #include "ssa-iterators.h"
41 #include "stringpool.h"
42 #include "tree-ssanames.h"
47 #include "tree-data-ref.h"
48 #include "tree-vectorizer.h"
49 #include "recog.h" /* FIXME: for insn_data */
50 #include "diagnostic-core.h"
/* Forward declarations of the pattern-recognition routines, plus the
   dispatch table (vect_vect_recog_func_ptrs) through which the vectorizer
   tries each recognizer in turn.  Each recognizer takes the candidate
   stmt(s) and reports the pattern's input/output types via the tree *
   out-parameters.
   NOTE(review): this span is corrupted by extraction — several declarations
   are missing their trailing parameter list and the closing ");", and the
   "static" return-type lines are fused with the following identifiers.
   Restore from the upstream file before compiling; do not hand-reconstruct
   the missing tokens without the original.  */
53 /* Pattern recognition functions */
54 static gimple
vect_recog_widen_sum_pattern (vec
<gimple
> *, tree
*,
56 static gimple
vect_recog_widen_mult_pattern (vec
<gimple
> *, tree
*,
58 static gimple
vect_recog_dot_prod_pattern (vec
<gimple
> *, tree
*,
60 static gimple
vect_recog_pow_pattern (vec
<gimple
> *, tree
*, tree
*);
61 static gimple
vect_recog_over_widening_pattern (vec
<gimple
> *, tree
*,
63 static gimple
vect_recog_widen_shift_pattern (vec
<gimple
> *,
65 static gimple
vect_recog_rotate_pattern (vec
<gimple
> *, tree
*, tree
*);
66 static gimple
vect_recog_vector_vector_shift_pattern (vec
<gimple
> *,
68 static gimple
vect_recog_divmod_pattern (vec
<gimple
> *,
70 static gimple
vect_recog_mixed_size_cond_pattern (vec
<gimple
> *,
72 static gimple
vect_recog_bool_pattern (vec
<gimple
> *, tree
*, tree
*);
/* Table of all recognizers, indexed 0..NUM_PATTERNS-1; order determines
   the order in which patterns are attempted.  */
73 static vect_recog_func_ptr vect_vect_recog_func_ptrs
[NUM_PATTERNS
] = {
74 vect_recog_widen_mult_pattern
,
75 vect_recog_widen_sum_pattern
,
76 vect_recog_dot_prod_pattern
,
77 vect_recog_pow_pattern
,
78 vect_recog_widen_shift_pattern
,
79 vect_recog_over_widening_pattern
,
80 vect_recog_rotate_pattern
,
81 vect_recog_vector_vector_shift_pattern
,
82 vect_recog_divmod_pattern
,
83 vect_recog_mixed_size_cond_pattern
,
84 vect_recog_bool_pattern
};
/* Append STMT to the pattern definition sequence recorded in STMT_INFO
   (STMT_VINFO_PATTERN_DEF_SEQ), without updating SSA operands.
   NOTE(review): truncated by extraction — the return-type line, braces and
   the second argument of gimple_seq_add_stmt_without_update (presumably
   "stmt") are missing; restore from the upstream file.  */
87 append_pattern_def_seq (stmt_vec_info stmt_info
, gimple stmt
)
89 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
),
/* Start a fresh pattern definition sequence for STMT_INFO: clear any
   existing STMT_VINFO_PATTERN_DEF_SEQ, then append STMT as its first
   element via append_pattern_def_seq.
   NOTE(review): the return-type line and braces were dropped by
   extraction.  */
94 new_pattern_def_seq (stmt_vec_info stmt_info
, gimple stmt
)
96 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
) = NULL
;
97 append_pattern_def_seq (stmt_info
, stmt
);
100 /* Check whether STMT2 is in the same loop or basic block as STMT1.
101 Which of the two applies depends on whether we're currently doing
102 loop-based or basic-block-based vectorization, as determined by
103 the vinfo_for_stmt for STMT1 (which must be defined).
105 If this returns true, vinfo_for_stmt for STMT2 is guaranteed
106 to be defined as well. */
/* NOTE(review): the body below is missing its return statements and the
   if/else structure that selects between the loop check
   (flow_bb_inside_loop_p) and the BB check (BB_VINFO_BB) — only the
   condition lines survived extraction.  Restore from upstream.  */
109 vect_same_loop_or_bb_p (gimple stmt1
, gimple stmt2
)
111 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt1
);
112 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
113 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
115 if (!gimple_bb (stmt2
))
120 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
121 if (!flow_bb_inside_loop_p (loop
, gimple_bb (stmt2
)))
126 if (gimple_bb (stmt2
) != BB_VINFO_BB (bb_vinfo
)
127 || gimple_code (stmt2
) == GIMPLE_PHI
)
131 gcc_assert (vinfo_for_stmt (stmt2
));
135 /* If the LHS of DEF_STMT has a single use, and that statement is
136 in the same loop or basic block, return it. */
/* NOTE(review): truncated — the return type, local declarations for
   use_p/use_stmt, the success return and the failure returns are missing
   from this extraction.  */
139 vect_single_imm_use (gimple def_stmt
)
141 tree lhs
= gimple_assign_lhs (def_stmt
);
145 if (!single_imm_use (lhs
, &use_p
, &use_stmt
))
148 if (!vect_same_loop_or_bb_p (def_stmt
, use_stmt
))
154 /* Check whether NAME, an ssa-name used in USE_STMT,
155 is a result of a type promotion or demotion, such that:
156 DEF_STMT: NAME = NOP (name0)
157 where the type of name0 (ORIG_TYPE) is smaller/bigger than the type of NAME.
158 If CHECK_SIGN is TRUE, check that either both types are signed or both are
/* NOTE(review): extraction dropped many lines of this function: the
   return type, locals (oprnd0, dummy, dummy_gimple), the *PROMOTION
   assignments that distinguish promotion vs demotion in the precision
   comparisons below, and all return statements.  Only the surviving
   condition/assignment lines are shown; treat the control flow as
   unreconstructable from this view.  */
162 type_conversion_p (tree name
, gimple use_stmt
, bool check_sign
,
163 tree
*orig_type
, gimple
*def_stmt
, bool *promotion
)
167 loop_vec_info loop_vinfo
;
168 stmt_vec_info stmt_vinfo
;
169 tree type
= TREE_TYPE (name
);
171 enum vect_def_type dt
;
173 bb_vec_info bb_vinfo
;
175 stmt_vinfo
= vinfo_for_stmt (use_stmt
);
176 loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
177 bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
178 if (!vect_is_simple_use (name
, use_stmt
, loop_vinfo
, bb_vinfo
, def_stmt
,
182 if (dt
!= vect_internal_def
183 && dt
!= vect_external_def
&& dt
!= vect_constant_def
)
189 if (!is_gimple_assign (*def_stmt
))
192 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt
)))
195 oprnd0
= gimple_assign_rhs1 (*def_stmt
);
197 *orig_type
= TREE_TYPE (oprnd0
);
198 if (!INTEGRAL_TYPE_P (type
) || !INTEGRAL_TYPE_P (*orig_type
)
199 || ((TYPE_UNSIGNED (type
) != TYPE_UNSIGNED (*orig_type
)) && check_sign
))
202 if (TYPE_PRECISION (type
) >= (TYPE_PRECISION (*orig_type
) * 2))
204 else if (TYPE_PRECISION (*orig_type
) >= (TYPE_PRECISION (type
) * 2))
209 if (!vect_is_simple_use (oprnd0
, *def_stmt
, loop_vinfo
,
210 bb_vinfo
, &dummy_gimple
, &dummy
, &dt
))
216 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
217 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
/* NOTE(review): the "static tree" return-type line and the surrounding
   braces were dropped by extraction; the single-statement body is intact.
   All pattern temporaries share the "patt" name prefix.  */
220 vect_recog_temp_ssa_var (tree type
, gimple stmt
)
222 return make_temp_ssa_name (type
, stmt
, "patt");
225 /* Function vect_recog_dot_prod_pattern
227 Try to find the following pattern:
233 sum_0 = phi <init, sum_1>
236 S3 x_T = (TYPE1) x_t;
237 S4 y_T = (TYPE1) y_t;
239 [S6 prod = (TYPE2) prod; #optional]
240 S7 sum_1 = prod + sum_0;
242 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
243 same size of 'TYPE1' or bigger. This is a special case of a reduction
248 * STMTS: Contains a stmt from which the pattern search begins. In the
249 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
254 * TYPE_IN: The type of the input arguments to the pattern.
256 * TYPE_OUT: The type of the output of this pattern.
258 * Return value: A new stmt that will be used to replace the sequence of
259 stmts that constitute the pattern. In this case it will be:
260 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
262 Note: The dot-prod idiom is a widening reduction pattern that is
263 vectorized without preserving all the intermediate results. It
264 produces only N/2 (widened) results (by summing up pairs of
265 intermediate results) rather than all N results. Therefore, we
266 cannot allow this pattern when we want to get all the results and in
267 the correct order (as is the case when this computation is in an
268 inner-loop nested in an outer-loop that us being vectorized). */
/* NOTE(review): heavily truncated by extraction — the return-type line,
   several local declarations (oprnd0/oprnd1, def_stmt, prod_type, var,
   pattern_stmt, loop, promotion, the type_out parameter), most early
   "return NULL;" statements, branch braces and the final
   "return pattern_stmt;" are all missing.  The surviving lines follow the
   documented flow: validate a PLUS_EXPR reduction, peel off already
   recognized widen-sum / widen-mult patterns, verify the multiply operands
   are conversions from matching half-types, then build the DOT_PROD_EXPR
   replacement and dump it.  Do not reconstruct the dropped control flow
   without the upstream source.  */
271 vect_recog_dot_prod_pattern (vec
<gimple
> *stmts
, tree
*type_in
,
274 gimple stmt
, last_stmt
= (*stmts
)[0];
276 tree oprnd00
, oprnd01
;
277 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
278 tree type
, half_type
;
281 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
289 loop
= LOOP_VINFO_LOOP (loop_info
);
291 if (!is_gimple_assign (last_stmt
))
294 type
= gimple_expr_type (last_stmt
);
296 /* Look for the following pattern
300 DDPROD = (TYPE2) DPROD;
301 sum_1 = DDPROD + sum_0;
303 - DX is double the size of X
304 - DY is double the size of Y
305 - DX, DY, DPROD all have the same type
306 - sum is the same size of DPROD or bigger
307 - sum has been recognized as a reduction variable.
309 This is equivalent to:
310 DPROD = X w* Y; #widen mult
311 sum_1 = DPROD w+ sum_0; #widen summation
313 DPROD = X w* Y; #widen mult
314 sum_1 = DPROD + sum_0; #summation
317 /* Starting from LAST_STMT, follow the defs of its uses in search
318 of the above pattern. */
320 if (gimple_assign_rhs_code (last_stmt
) != PLUS_EXPR
)
323 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
325 /* Has been detected as widening-summation? */
327 stmt
= STMT_VINFO_RELATED_STMT (stmt_vinfo
);
328 type
= gimple_expr_type (stmt
);
329 if (gimple_assign_rhs_code (stmt
) != WIDEN_SUM_EXPR
)
331 oprnd0
= gimple_assign_rhs1 (stmt
);
332 oprnd1
= gimple_assign_rhs2 (stmt
);
333 half_type
= TREE_TYPE (oprnd0
);
339 if (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
)
341 oprnd0
= gimple_assign_rhs1 (last_stmt
);
342 oprnd1
= gimple_assign_rhs2 (last_stmt
);
343 if (!types_compatible_p (TREE_TYPE (oprnd0
), type
)
344 || !types_compatible_p (TREE_TYPE (oprnd1
), type
))
348 if (type_conversion_p (oprnd0
, stmt
, true, &half_type
, &def_stmt
,
353 oprnd0
= gimple_assign_rhs1 (stmt
);
359 /* So far so good. Since last_stmt was detected as a (summation) reduction,
360 we know that oprnd1 is the reduction variable (defined by a loop-header
361 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
362 Left to check that oprnd0 is defined by a (widen_)mult_expr */
363 if (TREE_CODE (oprnd0
) != SSA_NAME
)
366 prod_type
= half_type
;
367 stmt
= SSA_NAME_DEF_STMT (oprnd0
);
369 /* It could not be the dot_prod pattern if the stmt is outside the loop. */
370 if (!gimple_bb (stmt
) || !flow_bb_inside_loop_p (loop
, gimple_bb (stmt
)))
373 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
374 inside the loop (in case we are analyzing an outer-loop). */
375 if (!is_gimple_assign (stmt
))
377 stmt_vinfo
= vinfo_for_stmt (stmt
);
378 gcc_assert (stmt_vinfo
);
379 if (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_internal_def
)
381 if (gimple_assign_rhs_code (stmt
) != MULT_EXPR
)
383 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
385 /* Has been detected as a widening multiplication? */
387 stmt
= STMT_VINFO_RELATED_STMT (stmt_vinfo
);
388 if (gimple_assign_rhs_code (stmt
) != WIDEN_MULT_EXPR
)
390 stmt_vinfo
= vinfo_for_stmt (stmt
);
391 gcc_assert (stmt_vinfo
);
392 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_internal_def
);
393 oprnd00
= gimple_assign_rhs1 (stmt
);
394 oprnd01
= gimple_assign_rhs2 (stmt
);
395 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (last_stmt
))
396 = STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
);
400 tree half_type0
, half_type1
;
404 oprnd0
= gimple_assign_rhs1 (stmt
);
405 oprnd1
= gimple_assign_rhs2 (stmt
);
406 if (!types_compatible_p (TREE_TYPE (oprnd0
), prod_type
)
407 || !types_compatible_p (TREE_TYPE (oprnd1
), prod_type
))
409 if (!type_conversion_p (oprnd0
, stmt
, true, &half_type0
, &def_stmt
,
413 oprnd00
= gimple_assign_rhs1 (def_stmt
);
414 if (!type_conversion_p (oprnd1
, stmt
, true, &half_type1
, &def_stmt
,
418 oprnd01
= gimple_assign_rhs1 (def_stmt
);
419 if (!types_compatible_p (half_type0
, half_type1
))
421 if (TYPE_PRECISION (prod_type
) != TYPE_PRECISION (half_type0
) * 2)
425 half_type
= TREE_TYPE (oprnd00
);
426 *type_in
= half_type
;
429 /* Pattern detected. Create a stmt to be used to replace the pattern: */
430 var
= vect_recog_temp_ssa_var (type
, NULL
);
431 pattern_stmt
= gimple_build_assign_with_ops (DOT_PROD_EXPR
, var
,
432 oprnd00
, oprnd01
, oprnd1
);
434 if (dump_enabled_p ())
436 dump_printf_loc (MSG_NOTE
, vect_location
,
437 "vect_recog_dot_prod_pattern: detected: ");
438 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
439 dump_printf (MSG_NOTE
, "\n");
442 /* We don't allow changing the order of the computation in the inner-loop
443 when doing outer-loop vectorization. */
444 gcc_assert (!nested_in_vect_loop_p (loop
, last_stmt
));
450 /* Handle widening operation by a constant. At the moment we support MULT_EXPR
453 For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
454 we check that CONST_OPRND is less or equal to the size of HALF_TYPE.
456 Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
457 HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
458 that satisfies the above restrictions, we can perform a widening opeartion
459 from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
460 with a_it = (interm_type) a_t; */
/* NOTE(review): truncated by extraction — the return-type line, the
   new_stmt declaration, the "return true/false" statements, branch braces,
   and parts of the compare_tree_int condition and the
   gimple_build_assign_with_ops argument list are missing.  Surviving logic:
   accept the constant directly when it fits HALF_TYPE (the half-as-wide
   input type); otherwise, when TYPE is >= 4x HALF_TYPE, retry with an
   intermediate type of half TYPE's precision, reusing an existing pattern
   NOP stmt on DEF_STMT when one matches, else creating a fresh
   (NEW_TYPE) conversion and recording it via STMT_VINFO_RELATED_STMT.  */
463 vect_handle_widen_op_by_const (gimple stmt
, enum tree_code code
,
464 tree const_oprnd
, tree
*oprnd
,
465 vec
<gimple
> *stmts
, tree type
,
466 tree
*half_type
, gimple def_stmt
)
468 tree new_type
, new_oprnd
;
471 if (code
!= MULT_EXPR
&& code
!= LSHIFT_EXPR
)
474 if (((code
== MULT_EXPR
&& int_fits_type_p (const_oprnd
, *half_type
))
475 || (code
== LSHIFT_EXPR
476 && compare_tree_int (const_oprnd
, TYPE_PRECISION (*half_type
))
478 && TYPE_PRECISION (type
) == (TYPE_PRECISION (*half_type
) * 2))
480 /* CONST_OPRND is a constant of HALF_TYPE. */
481 *oprnd
= gimple_assign_rhs1 (def_stmt
);
485 if (TYPE_PRECISION (type
) < (TYPE_PRECISION (*half_type
) * 4))
488 if (!vect_same_loop_or_bb_p (stmt
, def_stmt
))
491 /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
492 a type 2 times bigger than HALF_TYPE. */
493 new_type
= build_nonstandard_integer_type (TYPE_PRECISION (type
) / 2,
494 TYPE_UNSIGNED (type
));
495 if ((code
== MULT_EXPR
&& !int_fits_type_p (const_oprnd
, new_type
))
496 || (code
== LSHIFT_EXPR
497 && compare_tree_int (const_oprnd
, TYPE_PRECISION (new_type
)) == 1))
500 /* Use NEW_TYPE for widening operation. */
501 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
)))
503 new_stmt
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
));
504 /* Check if the already created pattern stmt is what we need. */
505 if (!is_gimple_assign (new_stmt
)
506 || gimple_assign_rhs_code (new_stmt
) != NOP_EXPR
507 || TREE_TYPE (gimple_assign_lhs (new_stmt
)) != new_type
)
510 stmts
->safe_push (def_stmt
);
511 *oprnd
= gimple_assign_lhs (new_stmt
);
515 /* Create a_T = (NEW_TYPE) a_t; */
516 *oprnd
= gimple_assign_rhs1 (def_stmt
);
517 new_oprnd
= make_ssa_name (new_type
, NULL
);
518 new_stmt
= gimple_build_assign_with_ops (NOP_EXPR
, new_oprnd
, *oprnd
,
520 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
)) = new_stmt
;
521 stmts
->safe_push (def_stmt
);
525 *half_type
= new_type
;
530 /* Function vect_recog_widen_mult_pattern
532 Try to find the following pattern:
536 TYPE a_T, b_T, prod_T;
542 S5 prod_T = a_T * b_T;
544 where type 'TYPE' is at least double the size of type 'type1' and 'type2'.
546 Also detect unsigned cases:
550 unsigned TYPE u_prod_T;
551 TYPE a_T, b_T, prod_T;
557 S5 prod_T = a_T * b_T;
558 S6 u_prod_T = (unsigned TYPE) prod_T;
560 and multiplication by constants:
567 S5 prod_T = a_T * CONST;
569 A special case of multiplication by constants is when 'TYPE' is 4 times
570 bigger than 'type', but CONST fits an intermediate type 2 times smaller
571 than 'TYPE'. In that case we create an additional pattern stmt for S3
572 to create a variable of the intermediate type, and perform widen-mult
573 on the intermediate type as well:
577 TYPE a_T, prod_T, prod_T';
581 '--> a_it = (interm_type) a_t;
582 S5 prod_T = a_T * CONST;
583 '--> prod_T' = a_it w* CONST;
587 * STMTS: Contains a stmt from which the pattern search begins. In the
588 example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
589 is detected. In case of unsigned widen-mult, the original stmt (S5) is
590 replaced with S6 in STMTS. In case of multiplication by a constant
591 of an intermediate type (the last case above), STMTS also contains S3
592 (inserted before S5).
596 * TYPE_IN: The type of the input arguments to the pattern.
598 * TYPE_OUT: The type of the output of this pattern.
600 * Return value: A new stmt that will be used to replace the sequence of
601 stmts that constitute the pattern. In this case it will be:
602 WIDEN_MULT <a_t, b_t>
603 If the result of WIDEN_MULT needs to be converted to a larger type, the
604 returned stmt will be this type conversion stmt.
/* NOTE(review): heavily truncated by extraction — the return-type line,
   many locals (oprnd0/oprnd1, itype, var, use_stmt, use_lhs, use_type,
   op1_ok, promotion, dummy_int, dummy_vec), most "return NULL;" paths,
   branch braces, the itype = type default, parts of the
   supportable_widening_operation and gimple_build_assign_with_ops argument
   lists, and the final "return pattern_stmt;" are missing.  Surviving
   logic matches the header comment: verify both MULT_EXPR operands are
   promotions from half-types, handle a constant operand via
   vect_handle_widen_op_by_const, equalize mismatched half-type widths with
   an inserted NOP, handle the unsigned-result S6 case via
   vect_single_imm_use, check target support, then emit the
   WIDEN_MULT_EXPR (plus a final conversion when ITYPE != TYPE).  */
608 vect_recog_widen_mult_pattern (vec
<gimple
> *stmts
,
609 tree
*type_in
, tree
*type_out
)
611 gimple last_stmt
= stmts
->pop ();
612 gimple def_stmt0
, def_stmt1
;
614 tree type
, half_type0
, half_type1
;
615 gimple new_stmt
= NULL
, pattern_stmt
= NULL
;
616 tree vectype
, vecitype
;
618 enum tree_code dummy_code
;
624 if (!is_gimple_assign (last_stmt
))
627 type
= gimple_expr_type (last_stmt
);
629 /* Starting from LAST_STMT, follow the defs of its uses in search
630 of the above pattern. */
632 if (gimple_assign_rhs_code (last_stmt
) != MULT_EXPR
)
635 oprnd0
= gimple_assign_rhs1 (last_stmt
);
636 oprnd1
= gimple_assign_rhs2 (last_stmt
);
637 if (!types_compatible_p (TREE_TYPE (oprnd0
), type
)
638 || !types_compatible_p (TREE_TYPE (oprnd1
), type
))
641 /* Check argument 0. */
642 if (!type_conversion_p (oprnd0
, last_stmt
, false, &half_type0
, &def_stmt0
,
646 /* Check argument 1. */
647 op1_ok
= type_conversion_p (oprnd1
, last_stmt
, false, &half_type1
,
648 &def_stmt1
, &promotion
);
650 if (op1_ok
&& promotion
)
652 oprnd0
= gimple_assign_rhs1 (def_stmt0
);
653 oprnd1
= gimple_assign_rhs1 (def_stmt1
);
657 if (TREE_CODE (oprnd1
) == INTEGER_CST
658 && TREE_CODE (half_type0
) == INTEGER_TYPE
659 && vect_handle_widen_op_by_const (last_stmt
, MULT_EXPR
, oprnd1
,
660 &oprnd0
, stmts
, type
,
661 &half_type0
, def_stmt0
))
663 half_type1
= half_type0
;
664 oprnd1
= fold_convert (half_type1
, oprnd1
);
670 /* If the two arguments have different sizes, convert the one with
671 the smaller type into the larger type. */
672 if (TYPE_PRECISION (half_type0
) != TYPE_PRECISION (half_type1
))
675 gimple def_stmt
= NULL
;
677 if (TYPE_PRECISION (half_type0
) < TYPE_PRECISION (half_type1
))
679 def_stmt
= def_stmt0
;
680 half_type0
= half_type1
;
685 def_stmt
= def_stmt1
;
686 half_type1
= half_type0
;
690 tree old_oprnd
= gimple_assign_rhs1 (def_stmt
);
691 tree new_oprnd
= make_ssa_name (half_type0
, NULL
);
692 new_stmt
= gimple_build_assign_with_ops (NOP_EXPR
, new_oprnd
,
693 old_oprnd
, NULL_TREE
);
697 /* Handle unsigned case. Look for
698 S6 u_prod_T = (unsigned TYPE) prod_T;
699 Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */
700 if (TYPE_UNSIGNED (type
) != TYPE_UNSIGNED (half_type0
))
706 if (TYPE_UNSIGNED (type
) == TYPE_UNSIGNED (half_type1
))
709 use_stmt
= vect_single_imm_use (last_stmt
);
710 if (!use_stmt
|| !is_gimple_assign (use_stmt
)
711 || gimple_assign_rhs_code (use_stmt
) != NOP_EXPR
)
714 use_lhs
= gimple_assign_lhs (use_stmt
);
715 use_type
= TREE_TYPE (use_lhs
);
716 if (!INTEGRAL_TYPE_P (use_type
)
717 || (TYPE_UNSIGNED (type
) == TYPE_UNSIGNED (use_type
))
718 || (TYPE_PRECISION (type
) != TYPE_PRECISION (use_type
)))
722 last_stmt
= use_stmt
;
725 if (!types_compatible_p (half_type0
, half_type1
))
728 /* If TYPE is more than twice larger than HALF_TYPE, we use WIDEN_MULT
729 to get an intermediate result of type ITYPE. In this case we need
730 to build a statement to convert this intermediate result to type TYPE. */
732 if (TYPE_PRECISION (type
) > TYPE_PRECISION (half_type0
) * 2)
733 itype
= build_nonstandard_integer_type
734 (GET_MODE_BITSIZE (TYPE_MODE (half_type0
)) * 2,
735 TYPE_UNSIGNED (type
));
737 /* Pattern detected. */
738 if (dump_enabled_p ())
739 dump_printf_loc (MSG_NOTE
, vect_location
,
740 "vect_recog_widen_mult_pattern: detected:\n");
742 /* Check target support */
743 vectype
= get_vectype_for_scalar_type (half_type0
);
744 vecitype
= get_vectype_for_scalar_type (itype
);
747 || !supportable_widening_operation (WIDEN_MULT_EXPR
, last_stmt
,
749 &dummy_code
, &dummy_code
,
750 &dummy_int
, &dummy_vec
))
754 *type_out
= get_vectype_for_scalar_type (type
);
756 /* Pattern supported. Create a stmt to be used to replace the pattern: */
757 var
= vect_recog_temp_ssa_var (itype
, NULL
);
758 pattern_stmt
= gimple_build_assign_with_ops (WIDEN_MULT_EXPR
, var
, oprnd0
,
761 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
762 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
763 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
764 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
) = NULL
;
766 /* If the original two operands have different sizes, we may need to convert
767 the smaller one into the larget type. If this is the case, at this point
768 the new stmt is already built. */
771 append_pattern_def_seq (stmt_vinfo
, new_stmt
);
772 stmt_vec_info new_stmt_info
773 = new_stmt_vec_info (new_stmt
, loop_vinfo
, bb_vinfo
);
774 set_vinfo_for_stmt (new_stmt
, new_stmt_info
);
775 STMT_VINFO_VECTYPE (new_stmt_info
) = vectype
;
778 /* If ITYPE is not TYPE, we need to build a type convertion stmt to convert
779 the result of the widen-mult operation into type TYPE. */
782 append_pattern_def_seq (stmt_vinfo
, pattern_stmt
);
783 stmt_vec_info pattern_stmt_info
784 = new_stmt_vec_info (pattern_stmt
, loop_vinfo
, bb_vinfo
);
785 set_vinfo_for_stmt (pattern_stmt
, pattern_stmt_info
);
786 STMT_VINFO_VECTYPE (pattern_stmt_info
) = vecitype
;
788 = gimple_build_assign_with_ops (NOP_EXPR
,
789 vect_recog_temp_ssa_var (type
, NULL
),
790 gimple_assign_lhs (pattern_stmt
),
794 if (dump_enabled_p ())
795 dump_gimple_stmt_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, pattern_stmt
, 0);
797 stmts
->safe_push (last_stmt
);
802 /* Function vect_recog_pow_pattern
804 Try to find the following pattern:
808 with POW being one of pow, powf, powi, powif and N being
813 * LAST_STMT: A stmt from which the pattern search begins.
817 * TYPE_IN: The type of the input arguments to the pattern.
819 * TYPE_OUT: The type of the output of this pattern.
821 * Return value: A new stmt that will be used to replace the sequence of
822 stmts that constitute the pattern. In this case it will be:
/* NOTE(review): truncated by extraction — the return-type line, locals
   (var, stmt), the switch's case labels (presumably BUILT_IN_POW /
   BUILT_IN_POWF / BUILT_IN_POWI variants) and default, the "return stmt;"
   statements and the trailing "return NULL;" are missing.  Surviving
   logic: require a builtin call with an lhs and a constant exponent, then
   rewrite pow(x, 2) as x * x (MULT_EXPR) and pow(x, 0.5) as a call to the
   type's BUILT_IN_SQRT when the target can vectorize it.  */
829 vect_recog_pow_pattern (vec
<gimple
> *stmts
, tree
*type_in
,
832 gimple last_stmt
= (*stmts
)[0];
833 tree fn
, base
, exp
= NULL
;
837 if (!is_gimple_call (last_stmt
) || gimple_call_lhs (last_stmt
) == NULL
)
840 fn
= gimple_call_fndecl (last_stmt
);
841 if (fn
== NULL_TREE
|| DECL_BUILT_IN_CLASS (fn
) != BUILT_IN_NORMAL
)
844 switch (DECL_FUNCTION_CODE (fn
))
850 base
= gimple_call_arg (last_stmt
, 0);
851 exp
= gimple_call_arg (last_stmt
, 1);
852 if (TREE_CODE (exp
) != REAL_CST
853 && TREE_CODE (exp
) != INTEGER_CST
)
861 /* We now have a pow or powi builtin function call with a constant
864 *type_out
= NULL_TREE
;
866 /* Catch squaring. */
867 if ((tree_fits_shwi_p (exp
)
868 && tree_to_shwi (exp
) == 2)
869 || (TREE_CODE (exp
) == REAL_CST
870 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp
), dconst2
)))
872 *type_in
= TREE_TYPE (base
);
874 var
= vect_recog_temp_ssa_var (TREE_TYPE (base
), NULL
);
875 stmt
= gimple_build_assign_with_ops (MULT_EXPR
, var
, base
, base
);
879 /* Catch square root. */
880 if (TREE_CODE (exp
) == REAL_CST
881 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp
), dconsthalf
))
883 tree newfn
= mathfn_built_in (TREE_TYPE (base
), BUILT_IN_SQRT
);
884 *type_in
= get_vectype_for_scalar_type (TREE_TYPE (base
));
887 gimple stmt
= gimple_build_call (newfn
, 1, base
);
888 if (vectorizable_function (stmt
, *type_in
, *type_in
)
891 var
= vect_recog_temp_ssa_var (TREE_TYPE (base
), stmt
);
892 gimple_call_set_lhs (stmt
, var
);
902 /* Function vect_recog_widen_sum_pattern
904 Try to find the following pattern:
907 TYPE x_T, sum = init;
909 sum_0 = phi <init, sum_1>
912 S3 sum_1 = x_T + sum_0;
914 where type 'TYPE' is at least double the size of type 'type', i.e - we're
915 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
916 a special case of a reduction computation.
920 * LAST_STMT: A stmt from which the pattern search begins. In the example,
921 when this function is called with S3, the pattern {S2,S3} will be detected.
925 * TYPE_IN: The type of the input arguments to the pattern.
927 * TYPE_OUT: The type of the output of this pattern.
929 * Return value: A new stmt that will be used to replace the sequence of
930 stmts that constitute the pattern. In this case it will be:
931 WIDEN_SUM <x_t, sum_0>
933 Note: The widening-sum idiom is a widening reduction pattern that is
934 vectorized without preserving all the intermediate results. It
935 produces only N/2 (widened) results (by summing up pairs of
936 intermediate results) rather than all N results. Therefore, we
937 cannot allow this pattern when we want to get all the results and in
938 the correct order (as is the case when this computation is in an
939 inner-loop nested in an outer-loop that us being vectorized). */
/* NOTE(review): truncated by extraction — the return-type line, locals
   (oprnd0/oprnd1, var, pattern_stmt, loop, promotion, the type_out
   parameter), the "return NULL;" paths, branch braces, the *type_out
   assignment and part of the WIDEN_SUM_EXPR build-call argument list are
   missing.  Surviving logic matches the header: require a PLUS_EXPR
   reduction whose first operand is a promotion from HALF_TYPE, then emit
   WIDEN_SUM_EXPR <x_t, sum_0>.  */
942 vect_recog_widen_sum_pattern (vec
<gimple
> *stmts
, tree
*type_in
,
945 gimple stmt
, last_stmt
= (*stmts
)[0];
947 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
948 tree type
, half_type
;
950 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
958 loop
= LOOP_VINFO_LOOP (loop_info
);
960 if (!is_gimple_assign (last_stmt
))
963 type
= gimple_expr_type (last_stmt
);
965 /* Look for the following pattern
968 In which DX is at least double the size of X, and sum_1 has been
969 recognized as a reduction variable.
972 /* Starting from LAST_STMT, follow the defs of its uses in search
973 of the above pattern. */
975 if (gimple_assign_rhs_code (last_stmt
) != PLUS_EXPR
)
978 if (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
)
981 oprnd0
= gimple_assign_rhs1 (last_stmt
);
982 oprnd1
= gimple_assign_rhs2 (last_stmt
);
983 if (!types_compatible_p (TREE_TYPE (oprnd0
), type
)
984 || !types_compatible_p (TREE_TYPE (oprnd1
), type
))
987 /* So far so good. Since last_stmt was detected as a (summation) reduction,
988 we know that oprnd1 is the reduction variable (defined by a loop-header
989 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
990 Left to check that oprnd0 is defined by a cast from type 'type' to type
993 if (!type_conversion_p (oprnd0
, last_stmt
, true, &half_type
, &stmt
,
998 oprnd0
= gimple_assign_rhs1 (stmt
);
999 *type_in
= half_type
;
1002 /* Pattern detected. Create a stmt to be used to replace the pattern: */
1003 var
= vect_recog_temp_ssa_var (type
, NULL
);
1004 pattern_stmt
= gimple_build_assign_with_ops (WIDEN_SUM_EXPR
, var
,
1007 if (dump_enabled_p ())
1009 dump_printf_loc (MSG_NOTE
, vect_location
,
1010 "vect_recog_widen_sum_pattern: detected: ");
1011 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
1012 dump_printf (MSG_NOTE
, "\n");
1015 /* We don't allow changing the order of the computation in the inner-loop
1016 when doing outer-loop vectorization. */
1017 gcc_assert (!nested_in_vect_loop_p (loop
, last_stmt
));
1019 return pattern_stmt
;
1023 /* Return TRUE if the operation in STMT can be performed on a smaller type.
1026 STMT - a statement to check.
1027 DEF - we support operations with two operands, one of which is constant.
1028 The other operand can be defined by a demotion operation, or by a
1029 previous statement in a sequence of over-promoted operations. In the
1030 later case DEF is used to replace that operand. (It is defined by a
1031 pattern statement we created for the previous statement in the
1035 NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not
1036 NULL, it's the type of DEF.
1037 STMTS - additional pattern statements. If a pattern statement (type
1038 conversion) is created in this function, its original statement is
1042 OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
1043 operands to use in the new pattern statement for STMT (will be created
1044 in vect_recog_over_widening_pattern ()).
1045 NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
1046 statements for STMT: the first one is a type promotion and the second
1047 one is the operation itself. We return the type promotion statement
1048 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
1049 the second pattern statement. */
/* NOTE(review): truncated by extraction — the return-type line, the FIRST
   flag and stmts parameter declarations, the "return false/true"
   statements, the switch/case structure selecting the per-opcode checks
   (the surviving fragments suggest distinct handling for the bitwise ops,
   RSHIFT_EXPR and LSHIFT_EXPR), branch braces, the *op0 assignment and
   parts of two gimple_build_assign_with_ops argument lists are missing.
   Surviving logic: require a shift/bitwise assign with an SSA first
   operand (single use) and constant second operand; determine whether
   HALF_TYPE suffices or an INTERM_TYPE of twice its precision is needed
   (both for constant fit and for vect_supportable_shift); then rewire
   OPRND per the four cases documented at original line 1164, converting
   the constant operand into *NEW_TYPE at the end.  */
1052 vect_operation_fits_smaller_type (gimple stmt
, tree def
, tree
*new_type
,
1053 tree
*op0
, tree
*op1
, gimple
*new_def_stmt
,
1056 enum tree_code code
;
1057 tree const_oprnd
, oprnd
;
1058 tree interm_type
= NULL_TREE
, half_type
, new_oprnd
, type
;
1059 gimple def_stmt
, new_stmt
;
1065 *new_def_stmt
= NULL
;
1067 if (!is_gimple_assign (stmt
))
1070 code
= gimple_assign_rhs_code (stmt
);
1071 if (code
!= LSHIFT_EXPR
&& code
!= RSHIFT_EXPR
1072 && code
!= BIT_IOR_EXPR
&& code
!= BIT_XOR_EXPR
&& code
!= BIT_AND_EXPR
)
1075 oprnd
= gimple_assign_rhs1 (stmt
);
1076 const_oprnd
= gimple_assign_rhs2 (stmt
);
1077 type
= gimple_expr_type (stmt
);
1079 if (TREE_CODE (oprnd
) != SSA_NAME
1080 || TREE_CODE (const_oprnd
) != INTEGER_CST
)
1083 /* If oprnd has other uses besides that in stmt we cannot mark it
1084 as being part of a pattern only. */
1085 if (!has_single_use (oprnd
))
1088 /* If we are in the middle of a sequence, we use DEF from a previous
1089 statement. Otherwise, OPRND has to be a result of type promotion. */
1092 half_type
= *new_type
;
1098 if (!type_conversion_p (oprnd
, stmt
, false, &half_type
, &def_stmt
,
1101 || !vect_same_loop_or_bb_p (stmt
, def_stmt
))
1105 /* Can we perform the operation on a smaller type? */
1111 if (!int_fits_type_p (const_oprnd
, half_type
))
1113 /* HALF_TYPE is not enough. Try a bigger type if possible. */
1114 if (TYPE_PRECISION (type
) < (TYPE_PRECISION (half_type
) * 4))
1117 interm_type
= build_nonstandard_integer_type (
1118 TYPE_PRECISION (half_type
) * 2, TYPE_UNSIGNED (type
));
1119 if (!int_fits_type_p (const_oprnd
, interm_type
))
1126 /* Try intermediate type - HALF_TYPE is not enough for sure. */
1127 if (TYPE_PRECISION (type
) < (TYPE_PRECISION (half_type
) * 4))
1130 /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
1131 (e.g., if the original value was char, the shift amount is at most 8
1132 if we want to use short). */
1133 if (compare_tree_int (const_oprnd
, TYPE_PRECISION (half_type
)) == 1)
1136 interm_type
= build_nonstandard_integer_type (
1137 TYPE_PRECISION (half_type
) * 2, TYPE_UNSIGNED (type
));
1139 if (!vect_supportable_shift (code
, interm_type
))
1145 if (vect_supportable_shift (code
, half_type
))
1148 /* Try intermediate type - HALF_TYPE is not supported. */
1149 if (TYPE_PRECISION (type
) < (TYPE_PRECISION (half_type
) * 4))
1152 interm_type
= build_nonstandard_integer_type (
1153 TYPE_PRECISION (half_type
) * 2, TYPE_UNSIGNED (type
));
1155 if (!vect_supportable_shift (code
, interm_type
))
1164 /* There are four possible cases:
1165 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
1166 the first statement in the sequence)
1167 a. The original, HALF_TYPE, is not enough - we replace the promotion
1168 from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
1169 b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
1171 2. OPRND is defined by a pattern statement we created.
1172 a. Its type is not sufficient for the operation, we create a new stmt:
1173 a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store
1174 this statement in NEW_DEF_STMT, and it is later put in
1175 STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
1176 b. OPRND is good to use in the new statement. */
1181 /* Replace the original type conversion HALF_TYPE->TYPE with
1182 HALF_TYPE->INTERM_TYPE. */
1183 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
)))
1185 new_stmt
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
));
1186 /* Check if the already created pattern stmt is what we need. */
1187 if (!is_gimple_assign (new_stmt
)
1188 || gimple_assign_rhs_code (new_stmt
) != NOP_EXPR
1189 || TREE_TYPE (gimple_assign_lhs (new_stmt
)) != interm_type
)
1192 stmts
->safe_push (def_stmt
);
1193 oprnd
= gimple_assign_lhs (new_stmt
);
1197 /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */
1198 oprnd
= gimple_assign_rhs1 (def_stmt
);
1199 new_oprnd
= make_ssa_name (interm_type
, NULL
);
1200 new_stmt
= gimple_build_assign_with_ops (NOP_EXPR
, new_oprnd
,
1202 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
)) = new_stmt
;
1203 stmts
->safe_push (def_stmt
);
1209 /* Retrieve the operand before the type promotion. */
1210 oprnd
= gimple_assign_rhs1 (def_stmt
);
1217 /* Create a type conversion HALF_TYPE->INTERM_TYPE. */
1218 new_oprnd
= make_ssa_name (interm_type
, NULL
);
1219 new_stmt
= gimple_build_assign_with_ops (NOP_EXPR
, new_oprnd
,
1222 *new_def_stmt
= new_stmt
;
1225 /* Otherwise, OPRND is already set. */
1229 *new_type
= interm_type
;
1231 *new_type
= half_type
;
1234 *op1
= fold_convert (*new_type
, const_oprnd
);
1240 /* Try to find a statement or a sequence of statements that can be performed
1244 TYPE x_T, res0_T, res1_T;
1247 S2 x_T = (TYPE) x_t;
1248 S3 res0_T = op (x_T, C0);
1249 S4 res1_T = op (res0_T, C1);
1250 S5 ... = () res1_T; - type demotion
1252 where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
1254 Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
1255 be 'type' or some intermediate type. For now, we expect S5 to be a type
1256 demotion operation. We also check that S3 and S4 have only one use. */
1259 vect_recog_over_widening_pattern (vec
<gimple
> *stmts
,
1260 tree
*type_in
, tree
*type_out
)
1262 gimple stmt
= stmts
->pop ();
1263 gimple pattern_stmt
= NULL
, new_def_stmt
, prev_stmt
= NULL
, use_stmt
= NULL
;
1264 tree op0
, op1
, vectype
= NULL_TREE
, use_lhs
, use_type
;
1265 tree var
= NULL_TREE
, new_type
= NULL_TREE
, new_oprnd
;
1272 if (!vinfo_for_stmt (stmt
)
1273 || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt
)))
1276 new_def_stmt
= NULL
;
1277 if (!vect_operation_fits_smaller_type (stmt
, var
, &new_type
,
1278 &op0
, &op1
, &new_def_stmt
,
1287 /* STMT can be performed on a smaller type. Check its uses. */
1288 use_stmt
= vect_single_imm_use (stmt
);
1289 if (!use_stmt
|| !is_gimple_assign (use_stmt
))
1292 /* Create pattern statement for STMT. */
1293 vectype
= get_vectype_for_scalar_type (new_type
);
1297 /* We want to collect all the statements for which we create pattern
1298 statetments, except for the case when the last statement in the
1299 sequence doesn't have a corresponding pattern statement. In such
1300 case we associate the last pattern statement with the last statement
1301 in the sequence. Therefore, we only add the original statement to
1302 the list if we know that it is not the last. */
1304 stmts
->safe_push (prev_stmt
);
1306 var
= vect_recog_temp_ssa_var (new_type
, NULL
);
1308 = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt
), var
,
1310 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt
)) = pattern_stmt
;
1311 new_pattern_def_seq (vinfo_for_stmt (stmt
), new_def_stmt
);
1313 if (dump_enabled_p ())
1315 dump_printf_loc (MSG_NOTE
, vect_location
,
1316 "created pattern stmt: ");
1317 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
1318 dump_printf (MSG_NOTE
, "\n");
1321 type
= gimple_expr_type (stmt
);
1328 /* We got a sequence. We expect it to end with a type demotion operation.
1329 Otherwise, we quit (for now). There are three possible cases: the
1330 conversion is to NEW_TYPE (we don't do anything), the conversion is to
1331 a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
1332 NEW_TYPE differs (we create a new conversion statement). */
1333 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt
)))
1335 use_lhs
= gimple_assign_lhs (use_stmt
);
1336 use_type
= TREE_TYPE (use_lhs
);
1337 /* Support only type demotion or signedess change. */
1338 if (!INTEGRAL_TYPE_P (use_type
)
1339 || TYPE_PRECISION (type
) <= TYPE_PRECISION (use_type
))
1342 /* Check that NEW_TYPE is not bigger than the conversion result. */
1343 if (TYPE_PRECISION (new_type
) > TYPE_PRECISION (use_type
))
1346 if (TYPE_UNSIGNED (new_type
) != TYPE_UNSIGNED (use_type
)
1347 || TYPE_PRECISION (new_type
) != TYPE_PRECISION (use_type
))
1349 /* Create NEW_TYPE->USE_TYPE conversion. */
1350 new_oprnd
= make_ssa_name (use_type
, NULL
);
1351 pattern_stmt
= gimple_build_assign_with_ops (NOP_EXPR
, new_oprnd
,
1353 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt
)) = pattern_stmt
;
1355 *type_in
= get_vectype_for_scalar_type (new_type
);
1356 *type_out
= get_vectype_for_scalar_type (use_type
);
1358 /* We created a pattern statement for the last statement in the
1359 sequence, so we don't need to associate it with the pattern
1360 statement created for PREV_STMT. Therefore, we add PREV_STMT
1361 to the list in order to mark it later in vect_pattern_recog_1. */
1363 stmts
->safe_push (prev_stmt
);
1368 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt
))
1369 = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt
));
1372 *type_out
= NULL_TREE
;
1375 stmts
->safe_push (use_stmt
);
1378 /* TODO: support general case, create a conversion to the correct type. */
1381 /* Pattern detected. */
1382 if (dump_enabled_p ())
1384 dump_printf_loc (MSG_NOTE
, vect_location
,
1385 "vect_recog_over_widening_pattern: detected: ");
1386 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
1387 dump_printf (MSG_NOTE
, "\n");
1390 return pattern_stmt
;
1393 /* Detect widening shift pattern:
1399 S2 a_T = (TYPE) a_t;
1400 S3 res_T = a_T << CONST;
1402 where type 'TYPE' is at least double the size of type 'type'.
1404 Also detect cases where the shift result is immediately converted
1405 to another type 'result_type' that is no larger in size than 'TYPE'.
1406 In those cases we perform a widen-shift that directly results in
1407 'result_type', to avoid a possible over-widening situation:
1411 result_type res_result;
1414 S2 a_T = (TYPE) a_t;
1415 S3 res_T = a_T << CONST;
1416 S4 res_result = (result_type) res_T;
1417 '--> res_result' = a_t w<< CONST;
1419 And a case when 'TYPE' is 4 times bigger than 'type'. In that case we
1420 create an additional pattern stmt for S2 to create a variable of an
1421 intermediate type, and perform widen-shift on the intermediate type:
1425 TYPE a_T, res_T, res_T';
1428 S2 a_T = (TYPE) a_t;
1429 '--> a_it = (interm_type) a_t;
1430 S3 res_T = a_T << CONST;
1431 '--> res_T' = a_it <<* CONST;
1435 * STMTS: Contains a stmt from which the pattern search begins.
1436 In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
1437 in STMTS. When an intermediate type is used and a pattern statement is
1438 created for S2, we also put S2 here (before S3).
1442 * TYPE_IN: The type of the input arguments to the pattern.
1444 * TYPE_OUT: The type of the output of this pattern.
1446 * Return value: A new stmt that will be used to replace the sequence of
1447 stmts that constitute the pattern. In this case it will be:
1448 WIDEN_LSHIFT_EXPR <a_t, CONST>. */
1451 vect_recog_widen_shift_pattern (vec
<gimple
> *stmts
,
1452 tree
*type_in
, tree
*type_out
)
1454 gimple last_stmt
= stmts
->pop ();
1456 tree oprnd0
, oprnd1
;
1457 tree type
, half_type0
;
1458 gimple pattern_stmt
;
1459 tree vectype
, vectype_out
= NULL_TREE
;
1461 enum tree_code dummy_code
;
1463 vec
<tree
> dummy_vec
;
1467 if (!is_gimple_assign (last_stmt
) || !vinfo_for_stmt (last_stmt
))
1470 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt
)))
1473 if (gimple_assign_rhs_code (last_stmt
) != LSHIFT_EXPR
)
1476 oprnd0
= gimple_assign_rhs1 (last_stmt
);
1477 oprnd1
= gimple_assign_rhs2 (last_stmt
);
1478 if (TREE_CODE (oprnd0
) != SSA_NAME
|| TREE_CODE (oprnd1
) != INTEGER_CST
)
1481 /* Check operand 0: it has to be defined by a type promotion. */
1482 if (!type_conversion_p (oprnd0
, last_stmt
, false, &half_type0
, &def_stmt0
,
1487 /* Check operand 1: has to be positive. We check that it fits the type
1488 in vect_handle_widen_op_by_const (). */
1489 if (tree_int_cst_compare (oprnd1
, size_zero_node
) <= 0)
1492 oprnd0
= gimple_assign_rhs1 (def_stmt0
);
1493 type
= gimple_expr_type (last_stmt
);
1495 /* Check for subsequent conversion to another type. */
1496 use_stmt
= vect_single_imm_use (last_stmt
);
1497 if (use_stmt
&& is_gimple_assign (use_stmt
)
1498 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt
))
1499 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt
)))
1501 tree use_lhs
= gimple_assign_lhs (use_stmt
);
1502 tree use_type
= TREE_TYPE (use_lhs
);
1504 if (INTEGRAL_TYPE_P (use_type
)
1505 && TYPE_PRECISION (use_type
) <= TYPE_PRECISION (type
))
1507 last_stmt
= use_stmt
;
1512 /* Check if this a widening operation. */
1513 if (!vect_handle_widen_op_by_const (last_stmt
, LSHIFT_EXPR
, oprnd1
,
1515 type
, &half_type0
, def_stmt0
))
1518 /* Pattern detected. */
1519 if (dump_enabled_p ())
1520 dump_printf_loc (MSG_NOTE
, vect_location
,
1521 "vect_recog_widen_shift_pattern: detected:\n");
1523 /* Check target support. */
1524 vectype
= get_vectype_for_scalar_type (half_type0
);
1525 vectype_out
= get_vectype_for_scalar_type (type
);
1529 || !supportable_widening_operation (WIDEN_LSHIFT_EXPR
, last_stmt
,
1530 vectype_out
, vectype
,
1531 &dummy_code
, &dummy_code
,
1532 &dummy_int
, &dummy_vec
))
1536 *type_out
= vectype_out
;
1538 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1539 var
= vect_recog_temp_ssa_var (type
, NULL
);
1541 gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR
, var
, oprnd0
, oprnd1
);
1543 if (dump_enabled_p ())
1544 dump_gimple_stmt_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, pattern_stmt
, 0);
1546 stmts
->safe_push (last_stmt
);
1547 return pattern_stmt
;
1550 /* Detect a rotate pattern wouldn't be otherwise vectorized:
1554 S0 a_t = b_t r<< c_t;
1558 * STMTS: Contains a stmt from which the pattern search begins,
1559 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1563 S2 e_t = d_t & (B - 1);
1564 S3 f_t = b_t << c_t;
1565 S4 g_t = b_t >> e_t;
1568 where B is element bitsize of type.
1572 * TYPE_IN: The type of the input arguments to the pattern.
1574 * TYPE_OUT: The type of the output of this pattern.
1576 * Return value: A new stmt that will be used to replace the rotate
1580 vect_recog_rotate_pattern (vec
<gimple
> *stmts
, tree
*type_in
, tree
*type_out
)
1582 gimple last_stmt
= stmts
->pop ();
1583 tree oprnd0
, oprnd1
, lhs
, var
, var1
, var2
, vectype
, type
, stype
, def
, def2
;
1584 gimple pattern_stmt
, def_stmt
;
1585 enum tree_code rhs_code
;
1586 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
1587 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1588 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
1589 enum vect_def_type dt
;
1590 optab optab1
, optab2
;
1591 edge ext_def
= NULL
;
1593 if (!is_gimple_assign (last_stmt
))
1596 rhs_code
= gimple_assign_rhs_code (last_stmt
);
1606 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
1609 lhs
= gimple_assign_lhs (last_stmt
);
1610 oprnd0
= gimple_assign_rhs1 (last_stmt
);
1611 type
= TREE_TYPE (oprnd0
);
1612 oprnd1
= gimple_assign_rhs2 (last_stmt
);
1613 if (TREE_CODE (oprnd0
) != SSA_NAME
1614 || TYPE_PRECISION (TREE_TYPE (lhs
)) != TYPE_PRECISION (type
)
1615 || !INTEGRAL_TYPE_P (type
)
1616 || !TYPE_UNSIGNED (type
))
1619 if (!vect_is_simple_use (oprnd1
, last_stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
1623 if (dt
!= vect_internal_def
1624 && dt
!= vect_constant_def
1625 && dt
!= vect_external_def
)
1628 vectype
= get_vectype_for_scalar_type (type
);
1629 if (vectype
== NULL_TREE
)
1632 /* If vector/vector or vector/scalar rotate is supported by the target,
1633 don't do anything here. */
1634 optab1
= optab_for_tree_code (rhs_code
, vectype
, optab_vector
);
1636 && optab_handler (optab1
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
1639 if (bb_vinfo
!= NULL
|| dt
!= vect_internal_def
)
1641 optab2
= optab_for_tree_code (rhs_code
, vectype
, optab_scalar
);
1643 && optab_handler (optab2
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
1647 /* If vector/vector or vector/scalar shifts aren't supported by the target,
1648 don't do anything here either. */
1649 optab1
= optab_for_tree_code (LSHIFT_EXPR
, vectype
, optab_vector
);
1650 optab2
= optab_for_tree_code (RSHIFT_EXPR
, vectype
, optab_vector
);
1652 || optab_handler (optab1
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
1654 || optab_handler (optab2
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
1656 if (bb_vinfo
== NULL
&& dt
== vect_internal_def
)
1658 optab1
= optab_for_tree_code (LSHIFT_EXPR
, vectype
, optab_scalar
);
1659 optab2
= optab_for_tree_code (RSHIFT_EXPR
, vectype
, optab_scalar
);
1661 || optab_handler (optab1
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
1663 || optab_handler (optab2
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
1668 *type_out
= vectype
;
1669 if (*type_in
== NULL_TREE
)
1672 if (dt
== vect_external_def
1673 && TREE_CODE (oprnd1
) == SSA_NAME
1676 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1677 ext_def
= loop_preheader_edge (loop
);
1678 if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1
))
1680 basic_block bb
= gimple_bb (SSA_NAME_DEF_STMT (oprnd1
));
1682 || !dominated_by_p (CDI_DOMINATORS
, ext_def
->dest
, bb
))
1688 if (TREE_CODE (oprnd1
) == INTEGER_CST
1689 || TYPE_MODE (TREE_TYPE (oprnd1
)) == TYPE_MODE (type
))
1691 else if (def_stmt
&& gimple_assign_cast_p (def_stmt
))
1693 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
1694 if (TYPE_MODE (TREE_TYPE (rhs1
)) == TYPE_MODE (type
)
1695 && TYPE_PRECISION (TREE_TYPE (rhs1
))
1696 == TYPE_PRECISION (type
))
1700 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
) = NULL
;
1701 if (def
== NULL_TREE
)
1703 def
= vect_recog_temp_ssa_var (type
, NULL
);
1704 def_stmt
= gimple_build_assign_with_ops (NOP_EXPR
, def
, oprnd1
,
1709 = gsi_insert_on_edge_immediate (ext_def
, def_stmt
);
1710 gcc_assert (!new_bb
);
1713 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
1715 stype
= TREE_TYPE (def
);
1717 if (TREE_CODE (def
) == INTEGER_CST
)
1719 if (!tree_fits_uhwi_p (def
)
1720 || tree_to_uhwi (def
) >= GET_MODE_PRECISION (TYPE_MODE (type
))
1721 || integer_zerop (def
))
1723 def2
= build_int_cst (stype
,
1724 GET_MODE_PRECISION (TYPE_MODE (type
))
1725 - tree_to_uhwi (def
));
1729 tree vecstype
= get_vectype_for_scalar_type (stype
);
1730 stmt_vec_info def_stmt_vinfo
;
1732 if (vecstype
== NULL_TREE
)
1734 def2
= vect_recog_temp_ssa_var (stype
, NULL
);
1735 def_stmt
= gimple_build_assign_with_ops (NEGATE_EXPR
, def2
, def
,
1740 = gsi_insert_on_edge_immediate (ext_def
, def_stmt
);
1741 gcc_assert (!new_bb
);
1745 def_stmt_vinfo
= new_stmt_vec_info (def_stmt
, loop_vinfo
, bb_vinfo
);
1746 set_vinfo_for_stmt (def_stmt
, def_stmt_vinfo
);
1747 STMT_VINFO_VECTYPE (def_stmt_vinfo
) = vecstype
;
1748 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
1751 def2
= vect_recog_temp_ssa_var (stype
, NULL
);
1753 = build_int_cst (stype
, GET_MODE_PRECISION (TYPE_MODE (stype
)) - 1);
1754 def_stmt
= gimple_build_assign_with_ops (BIT_AND_EXPR
, def2
,
1755 gimple_assign_lhs (def_stmt
),
1760 = gsi_insert_on_edge_immediate (ext_def
, def_stmt
);
1761 gcc_assert (!new_bb
);
1765 def_stmt_vinfo
= new_stmt_vec_info (def_stmt
, loop_vinfo
, bb_vinfo
);
1766 set_vinfo_for_stmt (def_stmt
, def_stmt_vinfo
);
1767 STMT_VINFO_VECTYPE (def_stmt_vinfo
) = vecstype
;
1768 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
1772 var1
= vect_recog_temp_ssa_var (type
, NULL
);
1773 def_stmt
= gimple_build_assign_with_ops (rhs_code
== LROTATE_EXPR
1774 ? LSHIFT_EXPR
: RSHIFT_EXPR
,
1776 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
1778 var2
= vect_recog_temp_ssa_var (type
, NULL
);
1779 def_stmt
= gimple_build_assign_with_ops (rhs_code
== LROTATE_EXPR
1780 ? RSHIFT_EXPR
: LSHIFT_EXPR
,
1781 var2
, oprnd0
, def2
);
1782 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
1784 /* Pattern detected. */
1785 if (dump_enabled_p ())
1786 dump_printf_loc (MSG_NOTE
, vect_location
,
1787 "vect_recog_rotate_pattern: detected:\n");
1789 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1790 var
= vect_recog_temp_ssa_var (type
, NULL
);
1791 pattern_stmt
= gimple_build_assign_with_ops (BIT_IOR_EXPR
, var
, var1
, var2
);
1793 if (dump_enabled_p ())
1794 dump_gimple_stmt_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, pattern_stmt
, 0);
1796 stmts
->safe_push (last_stmt
);
1797 return pattern_stmt
;
1800 /* Detect a vector by vector shift pattern that wouldn't be otherwise
1808 S3 res_T = b_T op a_t;
1810 where type 'TYPE' is a type with different size than 'type',
1811 and op is <<, >> or rotate.
1816 TYPE b_T, c_T, res_T;
1819 S1 a_t = (type) c_T;
1821 S3 res_T = b_T op a_t;
1825 * STMTS: Contains a stmt from which the pattern search begins,
1826 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
1827 with a shift/rotate which has same type on both operands, in the
1828 second case just b_T op c_T, in the first case with added cast
1829 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
1833 * TYPE_IN: The type of the input arguments to the pattern.
1835 * TYPE_OUT: The type of the output of this pattern.
1837 * Return value: A new stmt that will be used to replace the shift/rotate
1841 vect_recog_vector_vector_shift_pattern (vec
<gimple
> *stmts
,
1842 tree
*type_in
, tree
*type_out
)
1844 gimple last_stmt
= stmts
->pop ();
1845 tree oprnd0
, oprnd1
, lhs
, var
;
1846 gimple pattern_stmt
, def_stmt
;
1847 enum tree_code rhs_code
;
1848 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
1849 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1850 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
1851 enum vect_def_type dt
;
1854 if (!is_gimple_assign (last_stmt
))
1857 rhs_code
= gimple_assign_rhs_code (last_stmt
);
1869 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
1872 lhs
= gimple_assign_lhs (last_stmt
);
1873 oprnd0
= gimple_assign_rhs1 (last_stmt
);
1874 oprnd1
= gimple_assign_rhs2 (last_stmt
);
1875 if (TREE_CODE (oprnd0
) != SSA_NAME
1876 || TREE_CODE (oprnd1
) != SSA_NAME
1877 || TYPE_MODE (TREE_TYPE (oprnd0
)) == TYPE_MODE (TREE_TYPE (oprnd1
))
1878 || TYPE_PRECISION (TREE_TYPE (oprnd1
))
1879 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1
)))
1880 || TYPE_PRECISION (TREE_TYPE (lhs
))
1881 != TYPE_PRECISION (TREE_TYPE (oprnd0
)))
1884 if (!vect_is_simple_use (oprnd1
, last_stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
1888 if (dt
!= vect_internal_def
)
1891 *type_in
= get_vectype_for_scalar_type (TREE_TYPE (oprnd0
));
1892 *type_out
= *type_in
;
1893 if (*type_in
== NULL_TREE
)
1897 if (gimple_assign_cast_p (def_stmt
))
1899 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
1900 if (TYPE_MODE (TREE_TYPE (rhs1
)) == TYPE_MODE (TREE_TYPE (oprnd0
))
1901 && TYPE_PRECISION (TREE_TYPE (rhs1
))
1902 == TYPE_PRECISION (TREE_TYPE (oprnd0
)))
1906 if (def
== NULL_TREE
)
1908 def
= vect_recog_temp_ssa_var (TREE_TYPE (oprnd0
), NULL
);
1909 def_stmt
= gimple_build_assign_with_ops (NOP_EXPR
, def
, oprnd1
,
1911 new_pattern_def_seq (stmt_vinfo
, def_stmt
);
1914 /* Pattern detected. */
1915 if (dump_enabled_p ())
1916 dump_printf_loc (MSG_NOTE
, vect_location
,
1917 "vect_recog_vector_vector_shift_pattern: detected:\n");
1919 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1920 var
= vect_recog_temp_ssa_var (TREE_TYPE (oprnd0
), NULL
);
1921 pattern_stmt
= gimple_build_assign_with_ops (rhs_code
, var
, oprnd0
, def
);
1923 if (dump_enabled_p ())
1924 dump_gimple_stmt_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, pattern_stmt
, 0);
1926 stmts
->safe_push (last_stmt
);
1927 return pattern_stmt
;
1930 /* Detect a signed division by a constant that wouldn't be
1931 otherwise vectorized:
1937 where type 'type' is an integral type and N is a constant.
1939 Similarly handle modulo by a constant:
1945 * STMTS: Contains a stmt from which the pattern search begins,
1946 i.e. the division stmt. S1 is replaced by if N is a power
1947 of two constant and type is signed:
1948 S3 y_t = b_t < 0 ? N - 1 : 0;
1950 S1' a_t = x_t >> log2 (N);
1952 S4 is replaced if N is a power of two constant and
1953 type is signed by (where *_T temporaries have unsigned type):
1954 S9 y_T = b_t < 0 ? -1U : 0U;
1955 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
1956 S7 z_t = (type) z_T;
1958 S5 x_t = w_t & (N - 1);
1959 S4' a_t = x_t - z_t;
1963 * TYPE_IN: The type of the input arguments to the pattern.
1965 * TYPE_OUT: The type of the output of this pattern.
1967 * Return value: A new stmt that will be used to replace the division
1968 S1 or modulo S4 stmt. */
1971 vect_recog_divmod_pattern (vec
<gimple
> *stmts
,
1972 tree
*type_in
, tree
*type_out
)
1974 gimple last_stmt
= stmts
->pop ();
1975 tree oprnd0
, oprnd1
, vectype
, itype
, cond
;
1976 gimple pattern_stmt
, def_stmt
;
1977 enum tree_code rhs_code
;
1978 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
1979 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1980 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
1983 int dummy_int
, prec
;
1984 stmt_vec_info def_stmt_vinfo
;
1986 if (!is_gimple_assign (last_stmt
))
1989 rhs_code
= gimple_assign_rhs_code (last_stmt
);
1992 case TRUNC_DIV_EXPR
:
1993 case TRUNC_MOD_EXPR
:
1999 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
2002 oprnd0
= gimple_assign_rhs1 (last_stmt
);
2003 oprnd1
= gimple_assign_rhs2 (last_stmt
);
2004 itype
= TREE_TYPE (oprnd0
);
2005 if (TREE_CODE (oprnd0
) != SSA_NAME
2006 || TREE_CODE (oprnd1
) != INTEGER_CST
2007 || TREE_CODE (itype
) != INTEGER_TYPE
2008 || TYPE_PRECISION (itype
) != GET_MODE_PRECISION (TYPE_MODE (itype
)))
2011 vectype
= get_vectype_for_scalar_type (itype
);
2012 if (vectype
== NULL_TREE
)
2015 /* If the target can handle vectorized division or modulo natively,
2016 don't attempt to optimize this. */
2017 optab
= optab_for_tree_code (rhs_code
, vectype
, optab_default
);
2018 if (optab
!= unknown_optab
)
2020 enum machine_mode vec_mode
= TYPE_MODE (vectype
);
2021 int icode
= (int) optab_handler (optab
, vec_mode
);
2022 if (icode
!= CODE_FOR_nothing
)
2026 prec
= TYPE_PRECISION (itype
);
2027 if (integer_pow2p (oprnd1
))
2029 if (TYPE_UNSIGNED (itype
) || tree_int_cst_sgn (oprnd1
) != 1)
2032 /* Pattern detected. */
2033 if (dump_enabled_p ())
2034 dump_printf_loc (MSG_NOTE
, vect_location
,
2035 "vect_recog_divmod_pattern: detected:\n");
2037 cond
= build2 (LT_EXPR
, boolean_type_node
, oprnd0
,
2038 build_int_cst (itype
, 0));
2039 if (rhs_code
== TRUNC_DIV_EXPR
)
2041 tree var
= vect_recog_temp_ssa_var (itype
, NULL
);
2044 = gimple_build_assign_with_ops (COND_EXPR
, var
, cond
,
2045 fold_build2 (MINUS_EXPR
, itype
,
2047 build_int_cst (itype
,
2049 build_int_cst (itype
, 0));
2050 new_pattern_def_seq (stmt_vinfo
, def_stmt
);
2051 var
= vect_recog_temp_ssa_var (itype
, NULL
);
2053 = gimple_build_assign_with_ops (PLUS_EXPR
, var
, oprnd0
,
2054 gimple_assign_lhs (def_stmt
));
2055 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2057 shift
= build_int_cst (itype
, tree_log2 (oprnd1
));
2059 = gimple_build_assign_with_ops (RSHIFT_EXPR
,
2060 vect_recog_temp_ssa_var (itype
,
2067 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
) = NULL
;
2068 if (compare_tree_int (oprnd1
, 2) == 0)
2070 signmask
= vect_recog_temp_ssa_var (itype
, NULL
);
2072 = gimple_build_assign_with_ops (COND_EXPR
, signmask
, cond
,
2073 build_int_cst (itype
, 1),
2074 build_int_cst (itype
, 0));
2075 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2080 = build_nonstandard_integer_type (prec
, 1);
2081 tree vecutype
= get_vectype_for_scalar_type (utype
);
2083 = build_int_cst (utype
, GET_MODE_BITSIZE (TYPE_MODE (itype
))
2084 - tree_log2 (oprnd1
));
2085 tree var
= vect_recog_temp_ssa_var (utype
, NULL
);
2088 = gimple_build_assign_with_ops (COND_EXPR
, var
, cond
,
2089 build_int_cst (utype
, -1),
2090 build_int_cst (utype
, 0));
2092 = new_stmt_vec_info (def_stmt
, loop_vinfo
, bb_vinfo
);
2093 set_vinfo_for_stmt (def_stmt
, def_stmt_vinfo
);
2094 STMT_VINFO_VECTYPE (def_stmt_vinfo
) = vecutype
;
2095 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2096 var
= vect_recog_temp_ssa_var (utype
, NULL
);
2098 = gimple_build_assign_with_ops (RSHIFT_EXPR
, var
,
2099 gimple_assign_lhs (def_stmt
),
2102 = new_stmt_vec_info (def_stmt
, loop_vinfo
, bb_vinfo
);
2103 set_vinfo_for_stmt (def_stmt
, def_stmt_vinfo
);
2104 STMT_VINFO_VECTYPE (def_stmt_vinfo
) = vecutype
;
2105 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2106 signmask
= vect_recog_temp_ssa_var (itype
, NULL
);
2108 = gimple_build_assign_with_ops (NOP_EXPR
, signmask
, var
,
2110 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2113 = gimple_build_assign_with_ops (PLUS_EXPR
,
2114 vect_recog_temp_ssa_var (itype
,
2117 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2119 = gimple_build_assign_with_ops (BIT_AND_EXPR
,
2120 vect_recog_temp_ssa_var (itype
,
2122 gimple_assign_lhs (def_stmt
),
2123 fold_build2 (MINUS_EXPR
, itype
,
2125 build_int_cst (itype
,
2127 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2130 = gimple_build_assign_with_ops (MINUS_EXPR
,
2131 vect_recog_temp_ssa_var (itype
,
2133 gimple_assign_lhs (def_stmt
),
2137 if (dump_enabled_p ())
2138 dump_gimple_stmt_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, pattern_stmt
,
2141 stmts
->safe_push (last_stmt
);
2144 *type_out
= vectype
;
2145 return pattern_stmt
;
2148 if (prec
> HOST_BITS_PER_WIDE_INT
2149 || integer_zerop (oprnd1
))
2152 if (!can_mult_highpart_p (TYPE_MODE (vectype
), TYPE_UNSIGNED (itype
)))
2155 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
) = NULL
;
2157 if (TYPE_UNSIGNED (itype
))
2159 unsigned HOST_WIDE_INT mh
, ml
;
2160 int pre_shift
, post_shift
;
2161 unsigned HOST_WIDE_INT d
= (TREE_INT_CST_LOW (oprnd1
)
2162 & GET_MODE_MASK (TYPE_MODE (itype
)));
2163 tree t1
, t2
, t3
, t4
;
2165 if (d
>= ((unsigned HOST_WIDE_INT
) 1 << (prec
- 1)))
2166 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2169 /* Find a suitable multiplier and right shift count
2170 instead of multiplying with D. */
2171 mh
= choose_multiplier (d
, prec
, prec
, &ml
, &post_shift
, &dummy_int
);
2173 /* If the suggested multiplier is more than SIZE bits, we can do better
2174 for even divisors, using an initial right shift. */
2175 if (mh
!= 0 && (d
& 1) == 0)
2177 pre_shift
= floor_log2 (d
& -d
);
2178 mh
= choose_multiplier (d
>> pre_shift
, prec
, prec
- pre_shift
,
2179 &ml
, &post_shift
, &dummy_int
);
2187 if (post_shift
- 1 >= prec
)
2190 /* t1 = oprnd0 h* ml;
2194 q = t4 >> (post_shift - 1); */
2195 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
2197 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR
, t1
, oprnd0
,
2198 build_int_cst (itype
, ml
));
2199 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2201 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
2203 = gimple_build_assign_with_ops (MINUS_EXPR
, t2
, oprnd0
, t1
);
2204 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2206 t3
= vect_recog_temp_ssa_var (itype
, NULL
);
2208 = gimple_build_assign_with_ops (RSHIFT_EXPR
, t3
, t2
,
2210 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2212 t4
= vect_recog_temp_ssa_var (itype
, NULL
);
2214 = gimple_build_assign_with_ops (PLUS_EXPR
, t4
, t1
, t3
);
2216 if (post_shift
!= 1)
2218 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2220 q
= vect_recog_temp_ssa_var (itype
, NULL
);
2222 = gimple_build_assign_with_ops (RSHIFT_EXPR
, q
, t4
,
2223 build_int_cst (itype
,
2230 pattern_stmt
= def_stmt
;
2235 if (pre_shift
>= prec
|| post_shift
>= prec
)
2238 /* t1 = oprnd0 >> pre_shift;
2240 q = t2 >> post_shift; */
2243 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
2245 = gimple_build_assign_with_ops (RSHIFT_EXPR
, t1
, oprnd0
,
2246 build_int_cst (NULL
,
2248 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2253 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
2255 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR
, t2
, t1
,
2256 build_int_cst (itype
, ml
));
2260 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2262 q
= vect_recog_temp_ssa_var (itype
, NULL
);
2264 = gimple_build_assign_with_ops (RSHIFT_EXPR
, q
, t2
,
2265 build_int_cst (itype
,
2271 pattern_stmt
= def_stmt
;
2276 unsigned HOST_WIDE_INT ml
;
2278 HOST_WIDE_INT d
= TREE_INT_CST_LOW (oprnd1
);
2279 unsigned HOST_WIDE_INT abs_d
;
2281 tree t1
, t2
, t3
, t4
;
2283 /* Give up for -1. */
2287 /* Since d might be INT_MIN, we have to cast to
2288 unsigned HOST_WIDE_INT before negating to avoid
2289 undefined signed overflow. */
2291 ? (unsigned HOST_WIDE_INT
) d
2292 : - (unsigned HOST_WIDE_INT
) d
);
2294 /* n rem d = n rem -d */
2295 if (rhs_code
== TRUNC_MOD_EXPR
&& d
< 0)
2298 oprnd1
= build_int_cst (itype
, abs_d
);
2300 else if (HOST_BITS_PER_WIDE_INT
>= prec
2301 && abs_d
== (unsigned HOST_WIDE_INT
) 1 << (prec
- 1))
2302 /* This case is not handled correctly below. */
2305 choose_multiplier (abs_d
, prec
, prec
- 1, &ml
, &post_shift
, &dummy_int
);
2306 if (ml
>= (unsigned HOST_WIDE_INT
) 1 << (prec
- 1))
2309 ml
|= (~(unsigned HOST_WIDE_INT
) 0) << (prec
- 1);
2311 if (post_shift
>= prec
)
2314 /* t1 = oprnd0 h* ml; */
2315 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
2317 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR
, t1
, oprnd0
,
2318 build_int_cst (itype
, ml
));
2322 /* t2 = t1 + oprnd0; */
2323 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2324 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
2326 = gimple_build_assign_with_ops (PLUS_EXPR
, t2
, t1
, oprnd0
);
2333 /* t3 = t2 >> post_shift; */
2334 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2335 t3
= vect_recog_temp_ssa_var (itype
, NULL
);
2337 = gimple_build_assign_with_ops (RSHIFT_EXPR
, t3
, t2
,
2338 build_int_cst (itype
, post_shift
));
2343 double_int oprnd0_min
, oprnd0_max
;
2345 if (get_range_info (oprnd0
, &oprnd0_min
, &oprnd0_max
) == VR_RANGE
)
2347 if (!oprnd0_min
.is_negative ())
2349 else if (oprnd0_max
.is_negative ())
2353 if (msb
== 0 && d
>= 0)
2357 pattern_stmt
= def_stmt
;
2361 /* t4 = oprnd0 >> (prec - 1);
2362 or if we know from VRP that oprnd0 >= 0
2364 or if we know from VRP that oprnd0 < 0
2366 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2367 t4
= vect_recog_temp_ssa_var (itype
, NULL
);
2370 = gimple_build_assign_with_ops (INTEGER_CST
,
2371 t4
, build_int_cst (itype
, msb
),
2375 = gimple_build_assign_with_ops (RSHIFT_EXPR
, t4
, oprnd0
,
2376 build_int_cst (itype
, prec
- 1));
2377 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2379 /* q = t3 - t4; or q = t4 - t3; */
2380 q
= vect_recog_temp_ssa_var (itype
, NULL
);
2382 = gimple_build_assign_with_ops (MINUS_EXPR
, q
, d
< 0 ? t4
: t3
,
2387 if (rhs_code
== TRUNC_MOD_EXPR
)
2391 /* We divided. Now finish by:
2394 append_pattern_def_seq (stmt_vinfo
, pattern_stmt
);
2396 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
2398 = gimple_build_assign_with_ops (MULT_EXPR
, t1
, q
, oprnd1
);
2399 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2401 r
= vect_recog_temp_ssa_var (itype
, NULL
);
2403 = gimple_build_assign_with_ops (MINUS_EXPR
, r
, oprnd0
, t1
);
2406 /* Pattern detected. */
2407 if (dump_enabled_p ())
2409 dump_printf_loc (MSG_NOTE
, vect_location
,
2410 "vect_recog_divmod_pattern: detected: ");
2411 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
2412 dump_printf (MSG_NOTE
, "\n");
2415 stmts
->safe_push (last_stmt
);
2418 *type_out
= vectype
;
2419 return pattern_stmt
;
2422 /* Function vect_recog_mixed_size_cond_pattern
2424 Try to find the following pattern:
2429 S1 a_T = x_t CMP y_t ? b_T : c_T;
2431 where type 'TYPE' is an integral type which has different size
2432 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
2433 than 'type', the constants need to fit into an integer type
2434 with the same width as 'type') or results of conversion from 'type'.
2438 * LAST_STMT: A stmt from which the pattern search begins.
2442 * TYPE_IN: The type of the input arguments to the pattern.
2444 * TYPE_OUT: The type of the output of this pattern.
2446 * Return value: A new stmt that will be used to replace the pattern.
2447 Additionally a def_stmt is added.
2449 a_it = x_t CMP y_t ? b_it : c_it;
2450 a_T = (TYPE) a_it; */
2453 vect_recog_mixed_size_cond_pattern (vec
<gimple
> *stmts
, tree
*type_in
,
2456 gimple last_stmt
= (*stmts
)[0];
2457 tree cond_expr
, then_clause
, else_clause
;
2458 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
), def_stmt_info
;
2459 tree type
, vectype
, comp_vectype
, itype
= NULL_TREE
, vecitype
;
2460 enum machine_mode cmpmode
;
2461 gimple pattern_stmt
, def_stmt
;
2462 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
2463 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
2464 tree orig_type0
= NULL_TREE
, orig_type1
= NULL_TREE
;
2465 gimple def_stmt0
= NULL
, def_stmt1
= NULL
;
2467 tree comp_scalar_type
;
2469 if (!is_gimple_assign (last_stmt
)
2470 || gimple_assign_rhs_code (last_stmt
) != COND_EXPR
2471 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_internal_def
)
2474 cond_expr
= gimple_assign_rhs1 (last_stmt
);
2475 then_clause
= gimple_assign_rhs2 (last_stmt
);
2476 else_clause
= gimple_assign_rhs3 (last_stmt
);
2478 if (!COMPARISON_CLASS_P (cond_expr
))
2481 comp_scalar_type
= TREE_TYPE (TREE_OPERAND (cond_expr
, 0));
2482 comp_vectype
= get_vectype_for_scalar_type (comp_scalar_type
);
2483 if (comp_vectype
== NULL_TREE
)
2486 type
= gimple_expr_type (last_stmt
);
2487 if (types_compatible_p (type
, comp_scalar_type
)
2488 || ((TREE_CODE (then_clause
) != INTEGER_CST
2489 || TREE_CODE (else_clause
) != INTEGER_CST
)
2490 && !INTEGRAL_TYPE_P (comp_scalar_type
))
2491 || !INTEGRAL_TYPE_P (type
))
2494 if ((TREE_CODE (then_clause
) != INTEGER_CST
2495 && !type_conversion_p (then_clause
, last_stmt
, false, &orig_type0
,
2496 &def_stmt0
, &promotion
))
2497 || (TREE_CODE (else_clause
) != INTEGER_CST
2498 && !type_conversion_p (else_clause
, last_stmt
, false, &orig_type1
,
2499 &def_stmt1
, &promotion
)))
2502 if (orig_type0
&& orig_type1
2503 && !types_compatible_p (orig_type0
, orig_type1
))
2508 if (!types_compatible_p (orig_type0
, comp_scalar_type
))
2510 then_clause
= gimple_assign_rhs1 (def_stmt0
);
2516 if (!types_compatible_p (orig_type1
, comp_scalar_type
))
2518 else_clause
= gimple_assign_rhs1 (def_stmt1
);
2522 cmpmode
= GET_MODE_INNER (TYPE_MODE (comp_vectype
));
2524 if (GET_MODE_BITSIZE (TYPE_MODE (type
)) == GET_MODE_BITSIZE (cmpmode
))
2527 vectype
= get_vectype_for_scalar_type (type
);
2528 if (vectype
== NULL_TREE
)
2531 if (expand_vec_cond_expr_p (vectype
, comp_vectype
))
2534 if (itype
== NULL_TREE
)
2535 itype
= build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode
),
2536 TYPE_UNSIGNED (type
));
2538 if (itype
== NULL_TREE
2539 || GET_MODE_BITSIZE (TYPE_MODE (itype
)) != GET_MODE_BITSIZE (cmpmode
))
2542 vecitype
= get_vectype_for_scalar_type (itype
);
2543 if (vecitype
== NULL_TREE
)
2546 if (!expand_vec_cond_expr_p (vecitype
, comp_vectype
))
2549 if (GET_MODE_BITSIZE (TYPE_MODE (type
)) > GET_MODE_BITSIZE (cmpmode
))
2551 if ((TREE_CODE (then_clause
) == INTEGER_CST
2552 && !int_fits_type_p (then_clause
, itype
))
2553 || (TREE_CODE (else_clause
) == INTEGER_CST
2554 && !int_fits_type_p (else_clause
, itype
)))
2559 = gimple_build_assign_with_ops (COND_EXPR
,
2560 vect_recog_temp_ssa_var (itype
, NULL
),
2561 unshare_expr (cond_expr
),
2562 fold_convert (itype
, then_clause
),
2563 fold_convert (itype
, else_clause
));
2565 = gimple_build_assign_with_ops (NOP_EXPR
,
2566 vect_recog_temp_ssa_var (type
, NULL
),
2567 gimple_assign_lhs (def_stmt
), NULL_TREE
);
2569 new_pattern_def_seq (stmt_vinfo
, def_stmt
);
2570 def_stmt_info
= new_stmt_vec_info (def_stmt
, loop_vinfo
, bb_vinfo
);
2571 set_vinfo_for_stmt (def_stmt
, def_stmt_info
);
2572 STMT_VINFO_VECTYPE (def_stmt_info
) = vecitype
;
2573 *type_in
= vecitype
;
2574 *type_out
= vectype
;
2576 if (dump_enabled_p ())
2577 dump_printf_loc (MSG_NOTE
, vect_location
,
2578 "vect_recog_mixed_size_cond_pattern: detected:\n");
2580 return pattern_stmt
;
2584 /* Helper function of vect_recog_bool_pattern. Called recursively, return
2585 true if bool VAR can be optimized that way. */
2588 check_bool_pattern (tree var
, loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
)
2591 enum vect_def_type dt
;
2593 enum tree_code rhs_code
;
2595 if (!vect_is_simple_use (var
, NULL
, loop_vinfo
, bb_vinfo
, &def_stmt
, &def
,
2599 if (dt
!= vect_internal_def
)
2602 if (!is_gimple_assign (def_stmt
))
2605 if (!has_single_use (def
))
2608 rhs1
= gimple_assign_rhs1 (def_stmt
);
2609 rhs_code
= gimple_assign_rhs_code (def_stmt
);
2613 return check_bool_pattern (rhs1
, loop_vinfo
, bb_vinfo
);
2616 if ((TYPE_PRECISION (TREE_TYPE (rhs1
)) != 1
2617 || !TYPE_UNSIGNED (TREE_TYPE (rhs1
)))
2618 && TREE_CODE (TREE_TYPE (rhs1
)) != BOOLEAN_TYPE
)
2620 return check_bool_pattern (rhs1
, loop_vinfo
, bb_vinfo
);
2623 return check_bool_pattern (rhs1
, loop_vinfo
, bb_vinfo
);
2628 if (!check_bool_pattern (rhs1
, loop_vinfo
, bb_vinfo
))
2630 return check_bool_pattern (gimple_assign_rhs2 (def_stmt
), loop_vinfo
,
2634 if (TREE_CODE_CLASS (rhs_code
) == tcc_comparison
)
2636 tree vecitype
, comp_vectype
;
2638 /* If the comparison can throw, then is_gimple_condexpr will be
2639 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
2640 if (stmt_could_throw_p (def_stmt
))
2643 comp_vectype
= get_vectype_for_scalar_type (TREE_TYPE (rhs1
));
2644 if (comp_vectype
== NULL_TREE
)
2647 if (TREE_CODE (TREE_TYPE (rhs1
)) != INTEGER_TYPE
)
2649 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (rhs1
));
2651 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode
), 1);
2652 vecitype
= get_vectype_for_scalar_type (itype
);
2653 if (vecitype
== NULL_TREE
)
2657 vecitype
= comp_vectype
;
2658 return expand_vec_cond_expr_p (vecitype
, comp_vectype
);
2665 /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
2666 stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
2667 to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT. */
2670 adjust_bool_pattern_cast (tree type
, tree var
)
2672 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (SSA_NAME_DEF_STMT (var
));
2673 gimple cast_stmt
, pattern_stmt
;
2675 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
));
2676 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_vinfo
);
2677 new_pattern_def_seq (stmt_vinfo
, pattern_stmt
);
2679 = gimple_build_assign_with_ops (NOP_EXPR
,
2680 vect_recog_temp_ssa_var (type
, NULL
),
2681 gimple_assign_lhs (pattern_stmt
),
2683 STMT_VINFO_RELATED_STMT (stmt_vinfo
) = cast_stmt
;
2684 return gimple_assign_lhs (cast_stmt
);
2688 /* Helper function of vect_recog_bool_pattern. Do the actual transformations,
2689 recursively. VAR is an SSA_NAME that should be transformed from bool
2690 to a wider integer type, OUT_TYPE is the desired final integer type of
2691 the whole pattern, TRUEVAL should be NULL unless optimizing
2692 BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
2693 in the then_clause, STMTS is where statements with added pattern stmts
2694 should be pushed to. */
2697 adjust_bool_pattern (tree var
, tree out_type
, tree trueval
,
2700 gimple stmt
= SSA_NAME_DEF_STMT (var
);
2701 enum tree_code rhs_code
, def_rhs_code
;
2702 tree itype
, cond_expr
, rhs1
, rhs2
, irhs1
, irhs2
;
2704 gimple pattern_stmt
, def_stmt
;
2706 rhs1
= gimple_assign_rhs1 (stmt
);
2707 rhs2
= gimple_assign_rhs2 (stmt
);
2708 rhs_code
= gimple_assign_rhs_code (stmt
);
2709 loc
= gimple_location (stmt
);
2714 irhs1
= adjust_bool_pattern (rhs1
, out_type
, NULL_TREE
, stmts
);
2715 itype
= TREE_TYPE (irhs1
);
2717 = gimple_build_assign_with_ops (SSA_NAME
,
2718 vect_recog_temp_ssa_var (itype
, NULL
),
2723 irhs1
= adjust_bool_pattern (rhs1
, out_type
, NULL_TREE
, stmts
);
2724 itype
= TREE_TYPE (irhs1
);
2726 = gimple_build_assign_with_ops (BIT_XOR_EXPR
,
2727 vect_recog_temp_ssa_var (itype
, NULL
),
2728 irhs1
, build_int_cst (itype
, 1));
2732 /* Try to optimize x = y & (a < b ? 1 : 0); into
2733 x = (a < b ? y : 0);
2739 S1 a_b = x1 CMP1 y1;
2740 S2 b_b = x2 CMP2 y2;
2742 S4 d_T = (TYPE) c_b;
2744 we would normally emit:
2746 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2747 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2748 S3' c_T = a_T & b_T;
2751 but we can save one stmt by using the
2752 result of one of the COND_EXPRs in the other COND_EXPR and leave
2753 BIT_AND_EXPR stmt out:
2755 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2756 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2759 At least when VEC_COND_EXPR is implemented using masks
2760 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
2761 computes the comparison masks and ands it, in one case with
2762 all ones vector, in the other case with a vector register.
2763 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
2764 often more expensive. */
2765 def_stmt
= SSA_NAME_DEF_STMT (rhs2
);
2766 def_rhs_code
= gimple_assign_rhs_code (def_stmt
);
2767 if (TREE_CODE_CLASS (def_rhs_code
) == tcc_comparison
)
2769 tree def_rhs1
= gimple_assign_rhs1 (def_stmt
);
2770 irhs1
= adjust_bool_pattern (rhs1
, out_type
, NULL_TREE
, stmts
);
2771 if (TYPE_PRECISION (TREE_TYPE (irhs1
))
2772 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1
))))
2775 stmt_vec_info stmt_def_vinfo
= vinfo_for_stmt (def_stmt
);
2776 irhs2
= adjust_bool_pattern (rhs2
, out_type
, irhs1
, stmts
);
2777 tstmt
= stmts
->pop ();
2778 gcc_assert (tstmt
== def_stmt
);
2779 stmts
->quick_push (stmt
);
2780 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt
))
2781 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo
);
2782 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo
));
2783 STMT_VINFO_RELATED_STMT (stmt_def_vinfo
) = NULL
;
2787 irhs2
= adjust_bool_pattern (rhs2
, out_type
, NULL_TREE
, stmts
);
2790 def_stmt
= SSA_NAME_DEF_STMT (rhs1
);
2791 def_rhs_code
= gimple_assign_rhs_code (def_stmt
);
2792 if (TREE_CODE_CLASS (def_rhs_code
) == tcc_comparison
)
2794 tree def_rhs1
= gimple_assign_rhs1 (def_stmt
);
2795 irhs2
= adjust_bool_pattern (rhs2
, out_type
, NULL_TREE
, stmts
);
2796 if (TYPE_PRECISION (TREE_TYPE (irhs2
))
2797 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1
))))
2800 stmt_vec_info stmt_def_vinfo
= vinfo_for_stmt (def_stmt
);
2801 irhs1
= adjust_bool_pattern (rhs1
, out_type
, irhs2
, stmts
);
2802 tstmt
= stmts
->pop ();
2803 gcc_assert (tstmt
== def_stmt
);
2804 stmts
->quick_push (stmt
);
2805 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt
))
2806 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo
);
2807 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo
));
2808 STMT_VINFO_RELATED_STMT (stmt_def_vinfo
) = NULL
;
2812 irhs1
= adjust_bool_pattern (rhs1
, out_type
, NULL_TREE
, stmts
);
2818 irhs1
= adjust_bool_pattern (rhs1
, out_type
, NULL_TREE
, stmts
);
2819 irhs2
= adjust_bool_pattern (rhs2
, out_type
, NULL_TREE
, stmts
);
2821 if (TYPE_PRECISION (TREE_TYPE (irhs1
))
2822 != TYPE_PRECISION (TREE_TYPE (irhs2
)))
2824 int prec1
= TYPE_PRECISION (TREE_TYPE (irhs1
));
2825 int prec2
= TYPE_PRECISION (TREE_TYPE (irhs2
));
2826 int out_prec
= TYPE_PRECISION (out_type
);
2827 if (absu_hwi (out_prec
- prec1
) < absu_hwi (out_prec
- prec2
))
2828 irhs2
= adjust_bool_pattern_cast (TREE_TYPE (irhs1
), rhs2
);
2829 else if (absu_hwi (out_prec
- prec1
) > absu_hwi (out_prec
- prec2
))
2830 irhs1
= adjust_bool_pattern_cast (TREE_TYPE (irhs2
), rhs1
);
2833 irhs1
= adjust_bool_pattern_cast (out_type
, rhs1
);
2834 irhs2
= adjust_bool_pattern_cast (out_type
, rhs2
);
2837 itype
= TREE_TYPE (irhs1
);
2839 = gimple_build_assign_with_ops (rhs_code
,
2840 vect_recog_temp_ssa_var (itype
, NULL
),
2845 gcc_assert (TREE_CODE_CLASS (rhs_code
) == tcc_comparison
);
2846 if (TREE_CODE (TREE_TYPE (rhs1
)) != INTEGER_TYPE
2847 || !TYPE_UNSIGNED (TREE_TYPE (rhs1
))
2848 || (TYPE_PRECISION (TREE_TYPE (rhs1
))
2849 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1
)))))
2851 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (rhs1
));
2853 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode
), 1);
2856 itype
= TREE_TYPE (rhs1
);
2857 cond_expr
= build2_loc (loc
, rhs_code
, itype
, rhs1
, rhs2
);
2858 if (trueval
== NULL_TREE
)
2859 trueval
= build_int_cst (itype
, 1);
2861 gcc_checking_assert (useless_type_conversion_p (itype
,
2862 TREE_TYPE (trueval
)));
2864 = gimple_build_assign_with_ops (COND_EXPR
,
2865 vect_recog_temp_ssa_var (itype
, NULL
),
2867 build_int_cst (itype
, 0));
2871 stmts
->safe_push (stmt
);
2872 gimple_set_location (pattern_stmt
, loc
);
2873 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt
)) = pattern_stmt
;
2874 return gimple_assign_lhs (pattern_stmt
);
2878 /* Function vect_recog_bool_pattern
2880 Try to find pattern like following:
2882 bool a_b, b_b, c_b, d_b, e_b;
2885 S1 a_b = x1 CMP1 y1;
2886 S2 b_b = x2 CMP2 y2;
2888 S4 d_b = x3 CMP3 y3;
2890 S6 f_T = (TYPE) e_b;
2892 where type 'TYPE' is an integral type.
2896 * LAST_STMT: A stmt at the end from which the pattern
2897 search begins, i.e. cast of a bool to
2902 * TYPE_IN: The type of the input arguments to the pattern.
2904 * TYPE_OUT: The type of the output of this pattern.
2906 * Return value: A new stmt that will be used to replace the pattern.
2908 Assuming size of TYPE is the same as size of all comparisons
2909 (otherwise some casts would be added where needed), the above
2910 sequence we create related pattern stmts:
2911 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2912 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2913 S4' d_T = x3 CMP3 y3 ? 1 : 0;
2914 S5' e_T = c_T | d_T;
2917 Instead of the above S3' we could emit:
2918 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2919 S3' c_T = a_T | b_T;
2920 but the above is more efficient. */
2923 vect_recog_bool_pattern (vec
<gimple
> *stmts
, tree
*type_in
,
2926 gimple last_stmt
= stmts
->pop ();
2927 enum tree_code rhs_code
;
2928 tree var
, lhs
, rhs
, vectype
;
2929 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
2930 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
2931 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
2932 gimple pattern_stmt
;
2934 if (!is_gimple_assign (last_stmt
))
2937 var
= gimple_assign_rhs1 (last_stmt
);
2938 lhs
= gimple_assign_lhs (last_stmt
);
2940 if ((TYPE_PRECISION (TREE_TYPE (var
)) != 1
2941 || !TYPE_UNSIGNED (TREE_TYPE (var
)))
2942 && TREE_CODE (TREE_TYPE (var
)) != BOOLEAN_TYPE
)
2945 rhs_code
= gimple_assign_rhs_code (last_stmt
);
2946 if (CONVERT_EXPR_CODE_P (rhs_code
))
2948 if (TREE_CODE (TREE_TYPE (lhs
)) != INTEGER_TYPE
2949 || TYPE_PRECISION (TREE_TYPE (lhs
)) == 1)
2951 vectype
= get_vectype_for_scalar_type (TREE_TYPE (lhs
));
2952 if (vectype
== NULL_TREE
)
2955 if (!check_bool_pattern (var
, loop_vinfo
, bb_vinfo
))
2958 rhs
= adjust_bool_pattern (var
, TREE_TYPE (lhs
), NULL_TREE
, stmts
);
2959 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
2960 if (useless_type_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (rhs
)))
2962 = gimple_build_assign_with_ops (SSA_NAME
, lhs
, rhs
, NULL_TREE
);
2965 = gimple_build_assign_with_ops (NOP_EXPR
, lhs
, rhs
, NULL_TREE
);
2966 *type_out
= vectype
;
2968 stmts
->safe_push (last_stmt
);
2969 if (dump_enabled_p ())
2970 dump_printf_loc (MSG_NOTE
, vect_location
,
2971 "vect_recog_bool_pattern: detected:\n");
2973 return pattern_stmt
;
2975 else if (rhs_code
== SSA_NAME
2976 && STMT_VINFO_DATA_REF (stmt_vinfo
))
2978 stmt_vec_info pattern_stmt_info
;
2979 vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
2980 gcc_assert (vectype
!= NULL_TREE
);
2981 if (!VECTOR_MODE_P (TYPE_MODE (vectype
)))
2983 if (!check_bool_pattern (var
, loop_vinfo
, bb_vinfo
))
2986 rhs
= adjust_bool_pattern (var
, TREE_TYPE (vectype
), NULL_TREE
, stmts
);
2987 lhs
= build1 (VIEW_CONVERT_EXPR
, TREE_TYPE (vectype
), lhs
);
2988 if (!useless_type_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (rhs
)))
2990 tree rhs2
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
2992 = gimple_build_assign_with_ops (NOP_EXPR
, rhs2
, rhs
, NULL_TREE
);
2993 new_pattern_def_seq (stmt_vinfo
, cast_stmt
);
2997 = gimple_build_assign_with_ops (SSA_NAME
, lhs
, rhs
, NULL_TREE
);
2998 pattern_stmt_info
= new_stmt_vec_info (pattern_stmt
, loop_vinfo
,
3000 set_vinfo_for_stmt (pattern_stmt
, pattern_stmt_info
);
3001 STMT_VINFO_DATA_REF (pattern_stmt_info
)
3002 = STMT_VINFO_DATA_REF (stmt_vinfo
);
3003 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info
)
3004 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo
);
3005 STMT_VINFO_DR_INIT (pattern_stmt_info
) = STMT_VINFO_DR_INIT (stmt_vinfo
);
3006 STMT_VINFO_DR_OFFSET (pattern_stmt_info
)
3007 = STMT_VINFO_DR_OFFSET (stmt_vinfo
);
3008 STMT_VINFO_DR_STEP (pattern_stmt_info
) = STMT_VINFO_DR_STEP (stmt_vinfo
);
3009 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info
)
3010 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo
);
3011 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo
)) = pattern_stmt
;
3012 *type_out
= vectype
;
3014 stmts
->safe_push (last_stmt
);
3015 if (dump_enabled_p ())
3016 dump_printf_loc (MSG_NOTE
, vect_location
,
3017 "vect_recog_bool_pattern: detected:\n");
3018 return pattern_stmt
;
3025 /* Mark statements that are involved in a pattern. */
3028 vect_mark_pattern_stmts (gimple orig_stmt
, gimple pattern_stmt
,
3029 tree pattern_vectype
)
3031 stmt_vec_info pattern_stmt_info
, def_stmt_info
;
3032 stmt_vec_info orig_stmt_info
= vinfo_for_stmt (orig_stmt
);
3033 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (orig_stmt_info
);
3034 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (orig_stmt_info
);
3037 pattern_stmt_info
= vinfo_for_stmt (pattern_stmt
);
3038 if (pattern_stmt_info
== NULL
)
3040 pattern_stmt_info
= new_stmt_vec_info (pattern_stmt
, loop_vinfo
,
3042 set_vinfo_for_stmt (pattern_stmt
, pattern_stmt_info
);
3044 gimple_set_bb (pattern_stmt
, gimple_bb (orig_stmt
));
3046 STMT_VINFO_RELATED_STMT (pattern_stmt_info
) = orig_stmt
;
3047 STMT_VINFO_DEF_TYPE (pattern_stmt_info
)
3048 = STMT_VINFO_DEF_TYPE (orig_stmt_info
);
3049 STMT_VINFO_VECTYPE (pattern_stmt_info
) = pattern_vectype
;
3050 STMT_VINFO_IN_PATTERN_P (orig_stmt_info
) = true;
3051 STMT_VINFO_RELATED_STMT (orig_stmt_info
) = pattern_stmt
;
3052 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info
)
3053 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info
);
3054 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info
))
3056 gimple_stmt_iterator si
;
3057 for (si
= gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info
));
3058 !gsi_end_p (si
); gsi_next (&si
))
3060 def_stmt
= gsi_stmt (si
);
3061 def_stmt_info
= vinfo_for_stmt (def_stmt
);
3062 if (def_stmt_info
== NULL
)
3064 def_stmt_info
= new_stmt_vec_info (def_stmt
, loop_vinfo
,
3066 set_vinfo_for_stmt (def_stmt
, def_stmt_info
);
3068 gimple_set_bb (def_stmt
, gimple_bb (orig_stmt
));
3069 STMT_VINFO_RELATED_STMT (def_stmt_info
) = orig_stmt
;
3070 STMT_VINFO_DEF_TYPE (def_stmt_info
) = vect_internal_def
;
3071 if (STMT_VINFO_VECTYPE (def_stmt_info
) == NULL_TREE
)
3072 STMT_VINFO_VECTYPE (def_stmt_info
) = pattern_vectype
;
3077 /* Function vect_pattern_recog_1
3080 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
3081 computation pattern.
3082 STMT: A stmt from which the pattern search should start.
3084 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
3085 expression that computes the same functionality and can be used to
3086 replace the sequence of stmts that are involved in the pattern.
3089 This function checks if the expression returned by PATTERN_RECOG_FUNC is
3090 supported in vector form by the target. We use 'TYPE_IN' to obtain the
3091 relevant vector type. If 'TYPE_IN' is already a vector type, then this
3092 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
3093 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
3094 to the available target pattern.
3096 This function also does some bookkeeping, as explained in the documentation
3097 for vect_recog_pattern. */
3100 vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func
,
3101 gimple_stmt_iterator si
,
3102 vec
<gimple
> *stmts_to_replace
)
3104 gimple stmt
= gsi_stmt (si
), pattern_stmt
;
3105 stmt_vec_info stmt_info
;
3106 loop_vec_info loop_vinfo
;
3107 tree pattern_vectype
;
3108 tree type_in
, type_out
;
3109 enum tree_code code
;
3113 stmts_to_replace
->truncate (0);
3114 stmts_to_replace
->quick_push (stmt
);
3115 pattern_stmt
= (* vect_recog_func
) (stmts_to_replace
, &type_in
, &type_out
);
3119 stmt
= stmts_to_replace
->last ();
3120 stmt_info
= vinfo_for_stmt (stmt
);
3121 loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3123 if (VECTOR_MODE_P (TYPE_MODE (type_in
)))
3125 /* No need to check target support (already checked by the pattern
3126 recognition function). */
3127 pattern_vectype
= type_out
? type_out
: type_in
;
3131 enum machine_mode vec_mode
;
3132 enum insn_code icode
;
3135 /* Check target support */
3136 type_in
= get_vectype_for_scalar_type (type_in
);
3140 type_out
= get_vectype_for_scalar_type (type_out
);
3145 pattern_vectype
= type_out
;
3147 if (is_gimple_assign (pattern_stmt
))
3148 code
= gimple_assign_rhs_code (pattern_stmt
);
3151 gcc_assert (is_gimple_call (pattern_stmt
));
3155 optab
= optab_for_tree_code (code
, type_in
, optab_default
);
3156 vec_mode
= TYPE_MODE (type_in
);
3158 || (icode
= optab_handler (optab
, vec_mode
)) == CODE_FOR_nothing
3159 || (insn_data
[icode
].operand
[0].mode
!= TYPE_MODE (type_out
)))
3163 /* Found a vectorizable pattern. */
3164 if (dump_enabled_p ())
3166 dump_printf_loc (MSG_NOTE
, vect_location
,
3167 "pattern recognized: ");
3168 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
3169 dump_printf (MSG_NOTE
, "\n");
3172 /* Mark the stmts that are involved in the pattern. */
3173 vect_mark_pattern_stmts (stmt
, pattern_stmt
, pattern_vectype
);
3175 /* Patterns cannot be vectorized using SLP, because they change the order of
3178 FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo
), i
, next
)
3180 LOOP_VINFO_REDUCTIONS (loop_vinfo
).ordered_remove (i
);
3182 /* It is possible that additional pattern stmts are created and inserted in
3183 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
3184 relevant statements. */
3185 for (i
= 0; stmts_to_replace
->iterate (i
, &stmt
)
3186 && (unsigned) i
< (stmts_to_replace
->length () - 1);
3189 stmt_info
= vinfo_for_stmt (stmt
);
3190 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
3191 if (dump_enabled_p ())
3193 dump_printf_loc (MSG_NOTE
, vect_location
,
3194 "additional pattern stmt: ");
3195 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
3196 dump_printf (MSG_NOTE
, "\n");
3199 vect_mark_pattern_stmts (stmt
, pattern_stmt
, NULL_TREE
);
3204 /* Function vect_pattern_recog
3207 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
3210 Output - for each computation idiom that is detected we create a new stmt
3211 that provides the same functionality and that can be vectorized. We
3212 also record some information in the struct_stmt_info of the relevant
3213 stmts, as explained below:
3215 At the entry to this function we have the following stmts, with the
3216 following initial value in the STMT_VINFO fields:
3218 stmt in_pattern_p related_stmt vec_stmt
3219 S1: a_i = .... - - -
3220 S2: a_2 = ..use(a_i).. - - -
3221 S3: a_1 = ..use(a_2).. - - -
3222 S4: a_0 = ..use(a_1).. - - -
3223 S5: ... = ..use(a_0).. - - -
3225 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
3226 represented by a single stmt. We then:
3227 - create a new stmt S6 equivalent to the pattern (the stmt is not
3228 inserted into the code)
3229 - fill in the STMT_VINFO fields as follows:
3231 in_pattern_p related_stmt vec_stmt
3232 S1: a_i = .... - - -
3233 S2: a_2 = ..use(a_i).. - - -
3234 S3: a_1 = ..use(a_2).. - - -
3235 S4: a_0 = ..use(a_1).. true S6 -
3236 '---> S6: a_new = .... - S4 -
3237 S5: ... = ..use(a_0).. - - -
3239 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
3240 to each other through the RELATED_STMT field).
3242 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
3243 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
3244 remain irrelevant unless used by stmts other than S4.
3246 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
3247 (because they are marked as irrelevant). It will vectorize S6, and record
3248 a pointer to the new vector stmt VS6 from S6 (as usual).
3249 S4 will be skipped, and S5 will be vectorized as usual:
3251 in_pattern_p related_stmt vec_stmt
3252 S1: a_i = .... - - -
3253 S2: a_2 = ..use(a_i).. - - -
3254 S3: a_1 = ..use(a_2).. - - -
3255 > VS6: va_new = .... - - -
3256 S4: a_0 = ..use(a_1).. true S6 VS6
3257 '---> S6: a_new = .... - S4 VS6
3258 > VS5: ... = ..vuse(va_new).. - - -
3259 S5: ... = ..use(a_0).. - - -
3261 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
3262 elsewhere), and we'll end up with:
3265 VS5: ... = ..vuse(va_new)..
3267 In case of more than one pattern statements, e.g., widen-mult with
3271 S2 a_T = (TYPE) a_t;
3272 '--> S3: a_it = (interm_type) a_t;
3273 S4 prod_T = a_T * CONST;
3274 '--> S5: prod_T' = a_it w* CONST;
3276 there may be other users of a_T outside the pattern. In that case S2 will
3277 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
3278 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
3279 be recorded in S3. */
3282 vect_pattern_recog (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
)
3287 gimple_stmt_iterator si
;
3289 vect_recog_func_ptr vect_recog_func
;
3290 auto_vec
<gimple
, 1> stmts_to_replace
;
3293 if (dump_enabled_p ())
3294 dump_printf_loc (MSG_NOTE
, vect_location
,
3295 "=== vect_pattern_recog ===\n");
3299 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3300 bbs
= LOOP_VINFO_BBS (loop_vinfo
);
3301 nbbs
= loop
->num_nodes
;
3305 bbs
= &BB_VINFO_BB (bb_vinfo
);
3309 /* Scan through the loop stmts, applying the pattern recognition
3310 functions starting at each stmt visited: */
3311 for (i
= 0; i
< nbbs
; i
++)
3313 basic_block bb
= bbs
[i
];
3314 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
3316 if (bb_vinfo
&& (stmt
= gsi_stmt (si
))
3317 && vinfo_for_stmt (stmt
)
3318 && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt
)))
3321 /* Scan over all generic vect_recog_xxx_pattern functions. */
3322 for (j
= 0; j
< NUM_PATTERNS
; j
++)
3324 vect_recog_func
= vect_vect_recog_func_ptrs
[j
];
3325 vect_pattern_recog_1 (vect_recog_func
, si
,