1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012
3 Free Software Foundation, Inc.
4 Contributed by Dorit Nuzman <dorit@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
29 #include "basic-block.h"
30 #include "gimple-pretty-print.h"
31 #include "tree-flow.h"
36 #include "tree-data-ref.h"
37 #include "tree-vectorizer.h"
38 #include "recog.h" /* FIXME: for insn_data */
39 #include "diagnostic-core.h"
42 /* Pattern recognition functions */
43 static gimple
vect_recog_widen_sum_pattern (VEC (gimple
, heap
) **, tree
*,
45 static gimple
vect_recog_widen_mult_pattern (VEC (gimple
, heap
) **, tree
*,
47 static gimple
vect_recog_dot_prod_pattern (VEC (gimple
, heap
) **, tree
*,
49 static gimple
vect_recog_pow_pattern (VEC (gimple
, heap
) **, tree
*, tree
*);
50 static gimple
vect_recog_over_widening_pattern (VEC (gimple
, heap
) **, tree
*,
52 static gimple
vect_recog_widen_shift_pattern (VEC (gimple
, heap
) **,
54 static gimple
vect_recog_vector_vector_shift_pattern (VEC (gimple
, heap
) **,
56 static gimple
vect_recog_divmod_pattern (VEC (gimple
, heap
) **,
58 static gimple
vect_recog_mixed_size_cond_pattern (VEC (gimple
, heap
) **,
60 static gimple
vect_recog_bool_pattern (VEC (gimple
, heap
) **, tree
*, tree
*);
61 static vect_recog_func_ptr vect_vect_recog_func_ptrs
[NUM_PATTERNS
] = {
62 vect_recog_widen_mult_pattern
,
63 vect_recog_widen_sum_pattern
,
64 vect_recog_dot_prod_pattern
,
65 vect_recog_pow_pattern
,
66 vect_recog_widen_shift_pattern
,
67 vect_recog_over_widening_pattern
,
68 vect_recog_vector_vector_shift_pattern
,
69 vect_recog_divmod_pattern
,
70 vect_recog_mixed_size_cond_pattern
,
71 vect_recog_bool_pattern
};
74 append_pattern_def_seq (stmt_vec_info stmt_info
, gimple stmt
)
76 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
),
81 new_pattern_def_seq (stmt_vec_info stmt_info
, gimple stmt
)
83 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
) = NULL
;
84 append_pattern_def_seq (stmt_info
, stmt
);
87 /* Check whether STMT2 is in the same loop or basic block as STMT1.
88 Which of the two applies depends on whether we're currently doing
89 loop-based or basic-block-based vectorization, as determined by
90 the vinfo_for_stmt for STMT1 (which must be defined).
92 If this returns true, vinfo_for_stmt for STMT2 is guaranteed
93 to be defined as well. */
96 vect_same_loop_or_bb_p (gimple stmt1
, gimple stmt2
)
98 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt1
);
99 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
100 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
102 if (!gimple_bb (stmt2
))
107 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
108 if (!flow_bb_inside_loop_p (loop
, gimple_bb (stmt2
)))
113 if (gimple_bb (stmt2
) != BB_VINFO_BB (bb_vinfo
)
114 || gimple_code (stmt2
) == GIMPLE_PHI
)
118 gcc_assert (vinfo_for_stmt (stmt2
));
122 /* If the LHS of DEF_STMT has a single use, and that statement is
123 in the same loop or basic block, return it. */
126 vect_single_imm_use (gimple def_stmt
)
128 tree lhs
= gimple_assign_lhs (def_stmt
);
132 if (!single_imm_use (lhs
, &use_p
, &use_stmt
))
135 if (!vect_same_loop_or_bb_p (def_stmt
, use_stmt
))
141 /* Check whether NAME, an ssa-name used in USE_STMT,
142 is a result of a type promotion or demotion, such that:
143 DEF_STMT: NAME = NOP (name0)
144 where the type of name0 (ORIG_TYPE) is smaller/bigger than the type of NAME.
145 If CHECK_SIGN is TRUE, check that either both types are signed or both are
149 type_conversion_p (tree name
, gimple use_stmt
, bool check_sign
,
150 tree
*orig_type
, gimple
*def_stmt
, bool *promotion
)
154 loop_vec_info loop_vinfo
;
155 stmt_vec_info stmt_vinfo
;
156 tree type
= TREE_TYPE (name
);
158 enum vect_def_type dt
;
160 bb_vec_info bb_vinfo
;
162 stmt_vinfo
= vinfo_for_stmt (use_stmt
);
163 loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
164 bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
165 if (!vect_is_simple_use (name
, use_stmt
, loop_vinfo
, bb_vinfo
, def_stmt
,
169 if (dt
!= vect_internal_def
170 && dt
!= vect_external_def
&& dt
!= vect_constant_def
)
176 if (!is_gimple_assign (*def_stmt
))
179 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt
)))
182 oprnd0
= gimple_assign_rhs1 (*def_stmt
);
184 *orig_type
= TREE_TYPE (oprnd0
);
185 if (!INTEGRAL_TYPE_P (type
) || !INTEGRAL_TYPE_P (*orig_type
)
186 || ((TYPE_UNSIGNED (type
) != TYPE_UNSIGNED (*orig_type
)) && check_sign
))
189 if (TYPE_PRECISION (type
) >= (TYPE_PRECISION (*orig_type
) * 2))
191 else if (TYPE_PRECISION (*orig_type
) >= (TYPE_PRECISION (type
) * 2))
196 if (!vect_is_simple_use (oprnd0
, *def_stmt
, loop_vinfo
,
197 bb_vinfo
, &dummy_gimple
, &dummy
, &dt
))
203 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
204 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
207 vect_recog_temp_ssa_var (tree type
, gimple stmt
)
209 return make_temp_ssa_name (type
, stmt
, "patt");
212 /* Function vect_recog_dot_prod_pattern
214 Try to find the following pattern:
220 sum_0 = phi <init, sum_1>
223 S3 x_T = (TYPE1) x_t;
224 S4 y_T = (TYPE1) y_t;
226 [S6 prod = (TYPE2) prod; #optional]
227 S7 sum_1 = prod + sum_0;
229 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
230 same size of 'TYPE1' or bigger. This is a special case of a reduction
235 * STMTS: Contains a stmt from which the pattern search begins. In the
236 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
241 * TYPE_IN: The type of the input arguments to the pattern.
243 * TYPE_OUT: The type of the output of this pattern.
245 * Return value: A new stmt that will be used to replace the sequence of
246 stmts that constitute the pattern. In this case it will be:
247 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
249 Note: The dot-prod idiom is a widening reduction pattern that is
250 vectorized without preserving all the intermediate results. It
251 produces only N/2 (widened) results (by summing up pairs of
252 intermediate results) rather than all N results. Therefore, we
253 cannot allow this pattern when we want to get all the results and in
254 the correct order (as is the case when this computation is in an
255 inner-loop nested in an outer-loop that us being vectorized). */
258 vect_recog_dot_prod_pattern (VEC (gimple
, heap
) **stmts
, tree
*type_in
,
261 gimple stmt
, last_stmt
= VEC_index (gimple
, *stmts
, 0);
263 tree oprnd00
, oprnd01
;
264 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
265 tree type
, half_type
;
268 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
276 loop
= LOOP_VINFO_LOOP (loop_info
);
278 if (!is_gimple_assign (last_stmt
))
281 type
= gimple_expr_type (last_stmt
);
283 /* Look for the following pattern
287 DDPROD = (TYPE2) DPROD;
288 sum_1 = DDPROD + sum_0;
290 - DX is double the size of X
291 - DY is double the size of Y
292 - DX, DY, DPROD all have the same type
293 - sum is the same size of DPROD or bigger
294 - sum has been recognized as a reduction variable.
296 This is equivalent to:
297 DPROD = X w* Y; #widen mult
298 sum_1 = DPROD w+ sum_0; #widen summation
300 DPROD = X w* Y; #widen mult
301 sum_1 = DPROD + sum_0; #summation
304 /* Starting from LAST_STMT, follow the defs of its uses in search
305 of the above pattern. */
307 if (gimple_assign_rhs_code (last_stmt
) != PLUS_EXPR
)
310 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
312 /* Has been detected as widening-summation? */
314 stmt
= STMT_VINFO_RELATED_STMT (stmt_vinfo
);
315 type
= gimple_expr_type (stmt
);
316 if (gimple_assign_rhs_code (stmt
) != WIDEN_SUM_EXPR
)
318 oprnd0
= gimple_assign_rhs1 (stmt
);
319 oprnd1
= gimple_assign_rhs2 (stmt
);
320 half_type
= TREE_TYPE (oprnd0
);
326 if (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
)
328 oprnd0
= gimple_assign_rhs1 (last_stmt
);
329 oprnd1
= gimple_assign_rhs2 (last_stmt
);
330 if (!types_compatible_p (TREE_TYPE (oprnd0
), type
)
331 || !types_compatible_p (TREE_TYPE (oprnd1
), type
))
335 if (type_conversion_p (oprnd0
, stmt
, true, &half_type
, &def_stmt
,
340 oprnd0
= gimple_assign_rhs1 (stmt
);
346 /* So far so good. Since last_stmt was detected as a (summation) reduction,
347 we know that oprnd1 is the reduction variable (defined by a loop-header
348 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
349 Left to check that oprnd0 is defined by a (widen_)mult_expr */
350 if (TREE_CODE (oprnd0
) != SSA_NAME
)
353 prod_type
= half_type
;
354 stmt
= SSA_NAME_DEF_STMT (oprnd0
);
356 /* It could not be the dot_prod pattern if the stmt is outside the loop. */
357 if (!gimple_bb (stmt
) || !flow_bb_inside_loop_p (loop
, gimple_bb (stmt
)))
360 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
361 inside the loop (in case we are analyzing an outer-loop). */
362 if (!is_gimple_assign (stmt
))
364 stmt_vinfo
= vinfo_for_stmt (stmt
);
365 gcc_assert (stmt_vinfo
);
366 if (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_internal_def
)
368 if (gimple_assign_rhs_code (stmt
) != MULT_EXPR
)
370 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
372 /* Has been detected as a widening multiplication? */
374 stmt
= STMT_VINFO_RELATED_STMT (stmt_vinfo
);
375 if (gimple_assign_rhs_code (stmt
) != WIDEN_MULT_EXPR
)
377 stmt_vinfo
= vinfo_for_stmt (stmt
);
378 gcc_assert (stmt_vinfo
);
379 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_internal_def
);
380 oprnd00
= gimple_assign_rhs1 (stmt
);
381 oprnd01
= gimple_assign_rhs2 (stmt
);
385 tree half_type0
, half_type1
;
389 oprnd0
= gimple_assign_rhs1 (stmt
);
390 oprnd1
= gimple_assign_rhs2 (stmt
);
391 if (!types_compatible_p (TREE_TYPE (oprnd0
), prod_type
)
392 || !types_compatible_p (TREE_TYPE (oprnd1
), prod_type
))
394 if (!type_conversion_p (oprnd0
, stmt
, true, &half_type0
, &def_stmt
,
398 oprnd00
= gimple_assign_rhs1 (def_stmt
);
399 if (!type_conversion_p (oprnd0
, stmt
, true, &half_type1
, &def_stmt
,
403 oprnd01
= gimple_assign_rhs1 (def_stmt
);
404 if (!types_compatible_p (half_type0
, half_type1
))
406 if (TYPE_PRECISION (prod_type
) != TYPE_PRECISION (half_type0
) * 2)
410 half_type
= TREE_TYPE (oprnd00
);
411 *type_in
= half_type
;
414 /* Pattern detected. Create a stmt to be used to replace the pattern: */
415 var
= vect_recog_temp_ssa_var (type
, NULL
);
416 pattern_stmt
= gimple_build_assign_with_ops (DOT_PROD_EXPR
, var
,
417 oprnd00
, oprnd01
, oprnd1
);
419 if (dump_enabled_p ())
421 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
, vect_location
,
422 "vect_recog_dot_prod_pattern: detected: ");
423 dump_gimple_stmt (MSG_OPTIMIZED_LOCATIONS
, TDF_SLIM
, pattern_stmt
, 0);
426 /* We don't allow changing the order of the computation in the inner-loop
427 when doing outer-loop vectorization. */
428 gcc_assert (!nested_in_vect_loop_p (loop
, last_stmt
));
434 /* Handle widening operation by a constant. At the moment we support MULT_EXPR
437 For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
438 we check that CONST_OPRND is less or equal to the size of HALF_TYPE.
440 Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
441 HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
442 that satisfies the above restrictions, we can perform a widening opeartion
443 from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
444 with a_it = (interm_type) a_t; */
447 vect_handle_widen_op_by_const (gimple stmt
, enum tree_code code
,
448 tree const_oprnd
, tree
*oprnd
,
449 VEC (gimple
, heap
) **stmts
, tree type
,
450 tree
*half_type
, gimple def_stmt
)
452 tree new_type
, new_oprnd
;
455 if (code
!= MULT_EXPR
&& code
!= LSHIFT_EXPR
)
458 if (((code
== MULT_EXPR
&& int_fits_type_p (const_oprnd
, *half_type
))
459 || (code
== LSHIFT_EXPR
460 && compare_tree_int (const_oprnd
, TYPE_PRECISION (*half_type
))
462 && TYPE_PRECISION (type
) == (TYPE_PRECISION (*half_type
) * 2))
464 /* CONST_OPRND is a constant of HALF_TYPE. */
465 *oprnd
= gimple_assign_rhs1 (def_stmt
);
469 if (TYPE_PRECISION (type
) < (TYPE_PRECISION (*half_type
) * 4))
472 if (!vect_same_loop_or_bb_p (stmt
, def_stmt
))
475 /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
476 a type 2 times bigger than HALF_TYPE. */
477 new_type
= build_nonstandard_integer_type (TYPE_PRECISION (type
) / 2,
478 TYPE_UNSIGNED (type
));
479 if ((code
== MULT_EXPR
&& !int_fits_type_p (const_oprnd
, new_type
))
480 || (code
== LSHIFT_EXPR
481 && compare_tree_int (const_oprnd
, TYPE_PRECISION (new_type
)) == 1))
484 /* Use NEW_TYPE for widening operation. */
485 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
)))
487 new_stmt
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
));
488 /* Check if the already created pattern stmt is what we need. */
489 if (!is_gimple_assign (new_stmt
)
490 || gimple_assign_rhs_code (new_stmt
) != NOP_EXPR
491 || TREE_TYPE (gimple_assign_lhs (new_stmt
)) != new_type
)
494 VEC_safe_push (gimple
, heap
, *stmts
, def_stmt
);
495 *oprnd
= gimple_assign_lhs (new_stmt
);
499 /* Create a_T = (NEW_TYPE) a_t; */
500 *oprnd
= gimple_assign_rhs1 (def_stmt
);
501 new_oprnd
= make_ssa_name (new_type
, NULL
);
502 new_stmt
= gimple_build_assign_with_ops (NOP_EXPR
, new_oprnd
, *oprnd
,
504 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
)) = new_stmt
;
505 VEC_safe_push (gimple
, heap
, *stmts
, def_stmt
);
509 *half_type
= new_type
;
514 /* Function vect_recog_widen_mult_pattern
516 Try to find the following pattern:
519 TYPE a_T, b_T, prod_T;
525 S5 prod_T = a_T * b_T;
527 where type 'TYPE' is at least double the size of type 'type'.
529 Also detect unsigned cases:
531 unsigned type a_t, b_t;
532 unsigned TYPE u_prod_T;
533 TYPE a_T, b_T, prod_T;
539 S5 prod_T = a_T * b_T;
540 S6 u_prod_T = (unsigned TYPE) prod_T;
542 and multiplication by constants:
549 S5 prod_T = a_T * CONST;
551 A special case of multiplication by constants is when 'TYPE' is 4 times
552 bigger than 'type', but CONST fits an intermediate type 2 times smaller
553 than 'TYPE'. In that case we create an additional pattern stmt for S3
554 to create a variable of the intermediate type, and perform widen-mult
555 on the intermediate type as well:
559 TYPE a_T, prod_T, prod_T';
563 '--> a_it = (interm_type) a_t;
564 S5 prod_T = a_T * CONST;
565 '--> prod_T' = a_it w* CONST;
569 * STMTS: Contains a stmt from which the pattern search begins. In the
570 example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
571 is detected. In case of unsigned widen-mult, the original stmt (S5) is
572 replaced with S6 in STMTS. In case of multiplication by a constant
573 of an intermediate type (the last case above), STMTS also contains S3
574 (inserted before S5).
578 * TYPE_IN: The type of the input arguments to the pattern.
580 * TYPE_OUT: The type of the output of this pattern.
582 * Return value: A new stmt that will be used to replace the sequence of
583 stmts that constitute the pattern. In this case it will be:
584 WIDEN_MULT <a_t, b_t>
588 vect_recog_widen_mult_pattern (VEC (gimple
, heap
) **stmts
,
589 tree
*type_in
, tree
*type_out
)
591 gimple last_stmt
= VEC_pop (gimple
, *stmts
);
592 gimple def_stmt0
, def_stmt1
;
594 tree type
, half_type0
, half_type1
;
596 tree vectype
, vectype_out
= NULL_TREE
;
598 enum tree_code dummy_code
;
600 VEC (tree
, heap
) *dummy_vec
;
604 if (!is_gimple_assign (last_stmt
))
607 type
= gimple_expr_type (last_stmt
);
609 /* Starting from LAST_STMT, follow the defs of its uses in search
610 of the above pattern. */
612 if (gimple_assign_rhs_code (last_stmt
) != MULT_EXPR
)
615 oprnd0
= gimple_assign_rhs1 (last_stmt
);
616 oprnd1
= gimple_assign_rhs2 (last_stmt
);
617 if (!types_compatible_p (TREE_TYPE (oprnd0
), type
)
618 || !types_compatible_p (TREE_TYPE (oprnd1
), type
))
621 /* Check argument 0. */
622 if (!type_conversion_p (oprnd0
, last_stmt
, false, &half_type0
, &def_stmt0
,
626 /* Check argument 1. */
627 op1_ok
= type_conversion_p (oprnd1
, last_stmt
, false, &half_type1
,
628 &def_stmt1
, &promotion
);
630 if (op1_ok
&& promotion
)
632 oprnd0
= gimple_assign_rhs1 (def_stmt0
);
633 oprnd1
= gimple_assign_rhs1 (def_stmt1
);
637 if (TREE_CODE (oprnd1
) == INTEGER_CST
638 && TREE_CODE (half_type0
) == INTEGER_TYPE
639 && vect_handle_widen_op_by_const (last_stmt
, MULT_EXPR
, oprnd1
,
640 &oprnd0
, stmts
, type
,
641 &half_type0
, def_stmt0
))
642 half_type1
= half_type0
;
647 /* Handle unsigned case. Look for
648 S6 u_prod_T = (unsigned TYPE) prod_T;
649 Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */
650 if (TYPE_UNSIGNED (type
) != TYPE_UNSIGNED (half_type0
))
656 if (TYPE_UNSIGNED (type
) == TYPE_UNSIGNED (half_type1
))
659 use_stmt
= vect_single_imm_use (last_stmt
);
660 if (!use_stmt
|| !is_gimple_assign (use_stmt
)
661 || gimple_assign_rhs_code (use_stmt
) != NOP_EXPR
)
664 use_lhs
= gimple_assign_lhs (use_stmt
);
665 use_type
= TREE_TYPE (use_lhs
);
666 if (!INTEGRAL_TYPE_P (use_type
)
667 || (TYPE_UNSIGNED (type
) == TYPE_UNSIGNED (use_type
))
668 || (TYPE_PRECISION (type
) != TYPE_PRECISION (use_type
)))
672 last_stmt
= use_stmt
;
675 if (!types_compatible_p (half_type0
, half_type1
))
678 /* Pattern detected. */
679 if (dump_enabled_p ())
680 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
, vect_location
,
681 "vect_recog_widen_mult_pattern: detected: ");
683 /* Check target support */
684 vectype
= get_vectype_for_scalar_type (half_type0
);
685 vectype_out
= get_vectype_for_scalar_type (type
);
688 || !supportable_widening_operation (WIDEN_MULT_EXPR
, last_stmt
,
689 vectype_out
, vectype
,
690 &dummy_code
, &dummy_code
,
691 &dummy_int
, &dummy_vec
))
695 *type_out
= vectype_out
;
697 /* Pattern supported. Create a stmt to be used to replace the pattern: */
698 var
= vect_recog_temp_ssa_var (type
, NULL
);
699 pattern_stmt
= gimple_build_assign_with_ops (WIDEN_MULT_EXPR
, var
, oprnd0
,
702 if (dump_enabled_p ())
703 dump_gimple_stmt_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, pattern_stmt
, 0);
705 VEC_safe_push (gimple
, heap
, *stmts
, last_stmt
);
710 /* Function vect_recog_pow_pattern
712 Try to find the following pattern:
716 with POW being one of pow, powf, powi, powif and N being
721 * LAST_STMT: A stmt from which the pattern search begins.
725 * TYPE_IN: The type of the input arguments to the pattern.
727 * TYPE_OUT: The type of the output of this pattern.
729 * Return value: A new stmt that will be used to replace the sequence of
730 stmts that constitute the pattern. In this case it will be:
737 vect_recog_pow_pattern (VEC (gimple
, heap
) **stmts
, tree
*type_in
,
740 gimple last_stmt
= VEC_index (gimple
, *stmts
, 0);
741 tree fn
, base
, exp
= NULL
;
745 if (!is_gimple_call (last_stmt
) || gimple_call_lhs (last_stmt
) == NULL
)
748 fn
= gimple_call_fndecl (last_stmt
);
749 if (fn
== NULL_TREE
|| DECL_BUILT_IN_CLASS (fn
) != BUILT_IN_NORMAL
)
752 switch (DECL_FUNCTION_CODE (fn
))
758 base
= gimple_call_arg (last_stmt
, 0);
759 exp
= gimple_call_arg (last_stmt
, 1);
760 if (TREE_CODE (exp
) != REAL_CST
761 && TREE_CODE (exp
) != INTEGER_CST
)
769 /* We now have a pow or powi builtin function call with a constant
772 *type_out
= NULL_TREE
;
774 /* Catch squaring. */
775 if ((host_integerp (exp
, 0)
776 && tree_low_cst (exp
, 0) == 2)
777 || (TREE_CODE (exp
) == REAL_CST
778 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp
), dconst2
)))
780 *type_in
= TREE_TYPE (base
);
782 var
= vect_recog_temp_ssa_var (TREE_TYPE (base
), NULL
);
783 stmt
= gimple_build_assign_with_ops (MULT_EXPR
, var
, base
, base
);
787 /* Catch square root. */
788 if (TREE_CODE (exp
) == REAL_CST
789 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp
), dconsthalf
))
791 tree newfn
= mathfn_built_in (TREE_TYPE (base
), BUILT_IN_SQRT
);
792 *type_in
= get_vectype_for_scalar_type (TREE_TYPE (base
));
795 gimple stmt
= gimple_build_call (newfn
, 1, base
);
796 if (vectorizable_function (stmt
, *type_in
, *type_in
)
799 var
= vect_recog_temp_ssa_var (TREE_TYPE (base
), stmt
);
800 gimple_call_set_lhs (stmt
, var
);
810 /* Function vect_recog_widen_sum_pattern
812 Try to find the following pattern:
815 TYPE x_T, sum = init;
817 sum_0 = phi <init, sum_1>
820 S3 sum_1 = x_T + sum_0;
822 where type 'TYPE' is at least double the size of type 'type', i.e - we're
823 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
824 a special case of a reduction computation.
828 * LAST_STMT: A stmt from which the pattern search begins. In the example,
829 when this function is called with S3, the pattern {S2,S3} will be detected.
833 * TYPE_IN: The type of the input arguments to the pattern.
835 * TYPE_OUT: The type of the output of this pattern.
837 * Return value: A new stmt that will be used to replace the sequence of
838 stmts that constitute the pattern. In this case it will be:
839 WIDEN_SUM <x_t, sum_0>
841 Note: The widening-sum idiom is a widening reduction pattern that is
842 vectorized without preserving all the intermediate results. It
843 produces only N/2 (widened) results (by summing up pairs of
844 intermediate results) rather than all N results. Therefore, we
845 cannot allow this pattern when we want to get all the results and in
846 the correct order (as is the case when this computation is in an
847 inner-loop nested in an outer-loop that us being vectorized). */
850 vect_recog_widen_sum_pattern (VEC (gimple
, heap
) **stmts
, tree
*type_in
,
853 gimple stmt
, last_stmt
= VEC_index (gimple
, *stmts
, 0);
855 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
856 tree type
, half_type
;
858 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
866 loop
= LOOP_VINFO_LOOP (loop_info
);
868 if (!is_gimple_assign (last_stmt
))
871 type
= gimple_expr_type (last_stmt
);
873 /* Look for the following pattern
876 In which DX is at least double the size of X, and sum_1 has been
877 recognized as a reduction variable.
880 /* Starting from LAST_STMT, follow the defs of its uses in search
881 of the above pattern. */
883 if (gimple_assign_rhs_code (last_stmt
) != PLUS_EXPR
)
886 if (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
)
889 oprnd0
= gimple_assign_rhs1 (last_stmt
);
890 oprnd1
= gimple_assign_rhs2 (last_stmt
);
891 if (!types_compatible_p (TREE_TYPE (oprnd0
), type
)
892 || !types_compatible_p (TREE_TYPE (oprnd1
), type
))
895 /* So far so good. Since last_stmt was detected as a (summation) reduction,
896 we know that oprnd1 is the reduction variable (defined by a loop-header
897 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
898 Left to check that oprnd0 is defined by a cast from type 'type' to type
901 if (!type_conversion_p (oprnd0
, last_stmt
, true, &half_type
, &stmt
,
906 oprnd0
= gimple_assign_rhs1 (stmt
);
907 *type_in
= half_type
;
910 /* Pattern detected. Create a stmt to be used to replace the pattern: */
911 var
= vect_recog_temp_ssa_var (type
, NULL
);
912 pattern_stmt
= gimple_build_assign_with_ops (WIDEN_SUM_EXPR
, var
,
915 if (dump_enabled_p ())
917 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
, vect_location
,
918 "vect_recog_widen_sum_pattern: detected: ");
919 dump_gimple_stmt (MSG_OPTIMIZED_LOCATIONS
, TDF_SLIM
, pattern_stmt
, 0);
922 /* We don't allow changing the order of the computation in the inner-loop
923 when doing outer-loop vectorization. */
924 gcc_assert (!nested_in_vect_loop_p (loop
, last_stmt
));
930 /* Return TRUE if the operation in STMT can be performed on a smaller type.
933 STMT - a statement to check.
934 DEF - we support operations with two operands, one of which is constant.
935 The other operand can be defined by a demotion operation, or by a
936 previous statement in a sequence of over-promoted operations. In the
937 later case DEF is used to replace that operand. (It is defined by a
938 pattern statement we created for the previous statement in the
942 NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not
943 NULL, it's the type of DEF.
944 STMTS - additional pattern statements. If a pattern statement (type
945 conversion) is created in this function, its original statement is
949 OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
950 operands to use in the new pattern statement for STMT (will be created
951 in vect_recog_over_widening_pattern ()).
952 NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
953 statements for STMT: the first one is a type promotion and the second
954 one is the operation itself. We return the type promotion statement
955 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
956 the second pattern statement. */
959 vect_operation_fits_smaller_type (gimple stmt
, tree def
, tree
*new_type
,
960 tree
*op0
, tree
*op1
, gimple
*new_def_stmt
,
961 VEC (gimple
, heap
) **stmts
)
964 tree const_oprnd
, oprnd
;
965 tree interm_type
= NULL_TREE
, half_type
, new_oprnd
, type
;
966 gimple def_stmt
, new_stmt
;
972 *new_def_stmt
= NULL
;
974 if (!is_gimple_assign (stmt
))
977 code
= gimple_assign_rhs_code (stmt
);
978 if (code
!= LSHIFT_EXPR
&& code
!= RSHIFT_EXPR
979 && code
!= BIT_IOR_EXPR
&& code
!= BIT_XOR_EXPR
&& code
!= BIT_AND_EXPR
)
982 oprnd
= gimple_assign_rhs1 (stmt
);
983 const_oprnd
= gimple_assign_rhs2 (stmt
);
984 type
= gimple_expr_type (stmt
);
986 if (TREE_CODE (oprnd
) != SSA_NAME
987 || TREE_CODE (const_oprnd
) != INTEGER_CST
)
990 /* If oprnd has other uses besides that in stmt we cannot mark it
991 as being part of a pattern only. */
992 if (!has_single_use (oprnd
))
995 /* If we are in the middle of a sequence, we use DEF from a previous
996 statement. Otherwise, OPRND has to be a result of type promotion. */
999 half_type
= *new_type
;
1005 if (!type_conversion_p (oprnd
, stmt
, false, &half_type
, &def_stmt
,
1008 || !vect_same_loop_or_bb_p (stmt
, def_stmt
))
1012 /* Can we perform the operation on a smaller type? */
1018 if (!int_fits_type_p (const_oprnd
, half_type
))
1020 /* HALF_TYPE is not enough. Try a bigger type if possible. */
1021 if (TYPE_PRECISION (type
) < (TYPE_PRECISION (half_type
) * 4))
1024 interm_type
= build_nonstandard_integer_type (
1025 TYPE_PRECISION (half_type
) * 2, TYPE_UNSIGNED (type
));
1026 if (!int_fits_type_p (const_oprnd
, interm_type
))
1033 /* Try intermediate type - HALF_TYPE is not enough for sure. */
1034 if (TYPE_PRECISION (type
) < (TYPE_PRECISION (half_type
) * 4))
1037 /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
1038 (e.g., if the original value was char, the shift amount is at most 8
1039 if we want to use short). */
1040 if (compare_tree_int (const_oprnd
, TYPE_PRECISION (half_type
)) == 1)
1043 interm_type
= build_nonstandard_integer_type (
1044 TYPE_PRECISION (half_type
) * 2, TYPE_UNSIGNED (type
));
1046 if (!vect_supportable_shift (code
, interm_type
))
1052 if (vect_supportable_shift (code
, half_type
))
1055 /* Try intermediate type - HALF_TYPE is not supported. */
1056 if (TYPE_PRECISION (type
) < (TYPE_PRECISION (half_type
) * 4))
1059 interm_type
= build_nonstandard_integer_type (
1060 TYPE_PRECISION (half_type
) * 2, TYPE_UNSIGNED (type
));
1062 if (!vect_supportable_shift (code
, interm_type
))
1071 /* There are four possible cases:
1072 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
1073 the first statement in the sequence)
1074 a. The original, HALF_TYPE, is not enough - we replace the promotion
1075 from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
1076 b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
1078 2. OPRND is defined by a pattern statement we created.
1079 a. Its type is not sufficient for the operation, we create a new stmt:
1080 a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store
1081 this statement in NEW_DEF_STMT, and it is later put in
1082 STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
1083 b. OPRND is good to use in the new statement. */
1088 /* Replace the original type conversion HALF_TYPE->TYPE with
1089 HALF_TYPE->INTERM_TYPE. */
1090 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
)))
1092 new_stmt
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
));
1093 /* Check if the already created pattern stmt is what we need. */
1094 if (!is_gimple_assign (new_stmt
)
1095 || gimple_assign_rhs_code (new_stmt
) != NOP_EXPR
1096 || TREE_TYPE (gimple_assign_lhs (new_stmt
)) != interm_type
)
1099 VEC_safe_push (gimple
, heap
, *stmts
, def_stmt
);
1100 oprnd
= gimple_assign_lhs (new_stmt
);
1104 /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */
1105 oprnd
= gimple_assign_rhs1 (def_stmt
);
1106 new_oprnd
= make_ssa_name (interm_type
, NULL
);
1107 new_stmt
= gimple_build_assign_with_ops (NOP_EXPR
, new_oprnd
,
1109 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
)) = new_stmt
;
1110 VEC_safe_push (gimple
, heap
, *stmts
, def_stmt
);
1116 /* Retrieve the operand before the type promotion. */
1117 oprnd
= gimple_assign_rhs1 (def_stmt
);
1124 /* Create a type conversion HALF_TYPE->INTERM_TYPE. */
1125 new_oprnd
= make_ssa_name (interm_type
, NULL
);
1126 new_stmt
= gimple_build_assign_with_ops (NOP_EXPR
, new_oprnd
,
1129 *new_def_stmt
= new_stmt
;
1132 /* Otherwise, OPRND is already set. */
1136 *new_type
= interm_type
;
1138 *new_type
= half_type
;
1141 *op1
= fold_convert (*new_type
, const_oprnd
);
1147 /* Try to find a statement or a sequence of statements that can be performed
1151 TYPE x_T, res0_T, res1_T;
1154 S2 x_T = (TYPE) x_t;
1155 S3 res0_T = op (x_T, C0);
1156 S4 res1_T = op (res0_T, C1);
1157 S5 ... = () res1_T; - type demotion
1159 where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
1161 Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
1162 be 'type' or some intermediate type. For now, we expect S5 to be a type
1163 demotion operation. We also check that S3 and S4 have only one use. */
1166 vect_recog_over_widening_pattern (VEC (gimple
, heap
) **stmts
,
1167 tree
*type_in
, tree
*type_out
)
1169 gimple stmt
= VEC_pop (gimple
, *stmts
);
1170 gimple pattern_stmt
= NULL
, new_def_stmt
, prev_stmt
= NULL
, use_stmt
= NULL
;
1171 tree op0
, op1
, vectype
= NULL_TREE
, use_lhs
, use_type
;
1172 tree var
= NULL_TREE
, new_type
= NULL_TREE
, new_oprnd
;
1179 if (!vinfo_for_stmt (stmt
)
1180 || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt
)))
1183 new_def_stmt
= NULL
;
1184 if (!vect_operation_fits_smaller_type (stmt
, var
, &new_type
,
1185 &op0
, &op1
, &new_def_stmt
,
1194 /* STMT can be performed on a smaller type. Check its uses. */
1195 use_stmt
= vect_single_imm_use (stmt
);
1196 if (!use_stmt
|| !is_gimple_assign (use_stmt
))
1199 /* Create pattern statement for STMT. */
1200 vectype
= get_vectype_for_scalar_type (new_type
);
1204 /* We want to collect all the statements for which we create pattern
1205 statetments, except for the case when the last statement in the
1206 sequence doesn't have a corresponding pattern statement. In such
1207 case we associate the last pattern statement with the last statement
1208 in the sequence. Therefore, we only add the original statement to
1209 the list if we know that it is not the last. */
1211 VEC_safe_push (gimple
, heap
, *stmts
, prev_stmt
);
1213 var
= vect_recog_temp_ssa_var (new_type
, NULL
);
1215 = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt
), var
,
1217 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt
)) = pattern_stmt
;
1218 new_pattern_def_seq (vinfo_for_stmt (stmt
), new_def_stmt
);
1220 if (dump_enabled_p ())
1222 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
, vect_location
,
1223 "created pattern stmt: ");
1224 dump_gimple_stmt (MSG_OPTIMIZED_LOCATIONS
, TDF_SLIM
, pattern_stmt
, 0);
1227 type
= gimple_expr_type (stmt
);
1234 /* We got a sequence. We expect it to end with a type demotion operation.
1235 Otherwise, we quit (for now). There are three possible cases: the
1236 conversion is to NEW_TYPE (we don't do anything), the conversion is to
1237 a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
1238 NEW_TYPE differs (we create a new conversion statement). */
1239 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt
)))
1241 use_lhs
= gimple_assign_lhs (use_stmt
);
1242 use_type
= TREE_TYPE (use_lhs
);
1243 /* Support only type demotion or signedess change. */
1244 if (!INTEGRAL_TYPE_P (use_type
)
1245 || TYPE_PRECISION (type
) <= TYPE_PRECISION (use_type
))
1248 /* Check that NEW_TYPE is not bigger than the conversion result. */
1249 if (TYPE_PRECISION (new_type
) > TYPE_PRECISION (use_type
))
1252 if (TYPE_UNSIGNED (new_type
) != TYPE_UNSIGNED (use_type
)
1253 || TYPE_PRECISION (new_type
) != TYPE_PRECISION (use_type
))
1255 /* Create NEW_TYPE->USE_TYPE conversion. */
1256 new_oprnd
= make_ssa_name (use_type
, NULL
);
1257 pattern_stmt
= gimple_build_assign_with_ops (NOP_EXPR
, new_oprnd
,
1259 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt
)) = pattern_stmt
;
1261 *type_in
= get_vectype_for_scalar_type (new_type
);
1262 *type_out
= get_vectype_for_scalar_type (use_type
);
1264 /* We created a pattern statement for the last statement in the
1265 sequence, so we don't need to associate it with the pattern
1266 statement created for PREV_STMT. Therefore, we add PREV_STMT
1267 to the list in order to mark it later in vect_pattern_recog_1. */
1269 VEC_safe_push (gimple
, heap
, *stmts
, prev_stmt
);
1274 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt
))
1275 = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt
));
1278 *type_out
= NULL_TREE
;
1281 VEC_safe_push (gimple
, heap
, *stmts
, use_stmt
);
1284 /* TODO: support general case, create a conversion to the correct type. */
1287 /* Pattern detected. */
1288 if (dump_enabled_p ())
1290 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
, vect_location
,
1291 "vect_recog_over_widening_pattern: detected: ");
1292 dump_gimple_stmt (MSG_OPTIMIZED_LOCATIONS
, TDF_SLIM
, pattern_stmt
, 0);
1295 return pattern_stmt
;
1298 /* Detect widening shift pattern:
1304 S2 a_T = (TYPE) a_t;
1305 S3 res_T = a_T << CONST;
1307 where type 'TYPE' is at least double the size of type 'type'.
1309 Also detect cases where the shift result is immediately converted
1310 to another type 'result_type' that is no larger in size than 'TYPE'.
1311 In those cases we perform a widen-shift that directly results in
1312 'result_type', to avoid a possible over-widening situation:
1316 result_type res_result;
1319 S2 a_T = (TYPE) a_t;
1320 S3 res_T = a_T << CONST;
1321 S4 res_result = (result_type) res_T;
1322 '--> res_result' = a_t w<< CONST;
1324 And a case when 'TYPE' is 4 times bigger than 'type'. In that case we
1325 create an additional pattern stmt for S2 to create a variable of an
1326 intermediate type, and perform widen-shift on the intermediate type:
1330 TYPE a_T, res_T, res_T';
1333 S2 a_T = (TYPE) a_t;
1334 '--> a_it = (interm_type) a_t;
1335 S3 res_T = a_T << CONST;
1336 '--> res_T' = a_it <<* CONST;
1340 * STMTS: Contains a stmt from which the pattern search begins.
1341 In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
1342 in STMTS. When an intermediate type is used and a pattern statement is
1343 created for S2, we also put S2 here (before S3).
1347 * TYPE_IN: The type of the input arguments to the pattern.
1349 * TYPE_OUT: The type of the output of this pattern.
1351 * Return value: A new stmt that will be used to replace the sequence of
1352 stmts that constitute the pattern. In this case it will be:
1353 WIDEN_LSHIFT_EXPR <a_t, CONST>. */
1356 vect_recog_widen_shift_pattern (VEC (gimple
, heap
) **stmts
,
1357 tree
*type_in
, tree
*type_out
)
1359 gimple last_stmt
= VEC_pop (gimple
, *stmts
);
1361 tree oprnd0
, oprnd1
;
1362 tree type
, half_type0
;
1363 gimple pattern_stmt
;
1364 tree vectype
, vectype_out
= NULL_TREE
;
1366 enum tree_code dummy_code
;
1368 VEC (tree
, heap
) * dummy_vec
;
1372 if (!is_gimple_assign (last_stmt
) || !vinfo_for_stmt (last_stmt
))
1375 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt
)))
1378 if (gimple_assign_rhs_code (last_stmt
) != LSHIFT_EXPR
)
1381 oprnd0
= gimple_assign_rhs1 (last_stmt
);
1382 oprnd1
= gimple_assign_rhs2 (last_stmt
);
1383 if (TREE_CODE (oprnd0
) != SSA_NAME
|| TREE_CODE (oprnd1
) != INTEGER_CST
)
1386 /* Check operand 0: it has to be defined by a type promotion. */
1387 if (!type_conversion_p (oprnd0
, last_stmt
, false, &half_type0
, &def_stmt0
,
1392 /* Check operand 1: has to be positive. We check that it fits the type
1393 in vect_handle_widen_op_by_const (). */
1394 if (tree_int_cst_compare (oprnd1
, size_zero_node
) <= 0)
1397 oprnd0
= gimple_assign_rhs1 (def_stmt0
);
1398 type
= gimple_expr_type (last_stmt
);
1400 /* Check for subsequent conversion to another type. */
1401 use_stmt
= vect_single_imm_use (last_stmt
);
1402 if (use_stmt
&& is_gimple_assign (use_stmt
)
1403 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt
))
1404 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt
)))
1406 tree use_lhs
= gimple_assign_lhs (use_stmt
);
1407 tree use_type
= TREE_TYPE (use_lhs
);
1409 if (INTEGRAL_TYPE_P (use_type
)
1410 && TYPE_PRECISION (use_type
) <= TYPE_PRECISION (type
))
1412 last_stmt
= use_stmt
;
1417 /* Check if this a widening operation. */
1418 if (!vect_handle_widen_op_by_const (last_stmt
, LSHIFT_EXPR
, oprnd1
,
1420 type
, &half_type0
, def_stmt0
))
1423 /* Pattern detected. */
1424 if (dump_enabled_p ())
1425 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
, vect_location
,
1426 "vect_recog_widen_shift_pattern: detected: ");
1428 /* Check target support. */
1429 vectype
= get_vectype_for_scalar_type (half_type0
);
1430 vectype_out
= get_vectype_for_scalar_type (type
);
1434 || !supportable_widening_operation (WIDEN_LSHIFT_EXPR
, last_stmt
,
1435 vectype_out
, vectype
,
1436 &dummy_code
, &dummy_code
,
1437 &dummy_int
, &dummy_vec
))
1441 *type_out
= vectype_out
;
1443 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1444 var
= vect_recog_temp_ssa_var (type
, NULL
);
1446 gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR
, var
, oprnd0
, oprnd1
);
1448 if (dump_enabled_p ())
1449 dump_gimple_stmt_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, pattern_stmt
, 0);
1451 VEC_safe_push (gimple
, heap
, *stmts
, last_stmt
);
1452 return pattern_stmt
;
1455 /* Detect a vector by vector shift pattern that wouldn't be otherwise
1463 S3 res_T = b_T op a_t;
1465 where type 'TYPE' is a type with different size than 'type',
1466 and op is <<, >> or rotate.
1471 TYPE b_T, c_T, res_T;
1474 S1 a_t = (type) c_T;
1476 S3 res_T = b_T op a_t;
1480 * STMTS: Contains a stmt from which the pattern search begins,
1481 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
1482 with a shift/rotate which has same type on both operands, in the
1483 second case just b_T op c_T, in the first case with added cast
1484 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
1488 * TYPE_IN: The type of the input arguments to the pattern.
1490 * TYPE_OUT: The type of the output of this pattern.
1492 * Return value: A new stmt that will be used to replace the shift/rotate
1496 vect_recog_vector_vector_shift_pattern (VEC (gimple
, heap
) **stmts
,
1497 tree
*type_in
, tree
*type_out
)
1499 gimple last_stmt
= VEC_pop (gimple
, *stmts
);
1500 tree oprnd0
, oprnd1
, lhs
, var
;
1501 gimple pattern_stmt
, def_stmt
;
1502 enum tree_code rhs_code
;
1503 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
1504 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1505 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
1506 enum vect_def_type dt
;
1509 if (!is_gimple_assign (last_stmt
))
1512 rhs_code
= gimple_assign_rhs_code (last_stmt
);
1524 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
1527 lhs
= gimple_assign_lhs (last_stmt
);
1528 oprnd0
= gimple_assign_rhs1 (last_stmt
);
1529 oprnd1
= gimple_assign_rhs2 (last_stmt
);
1530 if (TREE_CODE (oprnd0
) != SSA_NAME
1531 || TREE_CODE (oprnd1
) != SSA_NAME
1532 || TYPE_MODE (TREE_TYPE (oprnd0
)) == TYPE_MODE (TREE_TYPE (oprnd1
))
1533 || TYPE_PRECISION (TREE_TYPE (oprnd1
))
1534 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1
)))
1535 || TYPE_PRECISION (TREE_TYPE (lhs
))
1536 != TYPE_PRECISION (TREE_TYPE (oprnd0
)))
1539 if (!vect_is_simple_use (oprnd1
, last_stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
1543 if (dt
!= vect_internal_def
)
1546 *type_in
= get_vectype_for_scalar_type (TREE_TYPE (oprnd0
));
1547 *type_out
= *type_in
;
1548 if (*type_in
== NULL_TREE
)
1552 if (gimple_assign_cast_p (def_stmt
))
1554 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
1555 if (TYPE_MODE (TREE_TYPE (rhs1
)) == TYPE_MODE (TREE_TYPE (oprnd0
))
1556 && TYPE_PRECISION (TREE_TYPE (rhs1
))
1557 == TYPE_PRECISION (TREE_TYPE (oprnd0
)))
1561 if (def
== NULL_TREE
)
1563 def
= vect_recog_temp_ssa_var (TREE_TYPE (oprnd0
), NULL
);
1564 def_stmt
= gimple_build_assign_with_ops (NOP_EXPR
, def
, oprnd1
,
1566 new_pattern_def_seq (stmt_vinfo
, def_stmt
);
1569 /* Pattern detected. */
1570 if (dump_enabled_p ())
1571 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
, vect_location
,
1572 "vect_recog_vector_vector_shift_pattern: detected: ");
1574 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1575 var
= vect_recog_temp_ssa_var (TREE_TYPE (oprnd0
), NULL
);
1576 pattern_stmt
= gimple_build_assign_with_ops (rhs_code
, var
, oprnd0
, def
);
1578 if (dump_enabled_p ())
1579 dump_gimple_stmt_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, pattern_stmt
, 0);
1581 VEC_safe_push (gimple
, heap
, *stmts
, last_stmt
);
1582 return pattern_stmt
;
1585 /* Detect a signed division by a constant that wouldn't be
1586 otherwise vectorized:
1592 where type 'type' is an integral type and N is a constant.
1594 Similarly handle modulo by a constant:
1600 * STMTS: Contains a stmt from which the pattern search begins,
1601 i.e. the division stmt. S1 is replaced by if N is a power
1602 of two constant and type is signed:
1603 S3 y_t = b_t < 0 ? N - 1 : 0;
1605 S1' a_t = x_t >> log2 (N);
1607 S4 is replaced if N is a power of two constant and
1608 type is signed by (where *_T temporaries have unsigned type):
1609 S9 y_T = b_t < 0 ? -1U : 0U;
1610 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
1611 S7 z_t = (type) z_T;
1613 S5 x_t = w_t & (N - 1);
1614 S4' a_t = x_t - z_t;
1618 * TYPE_IN: The type of the input arguments to the pattern.
1620 * TYPE_OUT: The type of the output of this pattern.
1622 * Return value: A new stmt that will be used to replace the division
1623 S1 or modulo S4 stmt. */
1626 vect_recog_divmod_pattern (VEC (gimple
, heap
) **stmts
,
1627 tree
*type_in
, tree
*type_out
)
1629 gimple last_stmt
= VEC_pop (gimple
, *stmts
);
1630 tree oprnd0
, oprnd1
, vectype
, itype
, cond
;
1631 gimple pattern_stmt
, def_stmt
;
1632 enum tree_code rhs_code
;
1633 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
1634 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1635 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
1638 int dummy_int
, prec
;
1639 stmt_vec_info def_stmt_vinfo
;
1641 if (!is_gimple_assign (last_stmt
))
1644 rhs_code
= gimple_assign_rhs_code (last_stmt
);
1647 case TRUNC_DIV_EXPR
:
1648 case TRUNC_MOD_EXPR
:
1654 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
1657 oprnd0
= gimple_assign_rhs1 (last_stmt
);
1658 oprnd1
= gimple_assign_rhs2 (last_stmt
);
1659 itype
= TREE_TYPE (oprnd0
);
1660 if (TREE_CODE (oprnd0
) != SSA_NAME
1661 || TREE_CODE (oprnd1
) != INTEGER_CST
1662 || TREE_CODE (itype
) != INTEGER_TYPE
1663 || TYPE_PRECISION (itype
) != GET_MODE_PRECISION (TYPE_MODE (itype
)))
1666 vectype
= get_vectype_for_scalar_type (itype
);
1667 if (vectype
== NULL_TREE
)
1670 /* If the target can handle vectorized division or modulo natively,
1671 don't attempt to optimize this. */
1672 optab
= optab_for_tree_code (rhs_code
, vectype
, optab_default
);
1673 if (optab
!= unknown_optab
)
1675 enum machine_mode vec_mode
= TYPE_MODE (vectype
);
1676 int icode
= (int) optab_handler (optab
, vec_mode
);
1677 if (icode
!= CODE_FOR_nothing
)
1681 prec
= TYPE_PRECISION (itype
);
1682 if (integer_pow2p (oprnd1
))
1684 if (TYPE_UNSIGNED (itype
) || tree_int_cst_sgn (oprnd1
) != 1)
1687 /* Pattern detected. */
1688 if (dump_enabled_p ())
1689 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
, vect_location
,
1690 "vect_recog_divmod_pattern: detected: ");
1692 cond
= build2 (LT_EXPR
, boolean_type_node
, oprnd0
,
1693 build_int_cst (itype
, 0));
1694 if (rhs_code
== TRUNC_DIV_EXPR
)
1696 tree var
= vect_recog_temp_ssa_var (itype
, NULL
);
1699 = gimple_build_assign_with_ops (COND_EXPR
, var
, cond
,
1700 fold_build2 (MINUS_EXPR
, itype
,
1702 build_int_cst (itype
,
1704 build_int_cst (itype
, 0));
1705 new_pattern_def_seq (stmt_vinfo
, def_stmt
);
1706 var
= vect_recog_temp_ssa_var (itype
, NULL
);
1708 = gimple_build_assign_with_ops (PLUS_EXPR
, var
, oprnd0
,
1709 gimple_assign_lhs (def_stmt
));
1710 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
1712 shift
= build_int_cst (itype
, tree_log2 (oprnd1
));
1714 = gimple_build_assign_with_ops (RSHIFT_EXPR
,
1715 vect_recog_temp_ssa_var (itype
,
1722 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
) = NULL
;
1723 if (compare_tree_int (oprnd1
, 2) == 0)
1725 signmask
= vect_recog_temp_ssa_var (itype
, NULL
);
1727 = gimple_build_assign_with_ops (COND_EXPR
, signmask
, cond
,
1728 build_int_cst (itype
, 1),
1729 build_int_cst (itype
, 0));
1730 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
1735 = build_nonstandard_integer_type (prec
, 1);
1736 tree vecutype
= get_vectype_for_scalar_type (utype
);
1738 = build_int_cst (utype
, GET_MODE_BITSIZE (TYPE_MODE (itype
))
1739 - tree_log2 (oprnd1
));
1740 tree var
= vect_recog_temp_ssa_var (utype
, NULL
);
1743 = gimple_build_assign_with_ops (COND_EXPR
, var
, cond
,
1744 build_int_cst (utype
, -1),
1745 build_int_cst (utype
, 0));
1747 = new_stmt_vec_info (def_stmt
, loop_vinfo
, bb_vinfo
);
1748 set_vinfo_for_stmt (def_stmt
, def_stmt_vinfo
);
1749 STMT_VINFO_VECTYPE (def_stmt_vinfo
) = vecutype
;
1750 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
1751 var
= vect_recog_temp_ssa_var (utype
, NULL
);
1753 = gimple_build_assign_with_ops (RSHIFT_EXPR
, var
,
1754 gimple_assign_lhs (def_stmt
),
1757 = new_stmt_vec_info (def_stmt
, loop_vinfo
, bb_vinfo
);
1758 set_vinfo_for_stmt (def_stmt
, def_stmt_vinfo
);
1759 STMT_VINFO_VECTYPE (def_stmt_vinfo
) = vecutype
;
1760 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
1761 signmask
= vect_recog_temp_ssa_var (itype
, NULL
);
1763 = gimple_build_assign_with_ops (NOP_EXPR
, signmask
, var
,
1765 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
1768 = gimple_build_assign_with_ops (PLUS_EXPR
,
1769 vect_recog_temp_ssa_var (itype
,
1772 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
1774 = gimple_build_assign_with_ops (BIT_AND_EXPR
,
1775 vect_recog_temp_ssa_var (itype
,
1777 gimple_assign_lhs (def_stmt
),
1778 fold_build2 (MINUS_EXPR
, itype
,
1780 build_int_cst (itype
,
1782 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
1785 = gimple_build_assign_with_ops (MINUS_EXPR
,
1786 vect_recog_temp_ssa_var (itype
,
1788 gimple_assign_lhs (def_stmt
),
1792 if (dump_enabled_p ())
1793 dump_gimple_stmt_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, pattern_stmt
,
1796 VEC_safe_push (gimple
, heap
, *stmts
, last_stmt
);
1799 *type_out
= vectype
;
1800 return pattern_stmt
;
1803 if (!host_integerp (oprnd1
, TYPE_UNSIGNED (itype
))
1804 || integer_zerop (oprnd1
)
1805 || prec
> HOST_BITS_PER_WIDE_INT
)
1808 if (!can_mult_highpart_p (TYPE_MODE (vectype
), TYPE_UNSIGNED (itype
)))
1811 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
) = NULL
;
1813 if (TYPE_UNSIGNED (itype
))
1815 unsigned HOST_WIDE_INT mh
, ml
;
1816 int pre_shift
, post_shift
;
1817 unsigned HOST_WIDE_INT d
= tree_low_cst (oprnd1
, 1)
1818 & GET_MODE_MASK (TYPE_MODE (itype
));
1819 tree t1
, t2
, t3
, t4
;
1821 if (d
>= ((unsigned HOST_WIDE_INT
) 1 << (prec
- 1)))
1822 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
1825 /* Find a suitable multiplier and right shift count
1826 instead of multiplying with D. */
1827 mh
= choose_multiplier (d
, prec
, prec
, &ml
, &post_shift
, &dummy_int
);
1829 /* If the suggested multiplier is more than SIZE bits, we can do better
1830 for even divisors, using an initial right shift. */
1831 if (mh
!= 0 && (d
& 1) == 0)
1833 pre_shift
= floor_log2 (d
& -d
);
1834 mh
= choose_multiplier (d
>> pre_shift
, prec
, prec
- pre_shift
,
1835 &ml
, &post_shift
, &dummy_int
);
1843 if (post_shift
- 1 >= prec
)
1846 /* t1 = oprnd0 h* ml;
1850 q = t4 >> (post_shift - 1); */
1851 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
1853 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR
, t1
, oprnd0
,
1854 build_int_cst (itype
, ml
));
1855 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
1857 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
1859 = gimple_build_assign_with_ops (MINUS_EXPR
, t2
, oprnd0
, t1
);
1860 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
1862 t3
= vect_recog_temp_ssa_var (itype
, NULL
);
1864 = gimple_build_assign_with_ops (RSHIFT_EXPR
, t3
, t2
,
1866 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
1868 t4
= vect_recog_temp_ssa_var (itype
, NULL
);
1870 = gimple_build_assign_with_ops (PLUS_EXPR
, t4
, t1
, t3
);
1872 if (post_shift
!= 1)
1874 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
1876 q
= vect_recog_temp_ssa_var (itype
, NULL
);
1878 = gimple_build_assign_with_ops (RSHIFT_EXPR
, q
, t4
,
1879 build_int_cst (itype
,
1886 pattern_stmt
= def_stmt
;
1891 if (pre_shift
>= prec
|| post_shift
>= prec
)
1894 /* t1 = oprnd0 >> pre_shift;
1896 q = t2 >> post_shift; */
1899 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
1901 = gimple_build_assign_with_ops (RSHIFT_EXPR
, t1
, oprnd0
,
1902 build_int_cst (NULL
,
1904 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
1909 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
1911 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR
, t2
, t1
,
1912 build_int_cst (itype
, ml
));
1916 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
1918 q
= vect_recog_temp_ssa_var (itype
, NULL
);
1920 = gimple_build_assign_with_ops (RSHIFT_EXPR
, q
, t2
,
1921 build_int_cst (itype
,
1927 pattern_stmt
= def_stmt
;
1932 unsigned HOST_WIDE_INT ml
;
1934 HOST_WIDE_INT d
= tree_low_cst (oprnd1
, 0);
1935 unsigned HOST_WIDE_INT abs_d
;
1937 tree t1
, t2
, t3
, t4
;
1939 /* Give up for -1. */
1943 /* Since d might be INT_MIN, we have to cast to
1944 unsigned HOST_WIDE_INT before negating to avoid
1945 undefined signed overflow. */
1947 ? (unsigned HOST_WIDE_INT
) d
1948 : - (unsigned HOST_WIDE_INT
) d
);
1950 /* n rem d = n rem -d */
1951 if (rhs_code
== TRUNC_MOD_EXPR
&& d
< 0)
1954 oprnd1
= build_int_cst (itype
, abs_d
);
1956 else if (HOST_BITS_PER_WIDE_INT
>= prec
1957 && abs_d
== (unsigned HOST_WIDE_INT
) 1 << (prec
- 1))
1958 /* This case is not handled correctly below. */
1961 choose_multiplier (abs_d
, prec
, prec
- 1, &ml
, &post_shift
, &dummy_int
);
1962 if (ml
>= (unsigned HOST_WIDE_INT
) 1 << (prec
- 1))
1965 ml
|= (~(unsigned HOST_WIDE_INT
) 0) << (prec
- 1);
1967 if (post_shift
>= prec
)
1970 /* t1 = oprnd1 h* ml; */
1971 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
1973 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR
, t1
, oprnd0
,
1974 build_int_cst (itype
, ml
));
1975 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
1979 /* t2 = t1 + oprnd0; */
1980 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
1982 = gimple_build_assign_with_ops (PLUS_EXPR
, t2
, t1
, oprnd0
);
1983 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
1990 /* t3 = t2 >> post_shift; */
1991 t3
= vect_recog_temp_ssa_var (itype
, NULL
);
1993 = gimple_build_assign_with_ops (RSHIFT_EXPR
, t3
, t2
,
1994 build_int_cst (itype
, post_shift
));
1995 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2000 /* t4 = oprnd0 >> (prec - 1); */
2001 t4
= vect_recog_temp_ssa_var (itype
, NULL
);
2003 = gimple_build_assign_with_ops (RSHIFT_EXPR
, t4
, oprnd0
,
2004 build_int_cst (itype
, prec
- 1));
2005 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2007 /* q = t3 - t4; or q = t4 - t3; */
2008 q
= vect_recog_temp_ssa_var (itype
, NULL
);
2010 = gimple_build_assign_with_ops (MINUS_EXPR
, q
, d
< 0 ? t4
: t3
,
2014 if (rhs_code
== TRUNC_MOD_EXPR
)
2018 /* We divided. Now finish by:
2021 append_pattern_def_seq (stmt_vinfo
, pattern_stmt
);
2023 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
2025 = gimple_build_assign_with_ops (MULT_EXPR
, t1
, q
, oprnd1
);
2026 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2028 r
= vect_recog_temp_ssa_var (itype
, NULL
);
2030 = gimple_build_assign_with_ops (MINUS_EXPR
, r
, oprnd0
, t1
);
2033 /* Pattern detected. */
2034 if (dump_enabled_p ())
2036 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
, vect_location
,
2037 "vect_recog_divmod_pattern: detected: ");
2038 dump_gimple_stmt (MSG_OPTIMIZED_LOCATIONS
, TDF_SLIM
, pattern_stmt
, 0);
2041 VEC_safe_push (gimple
, heap
, *stmts
, last_stmt
);
2044 *type_out
= vectype
;
2045 return pattern_stmt
;
2048 /* Function vect_recog_mixed_size_cond_pattern
2050 Try to find the following pattern:
2055 S1 a_T = x_t CMP y_t ? b_T : c_T;
2057 where type 'TYPE' is an integral type which has different size
2058 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
2059 than 'type', the constants need to fit into an integer type
2060 with the same width as 'type') or results of conversion from 'type'.
2064 * LAST_STMT: A stmt from which the pattern search begins.
2068 * TYPE_IN: The type of the input arguments to the pattern.
2070 * TYPE_OUT: The type of the output of this pattern.
2072 * Return value: A new stmt that will be used to replace the pattern.
2073 Additionally a def_stmt is added.
2075 a_it = x_t CMP y_t ? b_it : c_it;
2076 a_T = (TYPE) a_it; */
2079 vect_recog_mixed_size_cond_pattern (VEC (gimple
, heap
) **stmts
, tree
*type_in
,
2082 gimple last_stmt
= VEC_index (gimple
, *stmts
, 0);
2083 tree cond_expr
, then_clause
, else_clause
;
2084 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
), def_stmt_info
;
2085 tree type
, vectype
, comp_vectype
, itype
= NULL_TREE
, vecitype
;
2086 enum machine_mode cmpmode
;
2087 gimple pattern_stmt
, def_stmt
;
2088 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
2089 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
2090 tree orig_type0
= NULL_TREE
, orig_type1
= NULL_TREE
;
2091 gimple def_stmt0
= NULL
, def_stmt1
= NULL
;
2093 tree comp_scalar_type
;
2095 if (!is_gimple_assign (last_stmt
)
2096 || gimple_assign_rhs_code (last_stmt
) != COND_EXPR
2097 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_internal_def
)
2100 cond_expr
= gimple_assign_rhs1 (last_stmt
);
2101 then_clause
= gimple_assign_rhs2 (last_stmt
);
2102 else_clause
= gimple_assign_rhs3 (last_stmt
);
2104 if (!COMPARISON_CLASS_P (cond_expr
))
2107 comp_scalar_type
= TREE_TYPE (TREE_OPERAND (cond_expr
, 0));
2108 comp_vectype
= get_vectype_for_scalar_type (comp_scalar_type
);
2109 if (comp_vectype
== NULL_TREE
)
2112 type
= gimple_expr_type (last_stmt
);
2113 if (types_compatible_p (type
, comp_scalar_type
)
2114 || ((TREE_CODE (then_clause
) != INTEGER_CST
2115 || TREE_CODE (else_clause
) != INTEGER_CST
)
2116 && !INTEGRAL_TYPE_P (comp_scalar_type
))
2117 || !INTEGRAL_TYPE_P (type
))
2120 if ((TREE_CODE (then_clause
) != INTEGER_CST
2121 && !type_conversion_p (then_clause
, last_stmt
, false, &orig_type0
,
2122 &def_stmt0
, &promotion
))
2123 || (TREE_CODE (else_clause
) != INTEGER_CST
2124 && !type_conversion_p (else_clause
, last_stmt
, false, &orig_type1
,
2125 &def_stmt1
, &promotion
)))
2128 if (orig_type0
&& orig_type1
2129 && !types_compatible_p (orig_type0
, orig_type1
))
2134 if (!types_compatible_p (orig_type0
, comp_scalar_type
))
2136 then_clause
= gimple_assign_rhs1 (def_stmt0
);
2142 if (!types_compatible_p (orig_type1
, comp_scalar_type
))
2144 else_clause
= gimple_assign_rhs1 (def_stmt1
);
2148 cmpmode
= GET_MODE_INNER (TYPE_MODE (comp_vectype
));
2150 if (GET_MODE_BITSIZE (TYPE_MODE (type
)) == GET_MODE_BITSIZE (cmpmode
))
2153 vectype
= get_vectype_for_scalar_type (type
);
2154 if (vectype
== NULL_TREE
)
2157 if (expand_vec_cond_expr_p (vectype
, comp_vectype
))
2160 if (itype
== NULL_TREE
)
2161 itype
= build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode
),
2162 TYPE_UNSIGNED (type
));
2164 if (itype
== NULL_TREE
2165 || GET_MODE_BITSIZE (TYPE_MODE (itype
)) != GET_MODE_BITSIZE (cmpmode
))
2168 vecitype
= get_vectype_for_scalar_type (itype
);
2169 if (vecitype
== NULL_TREE
)
2172 if (!expand_vec_cond_expr_p (vecitype
, comp_vectype
))
2175 if (GET_MODE_BITSIZE (TYPE_MODE (type
)) > GET_MODE_BITSIZE (cmpmode
))
2177 if ((TREE_CODE (then_clause
) == INTEGER_CST
2178 && !int_fits_type_p (then_clause
, itype
))
2179 || (TREE_CODE (else_clause
) == INTEGER_CST
2180 && !int_fits_type_p (else_clause
, itype
)))
2185 = gimple_build_assign_with_ops (COND_EXPR
,
2186 vect_recog_temp_ssa_var (itype
, NULL
),
2187 unshare_expr (cond_expr
),
2188 fold_convert (itype
, then_clause
),
2189 fold_convert (itype
, else_clause
));
2191 = gimple_build_assign_with_ops (NOP_EXPR
,
2192 vect_recog_temp_ssa_var (type
, NULL
),
2193 gimple_assign_lhs (def_stmt
), NULL_TREE
);
2195 new_pattern_def_seq (stmt_vinfo
, def_stmt
);
2196 def_stmt_info
= new_stmt_vec_info (def_stmt
, loop_vinfo
, bb_vinfo
);
2197 set_vinfo_for_stmt (def_stmt
, def_stmt_info
);
2198 STMT_VINFO_VECTYPE (def_stmt_info
) = vecitype
;
2199 *type_in
= vecitype
;
2200 *type_out
= vectype
;
2202 if (dump_enabled_p ())
2203 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
, vect_location
,
2204 "vect_recog_mixed_size_cond_pattern: detected: ");
2206 return pattern_stmt
;
2210 /* Helper function of vect_recog_bool_pattern. Called recursively, return
2211 true if bool VAR can be optimized that way. */
2214 check_bool_pattern (tree var
, loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
)
2217 enum vect_def_type dt
;
2219 enum tree_code rhs_code
;
2221 if (!vect_is_simple_use (var
, NULL
, loop_vinfo
, bb_vinfo
, &def_stmt
, &def
,
2225 if (dt
!= vect_internal_def
)
2228 if (!is_gimple_assign (def_stmt
))
2231 if (!has_single_use (def
))
2234 rhs1
= gimple_assign_rhs1 (def_stmt
);
2235 rhs_code
= gimple_assign_rhs_code (def_stmt
);
2239 return check_bool_pattern (rhs1
, loop_vinfo
, bb_vinfo
);
2242 if ((TYPE_PRECISION (TREE_TYPE (rhs1
)) != 1
2243 || !TYPE_UNSIGNED (TREE_TYPE (rhs1
)))
2244 && TREE_CODE (TREE_TYPE (rhs1
)) != BOOLEAN_TYPE
)
2246 return check_bool_pattern (rhs1
, loop_vinfo
, bb_vinfo
);
2249 return check_bool_pattern (rhs1
, loop_vinfo
, bb_vinfo
);
2254 if (!check_bool_pattern (rhs1
, loop_vinfo
, bb_vinfo
))
2256 return check_bool_pattern (gimple_assign_rhs2 (def_stmt
), loop_vinfo
,
2260 if (TREE_CODE_CLASS (rhs_code
) == tcc_comparison
)
2262 tree vecitype
, comp_vectype
;
2264 /* If the comparison can throw, then is_gimple_condexpr will be
2265 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
2266 if (stmt_could_throw_p (def_stmt
))
2269 comp_vectype
= get_vectype_for_scalar_type (TREE_TYPE (rhs1
));
2270 if (comp_vectype
== NULL_TREE
)
2273 if (TREE_CODE (TREE_TYPE (rhs1
)) != INTEGER_TYPE
)
2275 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (rhs1
));
2277 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode
), 1);
2278 vecitype
= get_vectype_for_scalar_type (itype
);
2279 if (vecitype
== NULL_TREE
)
2283 vecitype
= comp_vectype
;
2284 return expand_vec_cond_expr_p (vecitype
, comp_vectype
);
2291 /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
2292 stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
2293 to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT. */
2296 adjust_bool_pattern_cast (tree type
, tree var
)
2298 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (SSA_NAME_DEF_STMT (var
));
2299 gimple cast_stmt
, pattern_stmt
;
2301 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
));
2302 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_vinfo
);
2303 new_pattern_def_seq (stmt_vinfo
, pattern_stmt
);
2305 = gimple_build_assign_with_ops (NOP_EXPR
,
2306 vect_recog_temp_ssa_var (type
, NULL
),
2307 gimple_assign_lhs (pattern_stmt
),
2309 STMT_VINFO_RELATED_STMT (stmt_vinfo
) = cast_stmt
;
2310 return gimple_assign_lhs (cast_stmt
);
2314 /* Helper function of vect_recog_bool_pattern. Do the actual transformations,
2315 recursively. VAR is an SSA_NAME that should be transformed from bool
2316 to a wider integer type, OUT_TYPE is the desired final integer type of
2317 the whole pattern, TRUEVAL should be NULL unless optimizing
2318 BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
2319 in the then_clause, STMTS is where statements with added pattern stmts
2320 should be pushed to. */
2323 adjust_bool_pattern (tree var
, tree out_type
, tree trueval
,
2324 VEC (gimple
, heap
) **stmts
)
2326 gimple stmt
= SSA_NAME_DEF_STMT (var
);
2327 enum tree_code rhs_code
, def_rhs_code
;
2328 tree itype
, cond_expr
, rhs1
, rhs2
, irhs1
, irhs2
;
2330 gimple pattern_stmt
, def_stmt
;
2332 rhs1
= gimple_assign_rhs1 (stmt
);
2333 rhs2
= gimple_assign_rhs2 (stmt
);
2334 rhs_code
= gimple_assign_rhs_code (stmt
);
2335 loc
= gimple_location (stmt
);
2340 irhs1
= adjust_bool_pattern (rhs1
, out_type
, NULL_TREE
, stmts
);
2341 itype
= TREE_TYPE (irhs1
);
2343 = gimple_build_assign_with_ops (SSA_NAME
,
2344 vect_recog_temp_ssa_var (itype
, NULL
),
2349 irhs1
= adjust_bool_pattern (rhs1
, out_type
, NULL_TREE
, stmts
);
2350 itype
= TREE_TYPE (irhs1
);
2352 = gimple_build_assign_with_ops (BIT_XOR_EXPR
,
2353 vect_recog_temp_ssa_var (itype
, NULL
),
2354 irhs1
, build_int_cst (itype
, 1));
2358 /* Try to optimize x = y & (a < b ? 1 : 0); into
2359 x = (a < b ? y : 0);
2365 S1 a_b = x1 CMP1 y1;
2366 S2 b_b = x2 CMP2 y2;
2368 S4 d_T = (TYPE) c_b;
2370 we would normally emit:
2372 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2373 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2374 S3' c_T = a_T & b_T;
2377 but we can save one stmt by using the
2378 result of one of the COND_EXPRs in the other COND_EXPR and leave
2379 BIT_AND_EXPR stmt out:
2381 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2382 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2385 At least when VEC_COND_EXPR is implemented using masks
2386 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
2387 computes the comparison masks and ands it, in one case with
2388 all ones vector, in the other case with a vector register.
2389 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
2390 often more expensive. */
2391 def_stmt
= SSA_NAME_DEF_STMT (rhs2
);
2392 def_rhs_code
= gimple_assign_rhs_code (def_stmt
);
2393 if (TREE_CODE_CLASS (def_rhs_code
) == tcc_comparison
)
2395 tree def_rhs1
= gimple_assign_rhs1 (def_stmt
);
2396 irhs1
= adjust_bool_pattern (rhs1
, out_type
, NULL_TREE
, stmts
);
2397 if (TYPE_PRECISION (TREE_TYPE (irhs1
))
2398 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1
))))
2401 stmt_vec_info stmt_def_vinfo
= vinfo_for_stmt (def_stmt
);
2402 irhs2
= adjust_bool_pattern (rhs2
, out_type
, irhs1
, stmts
);
2403 tstmt
= VEC_pop (gimple
, *stmts
);
2404 gcc_assert (tstmt
== def_stmt
);
2405 VEC_quick_push (gimple
, *stmts
, stmt
);
2406 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt
))
2407 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo
);
2408 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo
));
2409 STMT_VINFO_RELATED_STMT (stmt_def_vinfo
) = NULL
;
2413 irhs2
= adjust_bool_pattern (rhs2
, out_type
, NULL_TREE
, stmts
);
2416 def_stmt
= SSA_NAME_DEF_STMT (rhs1
);
2417 def_rhs_code
= gimple_assign_rhs_code (def_stmt
);
2418 if (TREE_CODE_CLASS (def_rhs_code
) == tcc_comparison
)
2420 tree def_rhs1
= gimple_assign_rhs1 (def_stmt
);
2421 irhs2
= adjust_bool_pattern (rhs2
, out_type
, NULL_TREE
, stmts
);
2422 if (TYPE_PRECISION (TREE_TYPE (irhs2
))
2423 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1
))))
2426 stmt_vec_info stmt_def_vinfo
= vinfo_for_stmt (def_stmt
);
2427 irhs1
= adjust_bool_pattern (rhs1
, out_type
, irhs2
, stmts
);
2428 tstmt
= VEC_pop (gimple
, *stmts
);
2429 gcc_assert (tstmt
== def_stmt
);
2430 VEC_quick_push (gimple
, *stmts
, stmt
);
2431 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt
))
2432 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo
);
2433 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo
));
2434 STMT_VINFO_RELATED_STMT (stmt_def_vinfo
) = NULL
;
2438 irhs1
= adjust_bool_pattern (rhs1
, out_type
, NULL_TREE
, stmts
);
2444 irhs1
= adjust_bool_pattern (rhs1
, out_type
, NULL_TREE
, stmts
);
2445 irhs2
= adjust_bool_pattern (rhs2
, out_type
, NULL_TREE
, stmts
);
2447 if (TYPE_PRECISION (TREE_TYPE (irhs1
))
2448 != TYPE_PRECISION (TREE_TYPE (irhs2
)))
2450 int prec1
= TYPE_PRECISION (TREE_TYPE (irhs1
));
2451 int prec2
= TYPE_PRECISION (TREE_TYPE (irhs2
));
2452 int out_prec
= TYPE_PRECISION (out_type
);
2453 if (absu_hwi (out_prec
- prec1
) < absu_hwi (out_prec
- prec2
))
2454 irhs2
= adjust_bool_pattern_cast (TREE_TYPE (irhs1
), rhs2
);
2455 else if (absu_hwi (out_prec
- prec1
) > absu_hwi (out_prec
- prec2
))
2456 irhs1
= adjust_bool_pattern_cast (TREE_TYPE (irhs2
), rhs1
);
2459 irhs1
= adjust_bool_pattern_cast (out_type
, rhs1
);
2460 irhs2
= adjust_bool_pattern_cast (out_type
, rhs2
);
2463 itype
= TREE_TYPE (irhs1
);
2465 = gimple_build_assign_with_ops (rhs_code
,
2466 vect_recog_temp_ssa_var (itype
, NULL
),
2471 gcc_assert (TREE_CODE_CLASS (rhs_code
) == tcc_comparison
);
2472 if (TREE_CODE (TREE_TYPE (rhs1
)) != INTEGER_TYPE
2473 || !TYPE_UNSIGNED (TREE_TYPE (rhs1
))
2474 || (TYPE_PRECISION (TREE_TYPE (rhs1
))
2475 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1
)))))
2477 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (rhs1
));
2479 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode
), 1);
2482 itype
= TREE_TYPE (rhs1
);
2483 cond_expr
= build2_loc (loc
, rhs_code
, itype
, rhs1
, rhs2
);
2484 if (trueval
== NULL_TREE
)
2485 trueval
= build_int_cst (itype
, 1);
2487 gcc_checking_assert (useless_type_conversion_p (itype
,
2488 TREE_TYPE (trueval
)));
2490 = gimple_build_assign_with_ops (COND_EXPR
,
2491 vect_recog_temp_ssa_var (itype
, NULL
),
2493 build_int_cst (itype
, 0));
2497 VEC_safe_push (gimple
, heap
, *stmts
, stmt
);
2498 gimple_set_location (pattern_stmt
, loc
);
2499 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt
)) = pattern_stmt
;
2500 return gimple_assign_lhs (pattern_stmt
);
2504 /* Function vect_recog_bool_pattern
2506 Try to find pattern like following:
2508 bool a_b, b_b, c_b, d_b, e_b;
2511 S1 a_b = x1 CMP1 y1;
2512 S2 b_b = x2 CMP2 y2;
2514 S4 d_b = x3 CMP3 y3;
2516 S6 f_T = (TYPE) e_b;
2518 where type 'TYPE' is an integral type.
2522 * LAST_STMT: A stmt at the end from which the pattern
2523 search begins, i.e. cast of a bool to
2528 * TYPE_IN: The type of the input arguments to the pattern.
2530 * TYPE_OUT: The type of the output of this pattern.
2532 * Return value: A new stmt that will be used to replace the pattern.
2534 Assuming size of TYPE is the same as size of all comparisons
2535 (otherwise some casts would be added where needed), the above
2536 sequence we create related pattern stmts:
2537 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2538 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2539 S4' d_T = x3 CMP3 y3 ? 1 : 0;
2540 S5' e_T = c_T | d_T;
2543 Instead of the above S3' we could emit:
2544 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2545 S3' c_T = a_T | b_T;
2546 but the above is more efficient. */
2549 vect_recog_bool_pattern (VEC (gimple
, heap
) **stmts
, tree
*type_in
,
2552 gimple last_stmt
= VEC_pop (gimple
, *stmts
);
2553 enum tree_code rhs_code
;
2554 tree var
, lhs
, rhs
, vectype
;
2555 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
2556 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
2557 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
2558 gimple pattern_stmt
;
2560 if (!is_gimple_assign (last_stmt
))
2563 var
= gimple_assign_rhs1 (last_stmt
);
2564 lhs
= gimple_assign_lhs (last_stmt
);
2566 if ((TYPE_PRECISION (TREE_TYPE (var
)) != 1
2567 || !TYPE_UNSIGNED (TREE_TYPE (var
)))
2568 && TREE_CODE (TREE_TYPE (var
)) != BOOLEAN_TYPE
)
2571 rhs_code
= gimple_assign_rhs_code (last_stmt
);
2572 if (CONVERT_EXPR_CODE_P (rhs_code
))
2574 if (TREE_CODE (TREE_TYPE (lhs
)) != INTEGER_TYPE
2575 || TYPE_PRECISION (TREE_TYPE (lhs
)) == 1)
2577 vectype
= get_vectype_for_scalar_type (TREE_TYPE (lhs
));
2578 if (vectype
== NULL_TREE
)
2581 if (!check_bool_pattern (var
, loop_vinfo
, bb_vinfo
))
2584 rhs
= adjust_bool_pattern (var
, TREE_TYPE (lhs
), NULL_TREE
, stmts
);
2585 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
2586 if (useless_type_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (rhs
)))
2588 = gimple_build_assign_with_ops (SSA_NAME
, lhs
, rhs
, NULL_TREE
);
2591 = gimple_build_assign_with_ops (NOP_EXPR
, lhs
, rhs
, NULL_TREE
);
2592 *type_out
= vectype
;
2594 VEC_safe_push (gimple
, heap
, *stmts
, last_stmt
);
2595 if (dump_enabled_p ())
2596 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
, vect_location
,
2597 "vect_recog_bool_pattern: detected: ");
2599 return pattern_stmt
;
2601 else if (rhs_code
== SSA_NAME
2602 && STMT_VINFO_DATA_REF (stmt_vinfo
))
2604 stmt_vec_info pattern_stmt_info
;
2605 vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
2606 gcc_assert (vectype
!= NULL_TREE
);
2607 if (!VECTOR_MODE_P (TYPE_MODE (vectype
)))
2609 if (!check_bool_pattern (var
, loop_vinfo
, bb_vinfo
))
2612 rhs
= adjust_bool_pattern (var
, TREE_TYPE (vectype
), NULL_TREE
, stmts
);
2613 lhs
= build1 (VIEW_CONVERT_EXPR
, TREE_TYPE (vectype
), lhs
);
2614 if (!useless_type_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (rhs
)))
2616 tree rhs2
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
2618 = gimple_build_assign_with_ops (NOP_EXPR
, rhs2
, rhs
, NULL_TREE
);
2619 new_pattern_def_seq (stmt_vinfo
, cast_stmt
);
2623 = gimple_build_assign_with_ops (SSA_NAME
, lhs
, rhs
, NULL_TREE
);
2624 pattern_stmt_info
= new_stmt_vec_info (pattern_stmt
, loop_vinfo
,
2626 set_vinfo_for_stmt (pattern_stmt
, pattern_stmt_info
);
2627 STMT_VINFO_DATA_REF (pattern_stmt_info
)
2628 = STMT_VINFO_DATA_REF (stmt_vinfo
);
2629 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info
)
2630 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo
);
2631 STMT_VINFO_DR_INIT (pattern_stmt_info
) = STMT_VINFO_DR_INIT (stmt_vinfo
);
2632 STMT_VINFO_DR_OFFSET (pattern_stmt_info
)
2633 = STMT_VINFO_DR_OFFSET (stmt_vinfo
);
2634 STMT_VINFO_DR_STEP (pattern_stmt_info
) = STMT_VINFO_DR_STEP (stmt_vinfo
);
2635 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info
)
2636 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo
);
2637 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo
)) = pattern_stmt
;
2638 *type_out
= vectype
;
2640 VEC_safe_push (gimple
, heap
, *stmts
, last_stmt
);
2641 if (dump_enabled_p ())
2642 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
, vect_location
,
2643 "vect_recog_bool_pattern: detected: ");
2644 return pattern_stmt
;
2651 /* Mark statements that are involved in a pattern. */
2654 vect_mark_pattern_stmts (gimple orig_stmt
, gimple pattern_stmt
,
2655 tree pattern_vectype
)
/* NOTE(review): brace-only lines were elided from this excerpt; nesting
   must be taken from the original file.  */
2657 stmt_vec_info pattern_stmt_info
, def_stmt_info
;
2658 stmt_vec_info orig_stmt_info
= vinfo_for_stmt (orig_stmt
);
2659 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (orig_stmt_info
);
2660 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (orig_stmt_info
);
/* Give PATTERN_STMT a stmt_vec_info if it does not have one yet, and
   place it in ORIG_STMT's basic block.  */
2663 pattern_stmt_info
= vinfo_for_stmt (pattern_stmt
);
2664 if (pattern_stmt_info
== NULL
)
2666 pattern_stmt_info
= new_stmt_vec_info (pattern_stmt
, loop_vinfo
,
2668 set_vinfo_for_stmt (pattern_stmt
, pattern_stmt_info
);
2670 gimple_set_bb (pattern_stmt
, gimple_bb (orig_stmt
));
/* Cross-link the two stmts through RELATED_STMT, copy the def type and
   vectype, and mark ORIG_STMT as replaced by a pattern.  */
2672 STMT_VINFO_RELATED_STMT (pattern_stmt_info
) = orig_stmt
;
2673 STMT_VINFO_DEF_TYPE (pattern_stmt_info
)
2674 = STMT_VINFO_DEF_TYPE (orig_stmt_info
);
2675 STMT_VINFO_VECTYPE (pattern_stmt_info
) = pattern_vectype
;
2676 STMT_VINFO_IN_PATTERN_P (orig_stmt_info
) = true;
2677 STMT_VINFO_RELATED_STMT (orig_stmt_info
) = pattern_stmt
;
/* The pattern def sequence moves to the pattern stmt; each stmt in it
   gets the same bookkeeping (vinfo, bb, related stmt, def type).  */
2678 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info
)
2679 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info
);
2680 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info
))
2682 gimple_stmt_iterator si
;
2683 for (si
= gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info
));
2684 !gsi_end_p (si
); gsi_next (&si
))
2686 def_stmt
= gsi_stmt (si
);
2687 def_stmt_info
= vinfo_for_stmt (def_stmt
);
2688 if (def_stmt_info
== NULL
)
2690 def_stmt_info
= new_stmt_vec_info (def_stmt
, loop_vinfo
,
2692 set_vinfo_for_stmt (def_stmt
, def_stmt_info
);
2694 gimple_set_bb (def_stmt
, gimple_bb (orig_stmt
));
2695 STMT_VINFO_RELATED_STMT (def_stmt_info
) = orig_stmt
;
2696 STMT_VINFO_DEF_TYPE (def_stmt_info
)
2697 = STMT_VINFO_DEF_TYPE (orig_stmt_info
);
/* Only fill in the vectype where the recog function left it unset.  */
2698 if (STMT_VINFO_VECTYPE (def_stmt_info
) == NULL_TREE
)
2699 STMT_VINFO_VECTYPE (def_stmt_info
) = pattern_vectype
;
2704 /* Function vect_pattern_recog_1
2707 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
2708 computation pattern.
2709 STMT: A stmt from which the pattern search should start.
2711 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
2712 expression that computes the same functionality and can be used to
2713 replace the sequence of stmts that are involved in the pattern.
2716 This function checks if the expression returned by PATTERN_RECOG_FUNC is
2717 supported in vector form by the target. We use 'TYPE_IN' to obtain the
2718 relevant vector type. If 'TYPE_IN' is already a vector type, then this
2719 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
2720 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
2721 to the available target pattern.
2723 This function also does some bookkeeping, as explained in the documentation
2724 for vect_pattern_recog. */
2727 vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func
,
2728 gimple_stmt_iterator si
,
2729 VEC (gimple
, heap
) **stmts_to_replace
)
/* NOTE(review): brace-only and early-return lines were elided from this
   excerpt (e.g. the bail-out when the recog function finds nothing);
   recover them from the original file.  */
2731 gimple stmt
= gsi_stmt (si
), pattern_stmt
;
2732 stmt_vec_info stmt_info
;
2733 loop_vec_info loop_vinfo
;
2734 tree pattern_vectype
;
2735 tree type_in
, type_out
;
2736 enum tree_code code
;
/* Reset STMTS_TO_REPLACE to just the stmt at SI and run the recognizer;
   it may push additional stmts it wants replaced.  */
2740 VEC_truncate (gimple
, *stmts_to_replace
, 0);
2741 VEC_quick_push (gimple
, *stmts_to_replace
, stmt
);
2742 pattern_stmt
= (* vect_recog_func
) (stmts_to_replace
, &type_in
, &type_out
);
/* The recog function may have replaced the stmt of interest; the last
   entry of STMTS_TO_REPLACE is the stmt the pattern hangs off.  */
2746 stmt
= VEC_last (gimple
, *stmts_to_replace
);
2747 stmt_info
= vinfo_for_stmt (stmt
);
2748 loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2750 if (VECTOR_MODE_P (TYPE_MODE (type_in
)))
2752 /* No need to check target support (already checked by the pattern
2753 recognition function). */
2754 pattern_vectype
= type_out
? type_out
: type_in
;
2758 enum machine_mode vec_mode
;
2759 enum insn_code icode
;
2762 /* Check target support */
2763 type_in
= get_vectype_for_scalar_type (type_in
);
2767 type_out
= get_vectype_for_scalar_type (type_out
);
2772 pattern_vectype
= type_out
;
2774 if (is_gimple_assign (pattern_stmt
))
2775 code
= gimple_assign_rhs_code (pattern_stmt
);
2778 gcc_assert (is_gimple_call (pattern_stmt
));
/* Reject the pattern if the target has no instruction for CODE on
   TYPE_IN's mode, or its result mode does not match TYPE_OUT.  */
2782 optab
= optab_for_tree_code (code
, type_in
, optab_default
);
2783 vec_mode
= TYPE_MODE (type_in
);
2785 || (icode
= optab_handler (optab
, vec_mode
)) == CODE_FOR_nothing
2786 || (insn_data
[icode
].operand
[0].mode
!= TYPE_MODE (type_out
)))
2790 /* Found a vectorizable pattern. */
2791 if (dump_enabled_p ())
2793 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
, vect_location
,
2794 "pattern recognized: ");
2795 dump_gimple_stmt (MSG_OPTIMIZED_LOCATIONS
, TDF_SLIM
, pattern_stmt
, 0);
2798 /* Mark the stmts that are involved in the pattern. */
2799 vect_mark_pattern_stmts (stmt
, pattern_stmt
, pattern_vectype
);
2801 /* Patterns cannot be vectorized using SLP, because they change the order of
2804 FOR_EACH_VEC_ELT (gimple
, LOOP_VINFO_REDUCTIONS (loop_vinfo
), i
, next
)
2806 VEC_ordered_remove (gimple
, LOOP_VINFO_REDUCTIONS (loop_vinfo
), i
);
2808 /* It is possible that additional pattern stmts are created and inserted in
2809 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
2810 relevant statements. */
2811 for (i
= 0; VEC_iterate (gimple
, *stmts_to_replace
, i
, stmt
)
2812 && (unsigned) i
< (VEC_length (gimple
, *stmts_to_replace
) - 1);
2815 stmt_info
= vinfo_for_stmt (stmt
);
2816 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
2817 if (dump_enabled_p ())
2819 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
, vect_location
,
2820 "additional pattern stmt: ");
2821 dump_gimple_stmt (MSG_OPTIMIZED_LOCATIONS
, TDF_SLIM
, pattern_stmt
, 0);
/* Additional stmts carry no vectype of their own here.  */
2824 vect_mark_pattern_stmts (stmt
, pattern_stmt
, NULL_TREE
);
2829 /* Function vect_pattern_recog
2832 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
2835 Output - for each computation idiom that is detected we create a new stmt
2836 that provides the same functionality and that can be vectorized. We
2837 also record some information in the struct_stmt_info of the relevant
2838 stmts, as explained below:
2840 At the entry to this function we have the following stmts, with the
2841 following initial value in the STMT_VINFO fields:
2843 stmt in_pattern_p related_stmt vec_stmt
2844 S1: a_i = .... - - -
2845 S2: a_2 = ..use(a_i).. - - -
2846 S3: a_1 = ..use(a_2).. - - -
2847 S4: a_0 = ..use(a_1).. - - -
2848 S5: ... = ..use(a_0).. - - -
2850 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
2851 represented by a single stmt. We then:
2852 - create a new stmt S6 equivalent to the pattern (the stmt is not
2853 inserted into the code)
2854 - fill in the STMT_VINFO fields as follows:
2856 in_pattern_p related_stmt vec_stmt
2857 S1: a_i = .... - - -
2858 S2: a_2 = ..use(a_i).. - - -
2859 S3: a_1 = ..use(a_2).. - - -
2860 S4: a_0 = ..use(a_1).. true S6 -
2861 '---> S6: a_new = .... - S4 -
2862 S5: ... = ..use(a_0).. - - -
2864 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
2865 to each other through the RELATED_STMT field).
2867 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
2868 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
2869 remain irrelevant unless used by stmts other than S4.
2871 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
2872 (because they are marked as irrelevant). It will vectorize S6, and record
2873 a pointer to the new vector stmt VS6 from S6 (as usual).
2874 S4 will be skipped, and S5 will be vectorized as usual:
2876 in_pattern_p related_stmt vec_stmt
2877 S1: a_i = .... - - -
2878 S2: a_2 = ..use(a_i).. - - -
2879 S3: a_1 = ..use(a_2).. - - -
2880 > VS6: va_new = .... - - -
2881 S4: a_0 = ..use(a_1).. true S6 VS6
2882 '---> S6: a_new = .... - S4 VS6
2883 > VS5: ... = ..vuse(va_new).. - - -
2884 S5: ... = ..use(a_0).. - - -
2886 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
2887 elsewhere), and we'll end up with:
2890 VS5: ... = ..vuse(va_new)..
2892 In case of more than one pattern statements, e.g., widen-mult with
2896 S2 a_T = (TYPE) a_t;
2897 '--> S3: a_it = (interm_type) a_t;
2898 S4 prod_T = a_T * CONST;
2899 '--> S5: prod_T' = a_it w* CONST;
2901 there may be other users of a_T outside the pattern. In that case S2 will
2902 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
2903 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
2904 be recorded in S3. */
2907 vect_pattern_recog (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
)
/* NOTE(review): brace-only lines and some declarations were elided from
   this excerpt, and the closing brace is not visible; confirm the
   function's tail against the original file.  */
2912 gimple_stmt_iterator si
;
2914 vect_recog_func_ptr vect_recog_func
;
2915 VEC (gimple
, heap
) *stmts_to_replace
= VEC_alloc (gimple
, heap
, 1);
2918 if (dump_enabled_p ())
2919 dump_printf_loc (MSG_NOTE
, vect_location
,
2920 "=== vect_pattern_recog ===");
/* Loop case: walk all basic blocks of the loop; basic-block (SLP)
   case: the single block from BB_VINFO.  */
2924 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2925 bbs
= LOOP_VINFO_BBS (loop_vinfo
);
2926 nbbs
= loop
->num_nodes
;
2930 bbs
= &BB_VINFO_BB (bb_vinfo
);
2934 /* Scan through the loop stmts, applying the pattern recognition
2935 functions starting at each stmt visited: */
2936 for (i
= 0; i
< nbbs
; i
++)
2938 basic_block bb
= bbs
[i
];
2939 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
/* In the basic-block case, skip stmts already marked as not
   vectorizable.  */
2941 if (bb_vinfo
&& (stmt
= gsi_stmt (si
))
2942 && vinfo_for_stmt (stmt
)
2943 && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt
)))
2946 /* Scan over all generic vect_recog_xxx_pattern functions. */
2947 for (j
= 0; j
< NUM_PATTERNS
; j
++)
2949 vect_recog_func
= vect_vect_recog_func_ptrs
[j
];
2950 vect_pattern_recog_1 (vect_recog_func
, si
,
/* Release the scratch vector shared by all recognizer calls.  */
2956 VEC_free (gimple
, heap
, stmts_to_replace
);