1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
28 #include "basic-block.h"
29 #include "gimple-pretty-print.h"
30 #include "tree-flow.h"
31 #include "tree-dump.h"
36 #include "tree-data-ref.h"
37 #include "tree-vectorizer.h"
39 #include "diagnostic-core.h"
41 /* Pattern recognition functions */
42 static gimple
vect_recog_widen_sum_pattern (gimple
*, tree
*, tree
*);
43 static gimple
vect_recog_widen_mult_pattern (gimple
*, tree
*, tree
*);
44 static gimple
vect_recog_dot_prod_pattern (gimple
*, tree
*, tree
*);
45 static gimple
vect_recog_pow_pattern (gimple
*, tree
*, tree
*);
46 static vect_recog_func_ptr vect_vect_recog_func_ptrs
[NUM_PATTERNS
] = {
47 vect_recog_widen_mult_pattern
,
48 vect_recog_widen_sum_pattern
,
49 vect_recog_dot_prod_pattern
,
50 vect_recog_pow_pattern
};
53 /* Function widened_name_p
55 Check whether NAME, an ssa-name used in USE_STMT,
56 is a result of a type-promotion, such that:
57 DEF_STMT: NAME = NOP (name0)
58 where the type of name0 (HALF_TYPE) is smaller than the type of NAME.
59 If CHECK_SIGN is TRUE, check that either both types are signed or both are
63 widened_name_p (tree name
, gimple use_stmt
, tree
*half_type
, gimple
*def_stmt
,
68 loop_vec_info loop_vinfo
;
69 stmt_vec_info stmt_vinfo
;
70 tree type
= TREE_TYPE (name
);
72 enum vect_def_type dt
;
75 stmt_vinfo
= vinfo_for_stmt (use_stmt
);
76 loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
78 if (!vect_is_simple_use (name
, loop_vinfo
, NULL
, def_stmt
, &def
, &dt
))
81 if (dt
!= vect_internal_def
82 && dt
!= vect_external_def
&& dt
!= vect_constant_def
)
88 if (!is_gimple_assign (*def_stmt
))
91 if (gimple_assign_rhs_code (*def_stmt
) != NOP_EXPR
)
94 oprnd0
= gimple_assign_rhs1 (*def_stmt
);
96 *half_type
= TREE_TYPE (oprnd0
);
97 if (!INTEGRAL_TYPE_P (type
) || !INTEGRAL_TYPE_P (*half_type
)
98 || ((TYPE_UNSIGNED (type
) != TYPE_UNSIGNED (*half_type
)) && check_sign
)
99 || (TYPE_PRECISION (type
) < (TYPE_PRECISION (*half_type
) * 2)))
102 if (!vect_is_simple_use (oprnd0
, loop_vinfo
, NULL
, &dummy_gimple
, &dummy
,
109 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
110 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
113 vect_recog_temp_ssa_var (tree type
, gimple stmt
)
115 tree var
= create_tmp_var (type
, "patt");
117 add_referenced_var (var
);
118 var
= make_ssa_name (var
, stmt
);
122 /* Function vect_recog_dot_prod_pattern
124 Try to find the following pattern:
130 sum_0 = phi <init, sum_1>
133 S3 x_T = (TYPE1) x_t;
134 S4 y_T = (TYPE1) y_t;
136 [S6 prod = (TYPE2) prod; #optional]
137 S7 sum_1 = prod + sum_0;
139 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
140 same size of 'TYPE1' or bigger. This is a special case of a reduction
145 * LAST_STMT: A stmt from which the pattern search begins. In the example,
146 when this function is called with S7, the pattern {S3,S4,S5,S6,S7} will be
151 * TYPE_IN: The type of the input arguments to the pattern.
153 * TYPE_OUT: The type of the output of this pattern.
155 * Return value: A new stmt that will be used to replace the sequence of
156 stmts that constitute the pattern. In this case it will be:
157 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
159 Note: The dot-prod idiom is a widening reduction pattern that is
160 vectorized without preserving all the intermediate results. It
161 produces only N/2 (widened) results (by summing up pairs of
162 intermediate results) rather than all N results. Therefore, we
163 cannot allow this pattern when we want to get all the results and in
164 the correct order (as is the case when this computation is in an
165 inner-loop nested in an outer-loop that us being vectorized). */
168 vect_recog_dot_prod_pattern (gimple
*last_stmt
, tree
*type_in
, tree
*type_out
)
172 tree oprnd00
, oprnd01
;
173 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (*last_stmt
);
174 tree type
, half_type
;
177 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
178 struct loop
*loop
= LOOP_VINFO_LOOP (loop_info
);
181 if (!is_gimple_assign (*last_stmt
))
184 type
= gimple_expr_type (*last_stmt
);
186 /* Look for the following pattern
190 DDPROD = (TYPE2) DPROD;
191 sum_1 = DDPROD + sum_0;
193 - DX is double the size of X
194 - DY is double the size of Y
195 - DX, DY, DPROD all have the same type
196 - sum is the same size of DPROD or bigger
197 - sum has been recognized as a reduction variable.
199 This is equivalent to:
200 DPROD = X w* Y; #widen mult
201 sum_1 = DPROD w+ sum_0; #widen summation
203 DPROD = X w* Y; #widen mult
204 sum_1 = DPROD + sum_0; #summation
207 /* Starting from LAST_STMT, follow the defs of its uses in search
208 of the above pattern. */
210 if (gimple_assign_rhs_code (*last_stmt
) != PLUS_EXPR
)
213 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
215 /* Has been detected as widening-summation? */
217 stmt
= STMT_VINFO_RELATED_STMT (stmt_vinfo
);
218 type
= gimple_expr_type (stmt
);
219 if (gimple_assign_rhs_code (stmt
) != WIDEN_SUM_EXPR
)
221 oprnd0
= gimple_assign_rhs1 (stmt
);
222 oprnd1
= gimple_assign_rhs2 (stmt
);
223 half_type
= TREE_TYPE (oprnd0
);
229 if (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
)
231 oprnd0
= gimple_assign_rhs1 (*last_stmt
);
232 oprnd1
= gimple_assign_rhs2 (*last_stmt
);
233 if (!types_compatible_p (TREE_TYPE (oprnd0
), type
)
234 || !types_compatible_p (TREE_TYPE (oprnd1
), type
))
238 if (widened_name_p (oprnd0
, stmt
, &half_type
, &def_stmt
, true))
241 oprnd0
= gimple_assign_rhs1 (stmt
);
247 /* So far so good. Since *last_stmt was detected as a (summation) reduction,
248 we know that oprnd1 is the reduction variable (defined by a loop-header
249 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
250 Left to check that oprnd0 is defined by a (widen_)mult_expr */
252 prod_type
= half_type
;
253 stmt
= SSA_NAME_DEF_STMT (oprnd0
);
255 /* It could not be the dot_prod pattern if the stmt is outside the loop. */
256 if (!gimple_bb (stmt
) || !flow_bb_inside_loop_p (loop
, gimple_bb (stmt
)))
259 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
260 inside the loop (in case we are analyzing an outer-loop). */
261 if (!is_gimple_assign (stmt
))
263 stmt_vinfo
= vinfo_for_stmt (stmt
);
264 gcc_assert (stmt_vinfo
);
265 if (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_internal_def
)
267 if (gimple_assign_rhs_code (stmt
) != MULT_EXPR
)
269 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
271 /* Has been detected as a widening multiplication? */
273 stmt
= STMT_VINFO_RELATED_STMT (stmt_vinfo
);
274 if (gimple_assign_rhs_code (stmt
) != WIDEN_MULT_EXPR
)
276 stmt_vinfo
= vinfo_for_stmt (stmt
);
277 gcc_assert (stmt_vinfo
);
278 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_internal_def
);
279 oprnd00
= gimple_assign_rhs1 (stmt
);
280 oprnd01
= gimple_assign_rhs2 (stmt
);
284 tree half_type0
, half_type1
;
288 oprnd0
= gimple_assign_rhs1 (stmt
);
289 oprnd1
= gimple_assign_rhs2 (stmt
);
290 if (!types_compatible_p (TREE_TYPE (oprnd0
), prod_type
)
291 || !types_compatible_p (TREE_TYPE (oprnd1
), prod_type
))
293 if (!widened_name_p (oprnd0
, stmt
, &half_type0
, &def_stmt
, true))
295 oprnd00
= gimple_assign_rhs1 (def_stmt
);
296 if (!widened_name_p (oprnd1
, stmt
, &half_type1
, &def_stmt
, true))
298 oprnd01
= gimple_assign_rhs1 (def_stmt
);
299 if (!types_compatible_p (half_type0
, half_type1
))
301 if (TYPE_PRECISION (prod_type
) != TYPE_PRECISION (half_type0
) * 2)
305 half_type
= TREE_TYPE (oprnd00
);
306 *type_in
= half_type
;
309 /* Pattern detected. Create a stmt to be used to replace the pattern: */
310 var
= vect_recog_temp_ssa_var (type
, NULL
);
311 pattern_stmt
= gimple_build_assign_with_ops3 (DOT_PROD_EXPR
, var
,
312 oprnd00
, oprnd01
, oprnd1
);
314 if (vect_print_dump_info (REPORT_DETAILS
))
316 fprintf (vect_dump
, "vect_recog_dot_prod_pattern: detected: ");
317 print_gimple_stmt (vect_dump
, pattern_stmt
, 0, TDF_SLIM
);
320 /* We don't allow changing the order of the computation in the inner-loop
321 when doing outer-loop vectorization. */
322 gcc_assert (!nested_in_vect_loop_p (loop
, *last_stmt
));
327 /* Function vect_recog_widen_mult_pattern
329 Try to find the following pattern:
332 TYPE a_T, b_T, prod_T;
338 S5 prod_T = a_T * b_T;
340 where type 'TYPE' is at least double the size of type 'type'.
342 Also detect unsgigned cases:
344 unsigned type a_t, b_t;
345 unsigned TYPE u_prod_T;
346 TYPE a_T, b_T, prod_T;
352 S5 prod_T = a_T * b_T;
353 S6 u_prod_T = (unsigned TYPE) prod_T;
355 and multiplication by constants:
362 S5 prod_T = a_T * CONST;
366 * LAST_STMT: A stmt from which the pattern search begins. In the example,
367 when this function is called with S5, the pattern {S3,S4,S5,(S6)} is
372 * TYPE_IN: The type of the input arguments to the pattern.
374 * TYPE_OUT: The type of the output of this pattern.
376 * Return value: A new stmt that will be used to replace the sequence of
377 stmts that constitute the pattern. In this case it will be:
378 WIDEN_MULT <a_t, b_t>
382 vect_recog_widen_mult_pattern (gimple
*last_stmt
,
386 gimple def_stmt0
, def_stmt1
;
388 tree type
, half_type0
, half_type1
;
390 tree vectype
, vectype_out
= NULL_TREE
;
393 enum tree_code dummy_code
;
395 VEC (tree
, heap
) *dummy_vec
;
398 if (!is_gimple_assign (*last_stmt
))
401 type
= gimple_expr_type (*last_stmt
);
403 /* Starting from LAST_STMT, follow the defs of its uses in search
404 of the above pattern. */
406 if (gimple_assign_rhs_code (*last_stmt
) != MULT_EXPR
)
409 oprnd0
= gimple_assign_rhs1 (*last_stmt
);
410 oprnd1
= gimple_assign_rhs2 (*last_stmt
);
411 if (!types_compatible_p (TREE_TYPE (oprnd0
), type
)
412 || !types_compatible_p (TREE_TYPE (oprnd1
), type
))
415 /* Check argument 0. */
416 op0_ok
= widened_name_p (oprnd0
, *last_stmt
, &half_type0
, &def_stmt0
, false);
417 /* Check argument 1. */
418 op1_ok
= widened_name_p (oprnd1
, *last_stmt
, &half_type1
, &def_stmt1
, false);
420 /* In case of multiplication by a constant one of the operands may not match
421 the pattern, but not both. */
422 if (!op0_ok
&& !op1_ok
)
425 if (op0_ok
&& op1_ok
)
427 oprnd0
= gimple_assign_rhs1 (def_stmt0
);
428 oprnd1
= gimple_assign_rhs1 (def_stmt1
);
432 if (CONSTANT_CLASS_P (oprnd0
)
433 && TREE_CODE (half_type1
) == INTEGER_TYPE
434 && tree_int_cst_lt (oprnd0
, TYPE_MAXVAL (half_type1
))
435 && tree_int_cst_lt (TYPE_MINVAL (half_type1
), oprnd0
))
437 /* OPRND0 is a constant of HALF_TYPE1. */
438 half_type0
= half_type1
;
439 oprnd1
= gimple_assign_rhs1 (def_stmt1
);
446 if (CONSTANT_CLASS_P (oprnd1
)
447 && TREE_CODE (half_type0
) == INTEGER_TYPE
448 && tree_int_cst_lt (oprnd1
, TYPE_MAXVAL (half_type0
))
449 && tree_int_cst_lt (TYPE_MINVAL (half_type0
), oprnd1
))
451 /* OPRND1 is a constant of HALF_TYPE0. */
452 half_type1
= half_type0
;
453 oprnd0
= gimple_assign_rhs1 (def_stmt0
);
459 /* Handle unsigned case. Look for
460 S6 u_prod_T = (unsigned TYPE) prod_T;
461 Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */
462 if (TYPE_UNSIGNED (type
) != TYPE_UNSIGNED (half_type0
))
464 tree lhs
= gimple_assign_lhs (*last_stmt
), use_lhs
;
465 imm_use_iterator imm_iter
;
468 gimple use_stmt
= NULL
;
471 if (TYPE_UNSIGNED (type
) == TYPE_UNSIGNED (half_type1
))
474 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, lhs
)
476 use_stmt
= USE_STMT (use_p
);
480 if (nuses
!= 1 || !is_gimple_assign (use_stmt
)
481 || gimple_assign_rhs_code (use_stmt
) != NOP_EXPR
)
484 use_lhs
= gimple_assign_lhs (use_stmt
);
485 use_type
= TREE_TYPE (use_lhs
);
486 if (!INTEGRAL_TYPE_P (use_type
)
487 || (TYPE_UNSIGNED (type
) == TYPE_UNSIGNED (use_type
))
488 || (TYPE_PRECISION (type
) != TYPE_PRECISION (use_type
)))
492 *last_stmt
= use_stmt
;
495 if (!types_compatible_p (half_type0
, half_type1
))
498 /* Pattern detected. */
499 if (vect_print_dump_info (REPORT_DETAILS
))
500 fprintf (vect_dump
, "vect_recog_widen_mult_pattern: detected: ");
502 /* Check target support */
503 vectype
= get_vectype_for_scalar_type (half_type0
);
504 vectype_out
= get_vectype_for_scalar_type (type
);
507 || !supportable_widening_operation (WIDEN_MULT_EXPR
, *last_stmt
,
508 vectype_out
, vectype
,
509 &dummy
, &dummy
, &dummy_code
,
510 &dummy_code
, &dummy_int
, &dummy_vec
))
514 *type_out
= vectype_out
;
516 /* Pattern supported. Create a stmt to be used to replace the pattern: */
517 var
= vect_recog_temp_ssa_var (type
, NULL
);
518 pattern_stmt
= gimple_build_assign_with_ops (WIDEN_MULT_EXPR
, var
, oprnd0
,
520 SSA_NAME_DEF_STMT (var
) = pattern_stmt
;
522 if (vect_print_dump_info (REPORT_DETAILS
))
523 print_gimple_stmt (vect_dump
, pattern_stmt
, 0, TDF_SLIM
);
529 /* Function vect_recog_pow_pattern
531 Try to find the following pattern:
535 with POW being one of pow, powf, powi, powif and N being
540 * LAST_STMT: A stmt from which the pattern search begins.
544 * TYPE_IN: The type of the input arguments to the pattern.
546 * TYPE_OUT: The type of the output of this pattern.
548 * Return value: A new stmt that will be used to replace the sequence of
549 stmts that constitute the pattern. In this case it will be:
556 vect_recog_pow_pattern (gimple
*last_stmt
, tree
*type_in
, tree
*type_out
)
558 tree fn
, base
, exp
= NULL
;
562 if (!is_gimple_call (*last_stmt
) || gimple_call_lhs (*last_stmt
) == NULL
)
565 fn
= gimple_call_fndecl (*last_stmt
);
566 if (fn
== NULL_TREE
|| DECL_BUILT_IN_CLASS (fn
) != BUILT_IN_NORMAL
)
569 switch (DECL_FUNCTION_CODE (fn
))
575 base
= gimple_call_arg (*last_stmt
, 0);
576 exp
= gimple_call_arg (*last_stmt
, 1);
577 if (TREE_CODE (exp
) != REAL_CST
578 && TREE_CODE (exp
) != INTEGER_CST
)
586 /* We now have a pow or powi builtin function call with a constant
589 *type_out
= NULL_TREE
;
591 /* Catch squaring. */
592 if ((host_integerp (exp
, 0)
593 && tree_low_cst (exp
, 0) == 2)
594 || (TREE_CODE (exp
) == REAL_CST
595 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp
), dconst2
)))
597 *type_in
= TREE_TYPE (base
);
599 var
= vect_recog_temp_ssa_var (TREE_TYPE (base
), NULL
);
600 stmt
= gimple_build_assign_with_ops (MULT_EXPR
, var
, base
, base
);
601 SSA_NAME_DEF_STMT (var
) = stmt
;
605 /* Catch square root. */
606 if (TREE_CODE (exp
) == REAL_CST
607 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp
), dconsthalf
))
609 tree newfn
= mathfn_built_in (TREE_TYPE (base
), BUILT_IN_SQRT
);
610 *type_in
= get_vectype_for_scalar_type (TREE_TYPE (base
));
613 gimple stmt
= gimple_build_call (newfn
, 1, base
);
614 if (vectorizable_function (stmt
, *type_in
, *type_in
)
617 var
= vect_recog_temp_ssa_var (TREE_TYPE (base
), stmt
);
618 gimple_call_set_lhs (stmt
, var
);
628 /* Function vect_recog_widen_sum_pattern
630 Try to find the following pattern:
633 TYPE x_T, sum = init;
635 sum_0 = phi <init, sum_1>
638 S3 sum_1 = x_T + sum_0;
640 where type 'TYPE' is at least double the size of type 'type', i.e - we're
641 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
642 a special case of a reduction computation.
646 * LAST_STMT: A stmt from which the pattern search begins. In the example,
647 when this function is called with S3, the pattern {S2,S3} will be detected.
651 * TYPE_IN: The type of the input arguments to the pattern.
653 * TYPE_OUT: The type of the output of this pattern.
655 * Return value: A new stmt that will be used to replace the sequence of
656 stmts that constitute the pattern. In this case it will be:
657 WIDEN_SUM <x_t, sum_0>
659 Note: The widening-sum idiom is a widening reduction pattern that is
660 vectorized without preserving all the intermediate results. It
661 produces only N/2 (widened) results (by summing up pairs of
662 intermediate results) rather than all N results. Therefore, we
663 cannot allow this pattern when we want to get all the results and in
664 the correct order (as is the case when this computation is in an
665 inner-loop nested in an outer-loop that us being vectorized). */
668 vect_recog_widen_sum_pattern (gimple
*last_stmt
, tree
*type_in
, tree
*type_out
)
672 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (*last_stmt
);
673 tree type
, half_type
;
675 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
676 struct loop
*loop
= LOOP_VINFO_LOOP (loop_info
);
679 if (!is_gimple_assign (*last_stmt
))
682 type
= gimple_expr_type (*last_stmt
);
684 /* Look for the following pattern
687 In which DX is at least double the size of X, and sum_1 has been
688 recognized as a reduction variable.
691 /* Starting from LAST_STMT, follow the defs of its uses in search
692 of the above pattern. */
694 if (gimple_assign_rhs_code (*last_stmt
) != PLUS_EXPR
)
697 if (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
)
700 oprnd0
= gimple_assign_rhs1 (*last_stmt
);
701 oprnd1
= gimple_assign_rhs2 (*last_stmt
);
702 if (!types_compatible_p (TREE_TYPE (oprnd0
), type
)
703 || !types_compatible_p (TREE_TYPE (oprnd1
), type
))
706 /* So far so good. Since *last_stmt was detected as a (summation) reduction,
707 we know that oprnd1 is the reduction variable (defined by a loop-header
708 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
709 Left to check that oprnd0 is defined by a cast from type 'type' to type
712 if (!widened_name_p (oprnd0
, *last_stmt
, &half_type
, &stmt
, true))
715 oprnd0
= gimple_assign_rhs1 (stmt
);
716 *type_in
= half_type
;
719 /* Pattern detected. Create a stmt to be used to replace the pattern: */
720 var
= vect_recog_temp_ssa_var (type
, NULL
);
721 pattern_stmt
= gimple_build_assign_with_ops (WIDEN_SUM_EXPR
, var
,
723 SSA_NAME_DEF_STMT (var
) = pattern_stmt
;
725 if (vect_print_dump_info (REPORT_DETAILS
))
727 fprintf (vect_dump
, "vect_recog_widen_sum_pattern: detected: ");
728 print_gimple_stmt (vect_dump
, pattern_stmt
, 0, TDF_SLIM
);
731 /* We don't allow changing the order of the computation in the inner-loop
732 when doing outer-loop vectorization. */
733 gcc_assert (!nested_in_vect_loop_p (loop
, *last_stmt
));
739 /* Function vect_pattern_recog_1
742 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
744 STMT: A stmt from which the pattern search should start.
746 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
747 expression that computes the same functionality and can be used to
748 replace the sequence of stmts that are involved in the pattern.
751 This function checks if the expression returned by PATTERN_RECOG_FUNC is
752 supported in vector form by the target. We use 'TYPE_IN' to obtain the
753 relevant vector type. If 'TYPE_IN' is already a vector type, then this
754 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
755 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
756 to the available target pattern.
758 This function also does some bookkeeping, as explained in the documentation
759 for vect_recog_pattern. */
762 vect_pattern_recog_1 (
763 gimple (* vect_recog_func
) (gimple
*, tree
*, tree
*),
764 gimple_stmt_iterator si
)
766 gimple stmt
= gsi_stmt (si
), pattern_stmt
;
767 stmt_vec_info stmt_info
;
768 stmt_vec_info pattern_stmt_info
;
769 loop_vec_info loop_vinfo
;
770 tree pattern_vectype
;
771 tree type_in
, type_out
;
776 pattern_stmt
= (* vect_recog_func
) (&stmt
, &type_in
, &type_out
);
780 si
= gsi_for_stmt (stmt
);
781 stmt_info
= vinfo_for_stmt (stmt
);
782 loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
784 if (VECTOR_MODE_P (TYPE_MODE (type_in
)))
786 /* No need to check target support (already checked by the pattern
787 recognition function). */
789 gcc_assert (VECTOR_MODE_P (TYPE_MODE (type_out
)));
790 pattern_vectype
= type_out
? type_out
: type_in
;
794 enum machine_mode vec_mode
;
795 enum insn_code icode
;
798 /* Check target support */
799 type_in
= get_vectype_for_scalar_type (type_in
);
803 type_out
= get_vectype_for_scalar_type (type_out
);
808 pattern_vectype
= type_out
;
810 if (is_gimple_assign (pattern_stmt
))
811 code
= gimple_assign_rhs_code (pattern_stmt
);
814 gcc_assert (is_gimple_call (pattern_stmt
));
818 optab
= optab_for_tree_code (code
, type_in
, optab_default
);
819 vec_mode
= TYPE_MODE (type_in
);
821 || (icode
= optab_handler (optab
, vec_mode
)) == CODE_FOR_nothing
822 || (insn_data
[icode
].operand
[0].mode
!= TYPE_MODE (type_out
)))
826 /* Found a vectorizable pattern. */
827 if (vect_print_dump_info (REPORT_DETAILS
))
829 fprintf (vect_dump
, "pattern recognized: ");
830 print_gimple_stmt (vect_dump
, pattern_stmt
, 0, TDF_SLIM
);
833 /* Mark the stmts that are involved in the pattern. */
834 gsi_insert_before (&si
, pattern_stmt
, GSI_SAME_STMT
);
835 set_vinfo_for_stmt (pattern_stmt
,
836 new_stmt_vec_info (pattern_stmt
, loop_vinfo
, NULL
));
837 pattern_stmt_info
= vinfo_for_stmt (pattern_stmt
);
839 STMT_VINFO_RELATED_STMT (pattern_stmt_info
) = stmt
;
840 STMT_VINFO_DEF_TYPE (pattern_stmt_info
) = STMT_VINFO_DEF_TYPE (stmt_info
);
841 STMT_VINFO_VECTYPE (pattern_stmt_info
) = pattern_vectype
;
842 STMT_VINFO_IN_PATTERN_P (stmt_info
) = true;
843 STMT_VINFO_RELATED_STMT (stmt_info
) = pattern_stmt
;
845 /* Patterns cannot be vectorized using SLP, because they change the order of
847 FOR_EACH_VEC_ELT (gimple
, LOOP_VINFO_REDUCTIONS (loop_vinfo
), i
, next
)
849 VEC_ordered_remove (gimple
, LOOP_VINFO_REDUCTIONS (loop_vinfo
), i
);
853 /* Function vect_pattern_recog
856 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
859 Output - for each computation idiom that is detected we insert a new stmt
860 that provides the same functionality and that can be vectorized. We
861 also record some information in the struct_stmt_info of the relevant
862 stmts, as explained below:
864 At the entry to this function we have the following stmts, with the
865 following initial value in the STMT_VINFO fields:
867 stmt in_pattern_p related_stmt vec_stmt
869 S2: a_2 = ..use(a_i).. - - -
870 S3: a_1 = ..use(a_2).. - - -
871 S4: a_0 = ..use(a_1).. - - -
872 S5: ... = ..use(a_0).. - - -
874 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
875 represented by a single stmt. We then:
876 - create a new stmt S6 that will replace the pattern.
877 - insert the new stmt S6 before the last stmt in the pattern
878 - fill in the STMT_VINFO fields as follows:
880 in_pattern_p related_stmt vec_stmt
882 S2: a_2 = ..use(a_i).. - - -
883 S3: a_1 = ..use(a_2).. - - -
884 > S6: a_new = .... - S4 -
885 S4: a_0 = ..use(a_1).. true S6 -
886 S5: ... = ..use(a_0).. - - -
888 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
889 to each other through the RELATED_STMT field).
891 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
892 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
893 remain irrelevant unless used by stmts other than S4.
895 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
896 (because they are marked as irrelevant). It will vectorize S6, and record
897 a pointer to the new vector stmt VS6 both from S6 (as usual), and also
898 from S4. We do that so that when we get to vectorizing stmts that use the
899 def of S4 (like S5 that uses a_0), we'll know where to take the relevant
900 vector-def from. S4 will be skipped, and S5 will be vectorized as usual:
902 in_pattern_p related_stmt vec_stmt
904 S2: a_2 = ..use(a_i).. - - -
905 S3: a_1 = ..use(a_2).. - - -
906 > VS6: va_new = .... - - -
907 S6: a_new = .... - S4 VS6
908 S4: a_0 = ..use(a_1).. true S6 VS6
909 > VS5: ... = ..vuse(va_new).. - - -
910 S5: ... = ..use(a_0).. - - -
912 DCE could then get rid of {S1,S2,S3,S4,S5,S6} (if their defs are not used
913 elsewhere), and we'll end up with:
916 VS5: ... = ..vuse(va_new)..
918 If vectorization does not succeed, DCE will clean S6 away (its def is
919 not used), and we'll end up with the original sequence.
923 vect_pattern_recog (loop_vec_info loop_vinfo
)
925 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
926 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
927 unsigned int nbbs
= loop
->num_nodes
;
928 gimple_stmt_iterator si
;
930 gimple (* vect_recog_func_ptr
) (gimple
*, tree
*, tree
*);
932 if (vect_print_dump_info (REPORT_DETAILS
))
933 fprintf (vect_dump
, "=== vect_pattern_recog ===");
935 /* Scan through the loop stmts, applying the pattern recognition
936 functions starting at each stmt visited: */
937 for (i
= 0; i
< nbbs
; i
++)
939 basic_block bb
= bbs
[i
];
940 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
942 /* Scan over all generic vect_recog_xxx_pattern functions. */
943 for (j
= 0; j
< NUM_PATTERNS
; j
++)
945 vect_recog_func_ptr
= vect_vect_recog_func_ptrs
[j
];
946 vect_pattern_recog_1 (vect_recog_func_ptr
, si
);