1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Dorit Nuzman <dorit@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
29 #include "basic-block.h"
30 #include "gimple-pretty-print.h"
31 #include "tree-flow.h"
32 #include "tree-dump.h"
37 #include "tree-data-ref.h"
38 #include "tree-vectorizer.h"
40 #include "diagnostic-core.h"
42 /* Pattern recognition functions */
43 static gimple
vect_recog_widen_sum_pattern (VEC (gimple
, heap
) **, tree
*,
45 static gimple
vect_recog_widen_mult_pattern (VEC (gimple
, heap
) **, tree
*,
47 static gimple
vect_recog_dot_prod_pattern (VEC (gimple
, heap
) **, tree
*,
49 static gimple
vect_recog_pow_pattern (VEC (gimple
, heap
) **, tree
*, tree
*);
50 static gimple
vect_recog_over_widening_pattern (VEC (gimple
, heap
) **, tree
*,
52 static vect_recog_func_ptr vect_vect_recog_func_ptrs
[NUM_PATTERNS
] = {
53 vect_recog_widen_mult_pattern
,
54 vect_recog_widen_sum_pattern
,
55 vect_recog_dot_prod_pattern
,
56 vect_recog_pow_pattern
,
57 vect_recog_over_widening_pattern
};
60 /* Function widened_name_p
62 Check whether NAME, an ssa-name used in USE_STMT,
63 is a result of a type-promotion, such that:
64 DEF_STMT: NAME = NOP (name0)
65 where the type of name0 (HALF_TYPE) is smaller than the type of NAME.
66 If CHECK_SIGN is TRUE, check that either both types are signed or both are
70 widened_name_p (tree name
, gimple use_stmt
, tree
*half_type
, gimple
*def_stmt
,
75 loop_vec_info loop_vinfo
;
76 stmt_vec_info stmt_vinfo
;
77 tree type
= TREE_TYPE (name
);
79 enum vect_def_type dt
;
82 stmt_vinfo
= vinfo_for_stmt (use_stmt
);
83 loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
85 if (!vect_is_simple_use (name
, loop_vinfo
, NULL
, def_stmt
, &def
, &dt
))
88 if (dt
!= vect_internal_def
89 && dt
!= vect_external_def
&& dt
!= vect_constant_def
)
95 if (!is_gimple_assign (*def_stmt
))
98 if (gimple_assign_rhs_code (*def_stmt
) != NOP_EXPR
)
101 oprnd0
= gimple_assign_rhs1 (*def_stmt
);
103 *half_type
= TREE_TYPE (oprnd0
);
104 if (!INTEGRAL_TYPE_P (type
) || !INTEGRAL_TYPE_P (*half_type
)
105 || ((TYPE_UNSIGNED (type
) != TYPE_UNSIGNED (*half_type
)) && check_sign
)
106 || (TYPE_PRECISION (type
) < (TYPE_PRECISION (*half_type
) * 2)))
109 if (!vect_is_simple_use (oprnd0
, loop_vinfo
, NULL
, &dummy_gimple
, &dummy
,
116 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
117 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
120 vect_recog_temp_ssa_var (tree type
, gimple stmt
)
122 tree var
= create_tmp_var (type
, "patt");
124 add_referenced_var (var
);
125 var
= make_ssa_name (var
, stmt
);
129 /* Function vect_recog_dot_prod_pattern
131 Try to find the following pattern:
137 sum_0 = phi <init, sum_1>
140 S3 x_T = (TYPE1) x_t;
141 S4 y_T = (TYPE1) y_t;
143 [S6 prod = (TYPE2) prod; #optional]
144 S7 sum_1 = prod + sum_0;
146 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
147 same size of 'TYPE1' or bigger. This is a special case of a reduction
152 * STMTS: Contains a stmt from which the pattern search begins. In the
153 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
158 * TYPE_IN: The type of the input arguments to the pattern.
160 * TYPE_OUT: The type of the output of this pattern.
162 * Return value: A new stmt that will be used to replace the sequence of
163 stmts that constitute the pattern. In this case it will be:
164 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
166 Note: The dot-prod idiom is a widening reduction pattern that is
167 vectorized without preserving all the intermediate results. It
168 produces only N/2 (widened) results (by summing up pairs of
169 intermediate results) rather than all N results. Therefore, we
170 cannot allow this pattern when we want to get all the results and in
171 the correct order (as is the case when this computation is in an
172 inner-loop nested in an outer-loop that us being vectorized). */
175 vect_recog_dot_prod_pattern (VEC (gimple
, heap
) **stmts
, tree
*type_in
,
178 gimple stmt
, last_stmt
= VEC_index (gimple
, *stmts
, 0);
180 tree oprnd00
, oprnd01
;
181 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
182 tree type
, half_type
;
185 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
186 struct loop
*loop
= LOOP_VINFO_LOOP (loop_info
);
189 if (!is_gimple_assign (last_stmt
))
192 type
= gimple_expr_type (last_stmt
);
194 /* Look for the following pattern
198 DDPROD = (TYPE2) DPROD;
199 sum_1 = DDPROD + sum_0;
201 - DX is double the size of X
202 - DY is double the size of Y
203 - DX, DY, DPROD all have the same type
204 - sum is the same size of DPROD or bigger
205 - sum has been recognized as a reduction variable.
207 This is equivalent to:
208 DPROD = X w* Y; #widen mult
209 sum_1 = DPROD w+ sum_0; #widen summation
211 DPROD = X w* Y; #widen mult
212 sum_1 = DPROD + sum_0; #summation
215 /* Starting from LAST_STMT, follow the defs of its uses in search
216 of the above pattern. */
218 if (gimple_assign_rhs_code (last_stmt
) != PLUS_EXPR
)
221 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
223 /* Has been detected as widening-summation? */
225 stmt
= STMT_VINFO_RELATED_STMT (stmt_vinfo
);
226 type
= gimple_expr_type (stmt
);
227 if (gimple_assign_rhs_code (stmt
) != WIDEN_SUM_EXPR
)
229 oprnd0
= gimple_assign_rhs1 (stmt
);
230 oprnd1
= gimple_assign_rhs2 (stmt
);
231 half_type
= TREE_TYPE (oprnd0
);
237 if (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
)
239 oprnd0
= gimple_assign_rhs1 (last_stmt
);
240 oprnd1
= gimple_assign_rhs2 (last_stmt
);
241 if (!types_compatible_p (TREE_TYPE (oprnd0
), type
)
242 || !types_compatible_p (TREE_TYPE (oprnd1
), type
))
246 if (widened_name_p (oprnd0
, stmt
, &half_type
, &def_stmt
, true))
249 oprnd0
= gimple_assign_rhs1 (stmt
);
255 /* So far so good. Since last_stmt was detected as a (summation) reduction,
256 we know that oprnd1 is the reduction variable (defined by a loop-header
257 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
258 Left to check that oprnd0 is defined by a (widen_)mult_expr */
259 if (TREE_CODE (oprnd0
) != SSA_NAME
)
262 prod_type
= half_type
;
263 stmt
= SSA_NAME_DEF_STMT (oprnd0
);
265 /* It could not be the dot_prod pattern if the stmt is outside the loop. */
266 if (!gimple_bb (stmt
) || !flow_bb_inside_loop_p (loop
, gimple_bb (stmt
)))
269 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
270 inside the loop (in case we are analyzing an outer-loop). */
271 if (!is_gimple_assign (stmt
))
273 stmt_vinfo
= vinfo_for_stmt (stmt
);
274 gcc_assert (stmt_vinfo
);
275 if (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_internal_def
)
277 if (gimple_assign_rhs_code (stmt
) != MULT_EXPR
)
279 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
281 /* Has been detected as a widening multiplication? */
283 stmt
= STMT_VINFO_RELATED_STMT (stmt_vinfo
);
284 if (gimple_assign_rhs_code (stmt
) != WIDEN_MULT_EXPR
)
286 stmt_vinfo
= vinfo_for_stmt (stmt
);
287 gcc_assert (stmt_vinfo
);
288 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_internal_def
);
289 oprnd00
= gimple_assign_rhs1 (stmt
);
290 oprnd01
= gimple_assign_rhs2 (stmt
);
294 tree half_type0
, half_type1
;
298 oprnd0
= gimple_assign_rhs1 (stmt
);
299 oprnd1
= gimple_assign_rhs2 (stmt
);
300 if (!types_compatible_p (TREE_TYPE (oprnd0
), prod_type
)
301 || !types_compatible_p (TREE_TYPE (oprnd1
), prod_type
))
303 if (!widened_name_p (oprnd0
, stmt
, &half_type0
, &def_stmt
, true))
305 oprnd00
= gimple_assign_rhs1 (def_stmt
);
306 if (!widened_name_p (oprnd1
, stmt
, &half_type1
, &def_stmt
, true))
308 oprnd01
= gimple_assign_rhs1 (def_stmt
);
309 if (!types_compatible_p (half_type0
, half_type1
))
311 if (TYPE_PRECISION (prod_type
) != TYPE_PRECISION (half_type0
) * 2)
315 half_type
= TREE_TYPE (oprnd00
);
316 *type_in
= half_type
;
319 /* Pattern detected. Create a stmt to be used to replace the pattern: */
320 var
= vect_recog_temp_ssa_var (type
, NULL
);
321 pattern_stmt
= gimple_build_assign_with_ops3 (DOT_PROD_EXPR
, var
,
322 oprnd00
, oprnd01
, oprnd1
);
324 if (vect_print_dump_info (REPORT_DETAILS
))
326 fprintf (vect_dump
, "vect_recog_dot_prod_pattern: detected: ");
327 print_gimple_stmt (vect_dump
, pattern_stmt
, 0, TDF_SLIM
);
330 /* We don't allow changing the order of the computation in the inner-loop
331 when doing outer-loop vectorization. */
332 gcc_assert (!nested_in_vect_loop_p (loop
, last_stmt
));
338 /* Handle two cases of multiplication by a constant. The first one is when
339 the constant, CONST_OPRND, fits the type (HALF_TYPE) of the second
340 operand (OPRND). In that case, we can peform widen-mult from HALF_TYPE to
343 Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
344 HALF_TYPE, and CONST_OPRND fits an intermediate type (2 times smaller than
345 TYPE), we can perform widen-mult from the intermediate type to TYPE and
346 replace a_T = (TYPE) a_t; with a_it - (interm_type) a_t; */
349 vect_handle_widen_mult_by_const (gimple stmt
, tree const_oprnd
, tree
*oprnd
,
350 VEC (gimple
, heap
) **stmts
, tree type
,
351 tree
*half_type
, gimple def_stmt
)
353 tree new_type
, new_oprnd
, tmp
;
355 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt
));
356 struct loop
*loop
= LOOP_VINFO_LOOP (loop_info
);
358 if (int_fits_type_p (const_oprnd
, *half_type
))
360 /* CONST_OPRND is a constant of HALF_TYPE. */
361 *oprnd
= gimple_assign_rhs1 (def_stmt
);
365 if (TYPE_PRECISION (type
) < (TYPE_PRECISION (*half_type
) * 4)
366 || !gimple_bb (def_stmt
)
367 || !flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
))
368 || !vinfo_for_stmt (def_stmt
))
371 /* TYPE is 4 times bigger than HALF_TYPE, try widen-mult for
372 a type 2 times bigger than HALF_TYPE. */
373 new_type
= build_nonstandard_integer_type (TYPE_PRECISION (type
) / 2,
374 TYPE_UNSIGNED (type
));
375 if (!int_fits_type_p (const_oprnd
, new_type
))
378 /* Use NEW_TYPE for widen_mult. */
379 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
)))
381 new_stmt
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
));
382 /* Check if the already created pattern stmt is what we need. */
383 if (!is_gimple_assign (new_stmt
)
384 || gimple_assign_rhs_code (new_stmt
) != NOP_EXPR
385 || TREE_TYPE (gimple_assign_lhs (new_stmt
)) != new_type
)
388 *oprnd
= gimple_assign_lhs (new_stmt
);
392 /* Create a_T = (NEW_TYPE) a_t; */
393 *oprnd
= gimple_assign_rhs1 (def_stmt
);
394 tmp
= create_tmp_var (new_type
, NULL
);
395 add_referenced_var (tmp
);
396 new_oprnd
= make_ssa_name (tmp
, NULL
);
397 new_stmt
= gimple_build_assign_with_ops (NOP_EXPR
, new_oprnd
, *oprnd
,
399 SSA_NAME_DEF_STMT (new_oprnd
) = new_stmt
;
400 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
)) = new_stmt
;
401 VEC_safe_push (gimple
, heap
, *stmts
, def_stmt
);
405 *half_type
= new_type
;
410 /* Function vect_recog_widen_mult_pattern
412 Try to find the following pattern:
415 TYPE a_T, b_T, prod_T;
421 S5 prod_T = a_T * b_T;
423 where type 'TYPE' is at least double the size of type 'type'.
425 Also detect unsgigned cases:
427 unsigned type a_t, b_t;
428 unsigned TYPE u_prod_T;
429 TYPE a_T, b_T, prod_T;
435 S5 prod_T = a_T * b_T;
436 S6 u_prod_T = (unsigned TYPE) prod_T;
438 and multiplication by constants:
445 S5 prod_T = a_T * CONST;
447 A special case of multiplication by constants is when 'TYPE' is 4 times
448 bigger than 'type', but CONST fits an intermediate type 2 times smaller
449 than 'TYPE'. In that case we create an additional pattern stmt for S3
450 to create a variable of the intermediate type, and perform widen-mult
451 on the intermediate type as well:
455 TYPE a_T, prod_T, prod_T';
459 '--> a_it = (interm_type) a_t;
460 S5 prod_T = a_T * CONST;
461 '--> prod_T' = a_it w* CONST;
465 * STMTS: Contains a stmt from which the pattern search begins. In the
466 example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
467 is detected. In case of unsigned widen-mult, the original stmt (S5) is
468 replaced with S6 in STMTS. In case of multiplication by a constant
469 of an intermediate type (the last case above), STMTS also contains S3
470 (inserted before S5).
474 * TYPE_IN: The type of the input arguments to the pattern.
476 * TYPE_OUT: The type of the output of this pattern.
478 * Return value: A new stmt that will be used to replace the sequence of
479 stmts that constitute the pattern. In this case it will be:
480 WIDEN_MULT <a_t, b_t>
484 vect_recog_widen_mult_pattern (VEC (gimple
, heap
) **stmts
,
485 tree
*type_in
, tree
*type_out
)
487 gimple last_stmt
= VEC_pop (gimple
, *stmts
);
488 gimple def_stmt0
, def_stmt1
;
490 tree type
, half_type0
, half_type1
;
492 tree vectype
, vectype_out
= NULL_TREE
;
495 enum tree_code dummy_code
;
497 VEC (tree
, heap
) *dummy_vec
;
500 if (!is_gimple_assign (last_stmt
))
503 type
= gimple_expr_type (last_stmt
);
505 /* Starting from LAST_STMT, follow the defs of its uses in search
506 of the above pattern. */
508 if (gimple_assign_rhs_code (last_stmt
) != MULT_EXPR
)
511 oprnd0
= gimple_assign_rhs1 (last_stmt
);
512 oprnd1
= gimple_assign_rhs2 (last_stmt
);
513 if (!types_compatible_p (TREE_TYPE (oprnd0
), type
)
514 || !types_compatible_p (TREE_TYPE (oprnd1
), type
))
517 /* Check argument 0. */
518 op0_ok
= widened_name_p (oprnd0
, last_stmt
, &half_type0
, &def_stmt0
, false);
519 /* Check argument 1. */
520 op1_ok
= widened_name_p (oprnd1
, last_stmt
, &half_type1
, &def_stmt1
, false);
522 /* In case of multiplication by a constant one of the operands may not match
523 the pattern, but not both. */
524 if (!op0_ok
&& !op1_ok
)
527 if (op0_ok
&& op1_ok
)
529 oprnd0
= gimple_assign_rhs1 (def_stmt0
);
530 oprnd1
= gimple_assign_rhs1 (def_stmt1
);
534 if (TREE_CODE (oprnd0
) == INTEGER_CST
535 && TREE_CODE (half_type1
) == INTEGER_TYPE
536 && vect_handle_widen_mult_by_const (last_stmt
, oprnd0
, &oprnd1
,
538 &half_type1
, def_stmt1
))
539 half_type0
= half_type1
;
545 if (TREE_CODE (oprnd1
) == INTEGER_CST
546 && TREE_CODE (half_type0
) == INTEGER_TYPE
547 && vect_handle_widen_mult_by_const (last_stmt
, oprnd1
, &oprnd0
,
549 &half_type0
, def_stmt0
))
550 half_type1
= half_type0
;
555 /* Handle unsigned case. Look for
556 S6 u_prod_T = (unsigned TYPE) prod_T;
557 Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */
558 if (TYPE_UNSIGNED (type
) != TYPE_UNSIGNED (half_type0
))
560 tree lhs
= gimple_assign_lhs (last_stmt
), use_lhs
;
561 imm_use_iterator imm_iter
;
564 gimple use_stmt
= NULL
;
567 if (TYPE_UNSIGNED (type
) == TYPE_UNSIGNED (half_type1
))
570 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, lhs
)
572 if (is_gimple_debug (USE_STMT (use_p
)))
574 use_stmt
= USE_STMT (use_p
);
578 if (nuses
!= 1 || !is_gimple_assign (use_stmt
)
579 || gimple_assign_rhs_code (use_stmt
) != NOP_EXPR
)
582 use_lhs
= gimple_assign_lhs (use_stmt
);
583 use_type
= TREE_TYPE (use_lhs
);
584 if (!INTEGRAL_TYPE_P (use_type
)
585 || (TYPE_UNSIGNED (type
) == TYPE_UNSIGNED (use_type
))
586 || (TYPE_PRECISION (type
) != TYPE_PRECISION (use_type
)))
590 last_stmt
= use_stmt
;
593 if (!types_compatible_p (half_type0
, half_type1
))
596 /* Pattern detected. */
597 if (vect_print_dump_info (REPORT_DETAILS
))
598 fprintf (vect_dump
, "vect_recog_widen_mult_pattern: detected: ");
600 /* Check target support */
601 vectype
= get_vectype_for_scalar_type (half_type0
);
602 vectype_out
= get_vectype_for_scalar_type (type
);
605 || !supportable_widening_operation (WIDEN_MULT_EXPR
, last_stmt
,
606 vectype_out
, vectype
,
607 &dummy
, &dummy
, &dummy_code
,
608 &dummy_code
, &dummy_int
, &dummy_vec
))
612 *type_out
= vectype_out
;
614 /* Pattern supported. Create a stmt to be used to replace the pattern: */
615 var
= vect_recog_temp_ssa_var (type
, NULL
);
616 pattern_stmt
= gimple_build_assign_with_ops (WIDEN_MULT_EXPR
, var
, oprnd0
,
618 SSA_NAME_DEF_STMT (var
) = pattern_stmt
;
620 if (vect_print_dump_info (REPORT_DETAILS
))
621 print_gimple_stmt (vect_dump
, pattern_stmt
, 0, TDF_SLIM
);
623 VEC_safe_push (gimple
, heap
, *stmts
, last_stmt
);
628 /* Function vect_recog_pow_pattern
630 Try to find the following pattern:
634 with POW being one of pow, powf, powi, powif and N being
639 * LAST_STMT: A stmt from which the pattern search begins.
643 * TYPE_IN: The type of the input arguments to the pattern.
645 * TYPE_OUT: The type of the output of this pattern.
647 * Return value: A new stmt that will be used to replace the sequence of
648 stmts that constitute the pattern. In this case it will be:
655 vect_recog_pow_pattern (VEC (gimple
, heap
) **stmts
, tree
*type_in
,
658 gimple last_stmt
= VEC_index (gimple
, *stmts
, 0);
659 tree fn
, base
, exp
= NULL
;
663 if (!is_gimple_call (last_stmt
) || gimple_call_lhs (last_stmt
) == NULL
)
666 fn
= gimple_call_fndecl (last_stmt
);
667 if (fn
== NULL_TREE
|| DECL_BUILT_IN_CLASS (fn
) != BUILT_IN_NORMAL
)
670 switch (DECL_FUNCTION_CODE (fn
))
676 base
= gimple_call_arg (last_stmt
, 0);
677 exp
= gimple_call_arg (last_stmt
, 1);
678 if (TREE_CODE (exp
) != REAL_CST
679 && TREE_CODE (exp
) != INTEGER_CST
)
687 /* We now have a pow or powi builtin function call with a constant
690 *type_out
= NULL_TREE
;
692 /* Catch squaring. */
693 if ((host_integerp (exp
, 0)
694 && tree_low_cst (exp
, 0) == 2)
695 || (TREE_CODE (exp
) == REAL_CST
696 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp
), dconst2
)))
698 *type_in
= TREE_TYPE (base
);
700 var
= vect_recog_temp_ssa_var (TREE_TYPE (base
), NULL
);
701 stmt
= gimple_build_assign_with_ops (MULT_EXPR
, var
, base
, base
);
702 SSA_NAME_DEF_STMT (var
) = stmt
;
706 /* Catch square root. */
707 if (TREE_CODE (exp
) == REAL_CST
708 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp
), dconsthalf
))
710 tree newfn
= mathfn_built_in (TREE_TYPE (base
), BUILT_IN_SQRT
);
711 *type_in
= get_vectype_for_scalar_type (TREE_TYPE (base
));
714 gimple stmt
= gimple_build_call (newfn
, 1, base
);
715 if (vectorizable_function (stmt
, *type_in
, *type_in
)
718 var
= vect_recog_temp_ssa_var (TREE_TYPE (base
), stmt
);
719 gimple_call_set_lhs (stmt
, var
);
729 /* Function vect_recog_widen_sum_pattern
731 Try to find the following pattern:
734 TYPE x_T, sum = init;
736 sum_0 = phi <init, sum_1>
739 S3 sum_1 = x_T + sum_0;
741 where type 'TYPE' is at least double the size of type 'type', i.e - we're
742 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
743 a special case of a reduction computation.
747 * LAST_STMT: A stmt from which the pattern search begins. In the example,
748 when this function is called with S3, the pattern {S2,S3} will be detected.
752 * TYPE_IN: The type of the input arguments to the pattern.
754 * TYPE_OUT: The type of the output of this pattern.
756 * Return value: A new stmt that will be used to replace the sequence of
757 stmts that constitute the pattern. In this case it will be:
758 WIDEN_SUM <x_t, sum_0>
760 Note: The widening-sum idiom is a widening reduction pattern that is
761 vectorized without preserving all the intermediate results. It
762 produces only N/2 (widened) results (by summing up pairs of
763 intermediate results) rather than all N results. Therefore, we
764 cannot allow this pattern when we want to get all the results and in
765 the correct order (as is the case when this computation is in an
766 inner-loop nested in an outer-loop that us being vectorized). */
769 vect_recog_widen_sum_pattern (VEC (gimple
, heap
) **stmts
, tree
*type_in
,
772 gimple stmt
, last_stmt
= VEC_index (gimple
, *stmts
, 0);
774 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
775 tree type
, half_type
;
777 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
778 struct loop
*loop
= LOOP_VINFO_LOOP (loop_info
);
781 if (!is_gimple_assign (last_stmt
))
784 type
= gimple_expr_type (last_stmt
);
786 /* Look for the following pattern
789 In which DX is at least double the size of X, and sum_1 has been
790 recognized as a reduction variable.
793 /* Starting from LAST_STMT, follow the defs of its uses in search
794 of the above pattern. */
796 if (gimple_assign_rhs_code (last_stmt
) != PLUS_EXPR
)
799 if (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
)
802 oprnd0
= gimple_assign_rhs1 (last_stmt
);
803 oprnd1
= gimple_assign_rhs2 (last_stmt
);
804 if (!types_compatible_p (TREE_TYPE (oprnd0
), type
)
805 || !types_compatible_p (TREE_TYPE (oprnd1
), type
))
808 /* So far so good. Since last_stmt was detected as a (summation) reduction,
809 we know that oprnd1 is the reduction variable (defined by a loop-header
810 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
811 Left to check that oprnd0 is defined by a cast from type 'type' to type
814 if (!widened_name_p (oprnd0
, last_stmt
, &half_type
, &stmt
, true))
817 oprnd0
= gimple_assign_rhs1 (stmt
);
818 *type_in
= half_type
;
821 /* Pattern detected. Create a stmt to be used to replace the pattern: */
822 var
= vect_recog_temp_ssa_var (type
, NULL
);
823 pattern_stmt
= gimple_build_assign_with_ops (WIDEN_SUM_EXPR
, var
,
825 SSA_NAME_DEF_STMT (var
) = pattern_stmt
;
827 if (vect_print_dump_info (REPORT_DETAILS
))
829 fprintf (vect_dump
, "vect_recog_widen_sum_pattern: detected: ");
830 print_gimple_stmt (vect_dump
, pattern_stmt
, 0, TDF_SLIM
);
833 /* We don't allow changing the order of the computation in the inner-loop
834 when doing outer-loop vectorization. */
835 gcc_assert (!nested_in_vect_loop_p (loop
, last_stmt
));
841 /* Return TRUE if the operation in STMT can be performed on a smaller type.
844 STMT - a statement to check.
845 DEF - we support operations with two operands, one of which is constant.
846 The other operand can be defined by a demotion operation, or by a
847 previous statement in a sequence of over-promoted operations. In the
848 later case DEF is used to replace that operand. (It is defined by a
849 pattern statement we created for the previous statement in the
853 NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not
854 NULL, it's the type of DEF.
855 STMTS - additional pattern statements. If a pattern statement (type
856 conversion) is created in this function, its original statement is
860 OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
861 operands to use in the new pattern statement for STMT (will be created
862 in vect_recog_over_widening_pattern ()).
863 NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
864 statements for STMT: the first one is a type promotion and the second
865 one is the operation itself. We return the type promotion statement
866 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_STMT of
867 the second pattern statement. */
870 vect_operation_fits_smaller_type (gimple stmt
, tree def
, tree
*new_type
,
871 tree
*op0
, tree
*op1
, gimple
*new_def_stmt
,
872 VEC (gimple
, heap
) **stmts
)
875 tree const_oprnd
, oprnd
;
876 tree interm_type
= NULL_TREE
, half_type
, tmp
, new_oprnd
, type
;
877 gimple def_stmt
, new_stmt
;
879 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt
));
880 struct loop
*loop
= LOOP_VINFO_LOOP (loop_info
);
882 *new_def_stmt
= NULL
;
884 if (!is_gimple_assign (stmt
))
887 code
= gimple_assign_rhs_code (stmt
);
888 if (code
!= LSHIFT_EXPR
&& code
!= RSHIFT_EXPR
889 && code
!= BIT_IOR_EXPR
&& code
!= BIT_XOR_EXPR
&& code
!= BIT_AND_EXPR
)
892 oprnd
= gimple_assign_rhs1 (stmt
);
893 const_oprnd
= gimple_assign_rhs2 (stmt
);
894 type
= gimple_expr_type (stmt
);
896 if (TREE_CODE (oprnd
) != SSA_NAME
897 || TREE_CODE (const_oprnd
) != INTEGER_CST
)
900 /* If we are in the middle of a sequence, we use DEF from a previous
901 statement. Otherwise, OPRND has to be a result of type promotion. */
904 half_type
= *new_type
;
910 if (!widened_name_p (oprnd
, stmt
, &half_type
, &def_stmt
, false)
911 || !gimple_bb (def_stmt
)
912 || !flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
))
913 || !vinfo_for_stmt (def_stmt
))
917 /* Can we perform the operation on a smaller type? */
923 if (!int_fits_type_p (const_oprnd
, half_type
))
925 /* HALF_TYPE is not enough. Try a bigger type if possible. */
926 if (TYPE_PRECISION (type
) < (TYPE_PRECISION (half_type
) * 4))
929 interm_type
= build_nonstandard_integer_type (
930 TYPE_PRECISION (half_type
) * 2, TYPE_UNSIGNED (type
));
931 if (!int_fits_type_p (const_oprnd
, interm_type
))
938 /* Try intermediate type - HALF_TYPE is not enough for sure. */
939 if (TYPE_PRECISION (type
) < (TYPE_PRECISION (half_type
) * 4))
942 /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
943 (e.g., if the original value was char, the shift amount is at most 8
944 if we want to use short). */
945 if (compare_tree_int (const_oprnd
, TYPE_PRECISION (half_type
)) == 1)
948 interm_type
= build_nonstandard_integer_type (
949 TYPE_PRECISION (half_type
) * 2, TYPE_UNSIGNED (type
));
951 if (!vect_supportable_shift (code
, interm_type
))
957 if (vect_supportable_shift (code
, half_type
))
960 /* Try intermediate type - HALF_TYPE is not supported. */
961 if (TYPE_PRECISION (type
) < (TYPE_PRECISION (half_type
) * 4))
964 interm_type
= build_nonstandard_integer_type (
965 TYPE_PRECISION (half_type
) * 2, TYPE_UNSIGNED (type
));
967 if (!vect_supportable_shift (code
, interm_type
))
976 /* There are four possible cases:
977 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
978 the first statement in the sequence)
979 a. The original, HALF_TYPE, is not enough - we replace the promotion
980 from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
981 b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
983 2. OPRND is defined by a pattern statement we created.
984 a. Its type is not sufficient for the operation, we create a new stmt:
985 a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store
986 this statement in NEW_DEF_STMT, and it is later put in
987 STMT_VINFO_PATTERN_DEF_STMT of the pattern statement for STMT.
988 b. OPRND is good to use in the new statement. */
993 /* Replace the original type conversion HALF_TYPE->TYPE with
994 HALF_TYPE->INTERM_TYPE. */
995 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
)))
997 new_stmt
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
));
998 /* Check if the already created pattern stmt is what we need. */
999 if (!is_gimple_assign (new_stmt
)
1000 || gimple_assign_rhs_code (new_stmt
) != NOP_EXPR
1001 || TREE_TYPE (gimple_assign_lhs (new_stmt
)) != interm_type
)
1004 oprnd
= gimple_assign_lhs (new_stmt
);
1008 /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */
1009 oprnd
= gimple_assign_rhs1 (def_stmt
);
1010 tmp
= create_tmp_reg (interm_type
, NULL
);
1011 add_referenced_var (tmp
);
1012 new_oprnd
= make_ssa_name (tmp
, NULL
);
1013 new_stmt
= gimple_build_assign_with_ops (NOP_EXPR
, new_oprnd
,
1015 SSA_NAME_DEF_STMT (new_oprnd
) = new_stmt
;
1016 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
)) = new_stmt
;
1017 VEC_safe_push (gimple
, heap
, *stmts
, def_stmt
);
1023 /* Retrieve the operand before the type promotion. */
1024 oprnd
= gimple_assign_rhs1 (def_stmt
);
1031 /* Create a type conversion HALF_TYPE->INTERM_TYPE. */
1032 tmp
= create_tmp_reg (interm_type
, NULL
);
1033 add_referenced_var (tmp
);
1034 new_oprnd
= make_ssa_name (tmp
, NULL
);
1035 new_stmt
= gimple_build_assign_with_ops (NOP_EXPR
, new_oprnd
,
1037 SSA_NAME_DEF_STMT (new_oprnd
) = new_stmt
;
1039 *new_def_stmt
= new_stmt
;
1042 /* Otherwise, OPRND is already set. */
1046 *new_type
= interm_type
;
1048 *new_type
= half_type
;
1051 *op1
= fold_convert (*new_type
, const_oprnd
);
1057 /* Try to find a statement or a sequence of statements that can be performed
1061 TYPE x_T, res0_T, res1_T;
1064 S2 x_T = (TYPE) x_t;
1065 S3 res0_T = op (x_T, C0);
1066 S4 res1_T = op (res0_T, C1);
1067 S5 ... = () res1_T; - type demotion
1069 where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
1071 Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
1072 be 'type' or some intermediate type. For now, we expect S5 to be a type
1073 demotion operation. We also check that S3 and S4 have only one use.
1078 vect_recog_over_widening_pattern (VEC (gimple
, heap
) **stmts
,
1079 tree
*type_in
, tree
*type_out
)
1081 gimple stmt
= VEC_pop (gimple
, *stmts
);
1082 gimple pattern_stmt
= NULL
, new_def_stmt
, prev_stmt
= NULL
, use_stmt
= NULL
;
1083 tree op0
, op1
, vectype
= NULL_TREE
, lhs
, use_lhs
, use_type
;
1084 imm_use_iterator imm_iter
;
1085 use_operand_p use_p
;
1087 tree var
= NULL_TREE
, new_type
= NULL_TREE
, tmp
, new_oprnd
;
1089 struct loop
*loop
= (gimple_bb (stmt
))->loop_father
;
1094 if (!vinfo_for_stmt (stmt
)
1095 || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt
)))
1098 new_def_stmt
= NULL
;
1099 if (!vect_operation_fits_smaller_type (stmt
, var
, &new_type
,
1100 &op0
, &op1
, &new_def_stmt
,
1109 /* STMT can be performed on a smaller type. Check its uses. */
1110 lhs
= gimple_assign_lhs (stmt
);
1112 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, lhs
)
1114 if (is_gimple_debug (USE_STMT (use_p
)))
1116 use_stmt
= USE_STMT (use_p
);
1120 if (nuses
!= 1 || !is_gimple_assign (use_stmt
)
1121 || !gimple_bb (use_stmt
)
1122 || !flow_bb_inside_loop_p (loop
, gimple_bb (use_stmt
)))
1125 /* Create pattern statement for STMT. */
1126 vectype
= get_vectype_for_scalar_type (new_type
);
1130 /* We want to collect all the statements for which we create pattern
1131 statetments, except for the case when the last statement in the
1132 sequence doesn't have a corresponding pattern statement. In such
1133 case we associate the last pattern statement with the last statement
1134 in the sequence. Therefore, we only add an original statetement to
1135 the list if we know that it is not the last. */
1137 VEC_safe_push (gimple
, heap
, *stmts
, prev_stmt
);
1139 var
= vect_recog_temp_ssa_var (new_type
, NULL
);
1140 pattern_stmt
= gimple_build_assign_with_ops (
1141 gimple_assign_rhs_code (stmt
), var
, op0
, op1
);
1142 SSA_NAME_DEF_STMT (var
) = pattern_stmt
;
1143 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt
)) = pattern_stmt
;
1144 STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (stmt
)) = new_def_stmt
;
1146 if (vect_print_dump_info (REPORT_DETAILS
))
1148 fprintf (vect_dump
, "created pattern stmt: ");
1149 print_gimple_stmt (vect_dump
, pattern_stmt
, 0, TDF_SLIM
);
1158 /* We got a sequence. We expect it to end with a type demotion operation.
1159 Otherwise, we quit (for now). There are three possible cases: the
1160 conversion is to NEW_TYPE (we don't do anything), the conversion is to
1161 a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
1162 NEW_TYPE differs (we create a new conversion statement). */
1163 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt
)))
1165 use_lhs
= gimple_assign_lhs (use_stmt
);
1166 use_type
= TREE_TYPE (use_lhs
);
1167 /* Support only type promotion or signedess change. */
1168 if (!INTEGRAL_TYPE_P (use_type
)
1169 || TYPE_PRECISION (new_type
) > TYPE_PRECISION (use_type
))
1172 if (TYPE_UNSIGNED (new_type
) != TYPE_UNSIGNED (use_type
)
1173 || TYPE_PRECISION (new_type
) != TYPE_PRECISION (use_type
))
1175 /* Create NEW_TYPE->USE_TYPE conversion. */
1176 tmp
= create_tmp_reg (use_type
, NULL
);
1177 add_referenced_var (tmp
);
1178 new_oprnd
= make_ssa_name (tmp
, NULL
);
1179 pattern_stmt
= gimple_build_assign_with_ops (NOP_EXPR
, new_oprnd
,
1181 SSA_NAME_DEF_STMT (new_oprnd
) = pattern_stmt
;
1182 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt
)) = pattern_stmt
;
1184 *type_in
= get_vectype_for_scalar_type (new_type
);
1185 *type_out
= get_vectype_for_scalar_type (use_type
);
1187 /* We created a pattern statement for the last statement in the
1188 sequence, so we don't need to associate it with the pattern
1189 statement created for PREV_STMT. Therefore, we add PREV_STMT
1190 to the list in order to mark it later in vect_pattern_recog_1. */
1192 VEC_safe_push (gimple
, heap
, *stmts
, prev_stmt
);
1197 STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (use_stmt
))
1198 = STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (prev_stmt
));
1201 *type_out
= NULL_TREE
;
1204 VEC_safe_push (gimple
, heap
, *stmts
, use_stmt
);
1207 /* TODO: support general case, create a conversion to the correct type. */
1210 /* Pattern detected. */
1211 if (vect_print_dump_info (REPORT_DETAILS
))
1213 fprintf (vect_dump
, "vect_recog_over_widening_pattern: detected: ");
1214 print_gimple_stmt (vect_dump
, pattern_stmt
, 0, TDF_SLIM
);
1217 return pattern_stmt
;
1221 /* Mark statements that are involved in a pattern. */
1224 vect_mark_pattern_stmts (gimple orig_stmt
, gimple pattern_stmt
,
1225 tree pattern_vectype
)
1227 stmt_vec_info pattern_stmt_info
, def_stmt_info
;
1228 stmt_vec_info orig_stmt_info
= vinfo_for_stmt (orig_stmt
);
1229 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (orig_stmt_info
);
1232 set_vinfo_for_stmt (pattern_stmt
,
1233 new_stmt_vec_info (pattern_stmt
, loop_vinfo
, NULL
));
1234 gimple_set_bb (pattern_stmt
, gimple_bb (orig_stmt
));
1235 pattern_stmt_info
= vinfo_for_stmt (pattern_stmt
);
1237 STMT_VINFO_RELATED_STMT (pattern_stmt_info
) = orig_stmt
;
1238 STMT_VINFO_DEF_TYPE (pattern_stmt_info
)
1239 = STMT_VINFO_DEF_TYPE (orig_stmt_info
);
1240 STMT_VINFO_VECTYPE (pattern_stmt_info
) = pattern_vectype
;
1241 STMT_VINFO_IN_PATTERN_P (orig_stmt_info
) = true;
1242 STMT_VINFO_RELATED_STMT (orig_stmt_info
) = pattern_stmt
;
1243 STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info
)
1244 = STMT_VINFO_PATTERN_DEF_STMT (orig_stmt_info
);
1245 if (STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info
))
1247 def_stmt
= STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info
);
1248 set_vinfo_for_stmt (def_stmt
,
1249 new_stmt_vec_info (def_stmt
, loop_vinfo
, NULL
));
1250 gimple_set_bb (def_stmt
, gimple_bb (orig_stmt
));
1251 def_stmt_info
= vinfo_for_stmt (def_stmt
);
1252 STMT_VINFO_RELATED_STMT (def_stmt_info
) = orig_stmt
;
1253 STMT_VINFO_DEF_TYPE (def_stmt_info
)
1254 = STMT_VINFO_DEF_TYPE (orig_stmt_info
);
1255 STMT_VINFO_VECTYPE (def_stmt_info
) = pattern_vectype
;
1259 /* Function vect_pattern_recog_1
1262 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
1263 computation pattern.
1264 STMT: A stmt from which the pattern search should start.
1266 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
1267 expression that computes the same functionality and can be used to
1268 replace the sequence of stmts that are involved in the pattern.
1271 This function checks if the expression returned by PATTERN_RECOG_FUNC is
1272 supported in vector form by the target. We use 'TYPE_IN' to obtain the
1273 relevant vector type. If 'TYPE_IN' is already a vector type, then this
1274 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
1275 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
1276 to the available target pattern.
1278 This function also does some bookkeeping, as explained in the documentation
1279 for vect_recog_pattern. */
1282 vect_pattern_recog_1 (
1283 gimple (* vect_recog_func
) (VEC (gimple
, heap
) **, tree
*, tree
*),
1284 gimple_stmt_iterator si
)
1286 gimple stmt
= gsi_stmt (si
), pattern_stmt
;
1287 stmt_vec_info stmt_info
;
1288 loop_vec_info loop_vinfo
;
1289 tree pattern_vectype
;
1290 tree type_in
, type_out
;
1291 enum tree_code code
;
1294 VEC (gimple
, heap
) *stmts_to_replace
= VEC_alloc (gimple
, heap
, 1);
1296 VEC_quick_push (gimple
, stmts_to_replace
, stmt
);
1297 pattern_stmt
= (* vect_recog_func
) (&stmts_to_replace
, &type_in
, &type_out
);
1301 stmt
= VEC_last (gimple
, stmts_to_replace
);
1302 stmt_info
= vinfo_for_stmt (stmt
);
1303 loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1305 if (VECTOR_MODE_P (TYPE_MODE (type_in
)))
1307 /* No need to check target support (already checked by the pattern
1308 recognition function). */
1310 gcc_assert (VECTOR_MODE_P (TYPE_MODE (type_out
)));
1311 pattern_vectype
= type_out
? type_out
: type_in
;
1315 enum machine_mode vec_mode
;
1316 enum insn_code icode
;
1319 /* Check target support */
1320 type_in
= get_vectype_for_scalar_type (type_in
);
1324 type_out
= get_vectype_for_scalar_type (type_out
);
1329 pattern_vectype
= type_out
;
1331 if (is_gimple_assign (pattern_stmt
))
1332 code
= gimple_assign_rhs_code (pattern_stmt
);
1335 gcc_assert (is_gimple_call (pattern_stmt
));
1339 optab
= optab_for_tree_code (code
, type_in
, optab_default
);
1340 vec_mode
= TYPE_MODE (type_in
);
1342 || (icode
= optab_handler (optab
, vec_mode
)) == CODE_FOR_nothing
1343 || (insn_data
[icode
].operand
[0].mode
!= TYPE_MODE (type_out
)))
1347 /* Found a vectorizable pattern. */
1348 if (vect_print_dump_info (REPORT_DETAILS
))
1350 fprintf (vect_dump
, "pattern recognized: ");
1351 print_gimple_stmt (vect_dump
, pattern_stmt
, 0, TDF_SLIM
);
1354 /* Mark the stmts that are involved in the pattern. */
1355 vect_mark_pattern_stmts (stmt
, pattern_stmt
, pattern_vectype
);
1357 /* Patterns cannot be vectorized using SLP, because they change the order of
1359 FOR_EACH_VEC_ELT (gimple
, LOOP_VINFO_REDUCTIONS (loop_vinfo
), i
, next
)
1361 VEC_ordered_remove (gimple
, LOOP_VINFO_REDUCTIONS (loop_vinfo
), i
);
1363 /* It is possible that additional pattern stmts are created and inserted in
1364 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
1365 relevant statements. */
1366 for (i
= 0; VEC_iterate (gimple
, stmts_to_replace
, i
, stmt
)
1367 && (unsigned) i
< (VEC_length (gimple
, stmts_to_replace
) - 1);
1370 stmt_info
= vinfo_for_stmt (stmt
);
1371 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
1372 if (vect_print_dump_info (REPORT_DETAILS
))
1374 fprintf (vect_dump
, "additional pattern stmt: ");
1375 print_gimple_stmt (vect_dump
, pattern_stmt
, 0, TDF_SLIM
);
1378 vect_mark_pattern_stmts (stmt
, pattern_stmt
, NULL_TREE
);
1381 VEC_free (gimple
, heap
, stmts_to_replace
);
1385 /* Function vect_pattern_recog
1388 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
1391 Output - for each computation idiom that is detected we create a new stmt
1392 that provides the same functionality and that can be vectorized. We
1393 also record some information in the struct_stmt_info of the relevant
1394 stmts, as explained below:
1396 At the entry to this function we have the following stmts, with the
1397 following initial value in the STMT_VINFO fields:
1399 stmt in_pattern_p related_stmt vec_stmt
1400 S1: a_i = .... - - -
1401 S2: a_2 = ..use(a_i).. - - -
1402 S3: a_1 = ..use(a_2).. - - -
1403 S4: a_0 = ..use(a_1).. - - -
1404 S5: ... = ..use(a_0).. - - -
1406 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
1407 represented by a single stmt. We then:
1408 - create a new stmt S6 equivalent to the pattern (the stmt is not
1409 inserted into the code)
1410 - fill in the STMT_VINFO fields as follows:
1412 in_pattern_p related_stmt vec_stmt
1413 S1: a_i = .... - - -
1414 S2: a_2 = ..use(a_i).. - - -
1415 S3: a_1 = ..use(a_2).. - - -
1416 S4: a_0 = ..use(a_1).. true S6 -
1417 '---> S6: a_new = .... - S4 -
1418 S5: ... = ..use(a_0).. - - -
1420 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
1421 to each other through the RELATED_STMT field).
1423 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
1424 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
1425 remain irrelevant unless used by stmts other than S4.
1427 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
1428 (because they are marked as irrelevant). It will vectorize S6, and record
1429 a pointer to the new vector stmt VS6 from S6 (as usual).
1430 S4 will be skipped, and S5 will be vectorized as usual:
1432 in_pattern_p related_stmt vec_stmt
1433 S1: a_i = .... - - -
1434 S2: a_2 = ..use(a_i).. - - -
1435 S3: a_1 = ..use(a_2).. - - -
1436 > VS6: va_new = .... - - -
1437 S4: a_0 = ..use(a_1).. true S6 VS6
1438 '---> S6: a_new = .... - S4 VS6
1439 > VS5: ... = ..vuse(va_new).. - - -
1440 S5: ... = ..use(a_0).. - - -
1442 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
1443 elsewhere), and we'll end up with:
1446 VS5: ... = ..vuse(va_new)..
1448 In case of more than one pattern statements, e.g., widen-mult with
1452 S2 a_T = (TYPE) a_t;
1453 '--> S3: a_it = (interm_type) a_t;
1454 S4 prod_T = a_T * CONST;
1455 '--> S5: prod_T' = a_it w* CONST;
1457 there may be other users of a_T outside the pattern. In that case S2 will
1458 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
1459 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
1460 be recorded in S3. */
1463 vect_pattern_recog (loop_vec_info loop_vinfo
)
1465 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1466 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
1467 unsigned int nbbs
= loop
->num_nodes
;
1468 gimple_stmt_iterator si
;
1470 gimple (* vect_recog_func_ptr
) (VEC (gimple
, heap
) **, tree
*, tree
*);
1472 if (vect_print_dump_info (REPORT_DETAILS
))
1473 fprintf (vect_dump
, "=== vect_pattern_recog ===");
1475 /* Scan through the loop stmts, applying the pattern recognition
1476 functions starting at each stmt visited: */
1477 for (i
= 0; i
< nbbs
; i
++)
1479 basic_block bb
= bbs
[i
];
1480 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
1482 /* Scan over all generic vect_recog_xxx_pattern functions. */
1483 for (j
= 0; j
< NUM_PATTERNS
; j
++)
1485 vect_recog_func_ptr
= vect_vect_recog_func_ptrs
[j
];
1486 vect_pattern_recog_1 (vect_recog_func_ptr
, si
);