/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "tree-data-ref.h"
#include "tree-vectorizer.h"
#include "diagnostic-core.h"
/* Pattern recognition functions.  */
static gimple vect_recog_widen_sum_pattern (VEC (gimple, heap) **, tree *,
                                            tree *);
static gimple vect_recog_widen_mult_pattern (VEC (gimple, heap) **, tree *,
                                             tree *);
static gimple vect_recog_dot_prod_pattern (VEC (gimple, heap) **, tree *,
                                           tree *);
static gimple vect_recog_pow_pattern (VEC (gimple, heap) **, tree *, tree *);
static gimple vect_recog_over_widening_pattern (VEC (gimple, heap) **, tree *,
                                                tree *);
static gimple vect_recog_widen_shift_pattern (VEC (gimple, heap) **,
                                              tree *, tree *);
static gimple vect_recog_vector_vector_shift_pattern (VEC (gimple, heap) **,
                                                      tree *, tree *);
static gimple vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **,
                                                  tree *, tree *);
static gimple vect_recog_bool_pattern (VEC (gimple, heap) **, tree *, tree *);

static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
        vect_recog_widen_mult_pattern,
        vect_recog_widen_sum_pattern,
        vect_recog_dot_prod_pattern,
        vect_recog_pow_pattern,
        vect_recog_over_widening_pattern,
        vect_recog_widen_shift_pattern,
        vect_recog_vector_vector_shift_pattern,
        vect_recog_mixed_size_cond_pattern,
        vect_recog_bool_pattern};
/* Function widened_name_p

   Check whether NAME, an ssa-name used in USE_STMT,
   is a result of a type-promotion, such that:
     DEF_STMT: NAME = NOP (name0)
   where the type of name0 (HALF_TYPE) is smaller than the type of NAME.
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.  */

static bool
widened_name_p (tree name, gimple use_stmt, tree *half_type,
                gimple *def_stmt, bool check_sign)
{
  tree dummy;
  gimple dummy_gimple;
  loop_vec_info loop_vinfo;
  stmt_vec_info stmt_vinfo;
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;
  tree def;

  stmt_vinfo = vinfo_for_stmt (use_stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

  if (!vect_is_simple_use (name, loop_vinfo, NULL, def_stmt, &def, &dt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (!is_gimple_assign (*def_stmt))
    return false;

  if (gimple_assign_rhs_code (*def_stmt) != NOP_EXPR)
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *half_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*half_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*half_type)) && check_sign)
      || (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 2)))
    return false;

  if (!vect_is_simple_use (oprnd0, loop_vinfo, NULL, &dummy_gimple, &dummy,
                           &dt))
    return false;

  return true;
}
/* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
   is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */

static tree
vect_recog_temp_ssa_var (tree type, gimple stmt)
{
  tree var = create_tmp_var (type, "patt");

  add_referenced_var (var);
  var = make_ssa_name (var, stmt);
  return var;
}
/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE1 prod;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
   same size of 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   is detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */
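/* As an illustration (a sketch with invented names, not part of the
   detection itself): assuming 'short' inputs and an 'int' accumulator,
   a source loop such as

     short a[N], b[N];
     int i, sum = 0;
     for (i = 0; i < N; i++)
       sum += a[i] * b[i];

   has the S3-S7 shape above, because C integer promotion computes each
   product in 'int' after widening the 'short' operands.  */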
static gimple
vect_recog_dot_prod_pattern (VEC (gimple, heap) **stmts, tree *type_in,
                             tree *type_out)
{
  gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0);
  tree oprnd0, oprnd1;
  tree oprnd00, oprnd01;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  tree prod_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_info);
  tree var;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);
  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DPROD = DX * DY;
          DDPROD = (TYPE2) DPROD;
          sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type
     - sum is the same size of DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */
  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (oprnd0);
    }
  else
    {
      gimple def_stmt;

      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (last_stmt);
      oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), type)
          || !types_compatible_p (TREE_TYPE (oprnd1), type))
        return NULL;
      stmt = last_stmt;

      if (widened_name_p (oprnd0, stmt, &half_type, &def_stmt, true))
        {
          stmt = def_stmt;
          oprnd0 = gimple_assign_rhs1 (stmt);
        }
      else
        half_type = type;
    }
  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr  */
  if (TREE_CODE (oprnd0) != SSA_NAME)
    return NULL;

  prod_type = half_type;
  stmt = SSA_NAME_DEF_STMT (oprnd0);

  /* It could not be the dot_prod pattern if the stmt is outside the loop.  */
  if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
     inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (stmt))
    return NULL;
  stmt_vinfo = vinfo_for_stmt (stmt);
  gcc_assert (stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
    return NULL;
  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as a widening multiplication?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
        return NULL;
      stmt_vinfo = vinfo_for_stmt (stmt);
      gcc_assert (stmt_vinfo);
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
      oprnd00 = gimple_assign_rhs1 (stmt);
      oprnd01 = gimple_assign_rhs2 (stmt);
    }
  else
    {
      tree half_type0, half_type1;
      gimple def_stmt;
      tree oprnd0, oprnd1;

      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
          || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
        return NULL;
      if (!widened_name_p (oprnd0, stmt, &half_type0, &def_stmt, true))
        return NULL;
      oprnd00 = gimple_assign_rhs1 (def_stmt);
      if (!widened_name_p (oprnd1, stmt, &half_type1, &def_stmt, true))
        return NULL;
      oprnd01 = gimple_assign_rhs1 (def_stmt);
      if (!types_compatible_p (half_type0, half_type1))
        return NULL;
      if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
        return NULL;
    }

  half_type = TREE_TYPE (oprnd00);
  *type_in = half_type;
  *type_out = type;
  /* Pattern detected. Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops3 (DOT_PROD_EXPR, var,
                                                oprnd00, oprnd01, oprnd1);

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_recog_dot_prod_pattern: detected: ");
      print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}
/* Handle widening operation by a constant.  At the moment we support
   MULT_EXPR and LSHIFT_EXPR.

   For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
   we check that CONST_OPRND is less or equal to the size of HALF_TYPE.

   Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
   HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
   that satisfies the above restrictions, we can perform a widening operation
   from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
   with a_it = (interm_type) a_t;  */
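/* A small worked example of the intermediate-type case (illustrative
   only, with assumed types): with HALF_TYPE 'unsigned char' (8 bits),
   TYPE 'unsigned int' (32 bits) and the statement a_T = a_T << 10, the
   shift amount 10 does not fit the 8-bit HALF_TYPE, but it does fit a
   16-bit intermediate type, so a_T = (unsigned int) a_t is rewritten as
   a_it = (unsigned short) a_t and the shift becomes a widening operation
   from 'unsigned short' to 'unsigned int'.  */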
static bool
vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
                               tree const_oprnd, tree *oprnd,
                               VEC (gimple, heap) **stmts, tree type,
                               tree *half_type, gimple def_stmt)
{
  tree new_type, new_oprnd, tmp;
  gimple new_stmt;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt));
  struct loop *loop = LOOP_VINFO_LOOP (loop_info);

  if (code != MULT_EXPR && code != LSHIFT_EXPR)
    return false;

  if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
       || (code == LSHIFT_EXPR
           && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
              != 1))
      && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
    {
      /* CONST_OPRND is a constant of HALF_TYPE.  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      return true;
    }

  if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4)
      || !gimple_bb (def_stmt)
      || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
      || !vinfo_for_stmt (def_stmt))
    return false;

  /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
     a type 2 times bigger than HALF_TYPE.  */
  new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
                                             TYPE_UNSIGNED (type));
  if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
      || (code == LSHIFT_EXPR
          && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
    return false;

  /* Use NEW_TYPE for widening operation.  */
  if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
    {
      new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
      /* Check if the already created pattern stmt is what we need.  */
      if (!is_gimple_assign (new_stmt)
          || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
          || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
        return false;

      VEC_safe_push (gimple, heap, *stmts, def_stmt);
      *oprnd = gimple_assign_lhs (new_stmt);
    }
  else
    {
      /* Create a_T = (NEW_TYPE) a_t;  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      tmp = create_tmp_var (new_type, NULL);
      add_referenced_var (tmp);
      new_oprnd = make_ssa_name (tmp, NULL);
      new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd,
                                               NULL_TREE);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
      VEC_safe_push (gimple, heap, *stmts, def_stmt);
      *oprnd = new_oprnd;
    }

  *half_type = new_type;
  return true;
}
/* Function vect_recog_widen_mult_pattern

   Try to find the following pattern:

     type a_t, b_t;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;

   where type 'TYPE' is at least double the size of type 'type'.

   Also detect unsigned cases:

     unsigned type a_t, b_t;
     unsigned TYPE u_prod_T;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;
     S6  u_prod_T = (unsigned TYPE) prod_T;

   and multiplication by constants:

     type a_t;
     TYPE a_T, prod_T;

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
     S5  prod_T = a_T * CONST;

   A special case of multiplication by constants is when 'TYPE' is 4 times
   bigger than 'type', but CONST fits an intermediate type 2 times smaller
   than 'TYPE'.  In that case we create an additional pattern stmt for S3
   to create a variable of the intermediate type, and perform widen-mult
   on the intermediate type as well:

     type a_t;
     interm_type a_it;
     TYPE a_T, prod_T, prod_T';

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
           '--> a_it = (interm_type) a_t;
     S5  prod_T = a_T * CONST;
           '--> prod_T' = a_it w* CONST;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
   is detected.  In case of unsigned widen-mult, the original stmt (S5) is
   replaced with S6 in STMTS.  In case of multiplication by a constant
   of an intermediate type (the last case above), STMTS also contains S3
   (inserted before S5).

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_MULT <a_t, b_t>  */
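/* By way of example (a sketch with invented names): a loop such as

     short a[N], b[N];
     int c[N];
     for (i = 0; i < N; i++)
       c[i] = a[i] * b[i];

   is expected to match this pattern: the 'short' operands are promoted
   to 'int' before the multiplication, which is exactly the S3/S4/S5
   shape described above.  */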
static gimple
vect_recog_widen_mult_pattern (VEC (gimple, heap) **stmts,
                               tree *type_in, tree *type_out)
{
  gimple last_stmt = VEC_pop (gimple, *stmts);
  gimple def_stmt0, def_stmt1;
  tree oprnd0, oprnd1;
  tree type, half_type0, half_type1;
  gimple pattern_stmt;
  tree vectype, vectype_out = NULL_TREE;
  tree dummy;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  VEC (tree, heap) *dummy_vec;
  bool op1_ok;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* Check argument 0.  */
  if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false))
    return NULL;
  /* Check argument 1.  */
  op1_ok = widened_name_p (oprnd1, last_stmt, &half_type1, &def_stmt1, false);

  if (op1_ok)
    {
      oprnd0 = gimple_assign_rhs1 (def_stmt0);
      oprnd1 = gimple_assign_rhs1 (def_stmt1);
    }
  else
    {
      if (TREE_CODE (oprnd1) == INTEGER_CST
          && TREE_CODE (half_type0) == INTEGER_TYPE
          && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
                                            &oprnd0, stmts, type,
                                            &half_type0, def_stmt0))
        half_type1 = half_type0;
      else
        return NULL;
    }

  /* Handle unsigned case.  Look for
     S6  u_prod_T = (unsigned TYPE) prod_T;
     Use unsigned TYPE as the type for WIDEN_MULT_EXPR.  */
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
    {
      tree lhs = gimple_assign_lhs (last_stmt), use_lhs;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      int nuses = 0;
      gimple use_stmt = NULL;
      tree use_type;

      if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
        return NULL;

      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
        {
          if (is_gimple_debug (USE_STMT (use_p)))
            continue;
          use_stmt = USE_STMT (use_p);
          nuses++;
        }

      if (nuses != 1 || !is_gimple_assign (use_stmt)
          || gimple_assign_rhs_code (use_stmt) != NOP_EXPR)
        return NULL;

      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      if (!INTEGRAL_TYPE_P (use_type)
          || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
          || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
        return NULL;

      type = use_type;
      last_stmt = use_stmt;
    }

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_recog_widen_mult_pattern: detected: ");

  /* Check target support  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);
  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
                                          vectype_out, vectype,
                                          &dummy, &dummy, &dummy_code,
                                          &dummy_code, &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported. Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, var, oprnd0,
                                               oprnd1);

  if (vect_print_dump_info (REPORT_DETAILS))
    print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);

  VEC_safe_push (gimple, heap, *stmts, last_stmt);
  return pattern_stmt;
}
/* Function vect_recog_pow_pattern

   Try to find the following pattern:

     x = POW (y, N);

   with POW being one of pow, powf, powi, powif and N being
   either 2 or 0.5.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        x = x * x
   or
        x = sqrt (x)   */
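/* For instance, a call pow (x, 2.0) in the loop body is rewritten as
   x * x, and pow (x, 0.5) as sqrt (x), which the vectorizer can then
   handle like any other multiplication or square root.  */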
static gimple
vect_recog_pow_pattern (VEC (gimple, heap) **stmts, tree *type_in,
                        tree *type_out)
{
  gimple last_stmt = VEC_index (gimple, *stmts, 0);
  tree fn, base, exp = NULL;
  gimple stmt;
  tree var;

  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  fn = gimple_call_fndecl (last_stmt);
  if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
    return NULL;

  switch (DECL_FUNCTION_CODE (fn))
    {
    case BUILT_IN_POWIF:
    case BUILT_IN_POWI:
    case BUILT_IN_POWF:
    case BUILT_IN_POW:
      base = gimple_call_arg (last_stmt, 0);
      exp = gimple_call_arg (last_stmt, 1);
      if (TREE_CODE (exp) != REAL_CST
          && TREE_CODE (exp) != INTEGER_CST)
        return NULL;
      break;

    default:
      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  *type_out = NULL_TREE;

  /* Catch squaring.  */
  if ((host_integerp (exp, 0)
       && tree_low_cst (exp, 0) == 2)
      || (TREE_CODE (exp) == REAL_CST
          && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
    {
      *type_in = TREE_TYPE (base);

      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign_with_ops (MULT_EXPR, var, base, base);
      return stmt;
    }

  /* Catch square root.  */
  if (TREE_CODE (exp) == REAL_CST
      && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
    {
      tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
      *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
      if (*type_in)
        {
          gimple stmt = gimple_build_call (newfn, 1, base);
          if (vectorizable_function (stmt, *type_in, *type_in)
              != NULL_TREE)
            {
              var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
              gimple_call_set_lhs (stmt, var);
              return stmt;
            }
        }
    }

  return NULL;
}
/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

     type x_t;
     TYPE x_T, sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e - we're
   summing elements of type 'type' into an accumulator of type 'TYPE'.  This
   is a special case of a reduction computation.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.  In the example,
   when this function is called with S3, the pattern {S2,S3} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */
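/* As an illustration (names invented for the example): a reduction
   accumulating 'short' elements into an 'int' sum,

     short x[N];
     int i, sum = 0;
     for (i = 0; i < N; i++)
       sum += x[i];

   has the S1-S3 shape above, since each element is widened to 'int'
   before being added to the accumulator.  */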
static gimple
vect_recog_widen_sum_pattern (VEC (gimple, heap) **stmts, tree *type_in,
                              tree *type_out)
{
  gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0);
  tree oprnd0, oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_info);
  tree var;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE) X;
          sum_1 = DX + sum_0;
     In which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a cast from type 'type' to type
     'TYPE'.  */

  if (!widened_name_p (oprnd0, last_stmt, &half_type, &stmt, true))
    return NULL;

  oprnd0 = gimple_assign_rhs1 (stmt);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected. Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_SUM_EXPR, var,
                                               oprnd0, oprnd1);

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_recog_widen_sum_pattern: detected: ");
      print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}
/* Return TRUE if the operation in STMT can be performed on a smaller type.

   Input:
   STMT - a statement to check.
   DEF - we support operations with two operands, one of which is constant.
         The other operand can be defined by a demotion operation, or by a
         previous statement in a sequence of over-promoted operations.  In the
         latter case DEF is used to replace that operand.  (It is defined by a
         pattern statement we created for the previous statement in the
         sequence).

   Input/output:
   NEW_TYPE - Output: a smaller type that we are trying to use.  Input: if not
         NULL, it's the type of DEF.
   STMTS - additional pattern statements.  If a pattern statement (type
         conversion) is created in this function, its original statement is
         added to STMTS.

   Output:
   OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
         operands to use in the new pattern statement for STMT (will be
         created in vect_recog_over_widening_pattern ()).
   NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
         statements for STMT: the first one is a type promotion and the second
         one is the operation itself.  We return the type promotion statement
         in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_STMT
         of the second pattern statement.  */
static bool
vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
                                  tree *op0, tree *op1, gimple *new_def_stmt,
                                  VEC (gimple, heap) **stmts)
{
  enum tree_code code;
  tree const_oprnd, oprnd;
  tree interm_type = NULL_TREE, half_type, tmp, new_oprnd, type;
  gimple def_stmt, new_stmt;
  bool first = false;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt));
  struct loop *loop = LOOP_VINFO_LOOP (loop_info);

  *new_def_stmt = NULL;

  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
      && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
    return false;

  oprnd = gimple_assign_rhs1 (stmt);
  const_oprnd = gimple_assign_rhs2 (stmt);
  type = gimple_expr_type (stmt);

  if (TREE_CODE (oprnd) != SSA_NAME
      || TREE_CODE (const_oprnd) != INTEGER_CST)
    return false;

  /* If we are in the middle of a sequence, we use DEF from a previous
     statement.  Otherwise, OPRND has to be a result of type promotion.  */
  if (*new_type)
    {
      half_type = *new_type;
      oprnd = def;
    }
  else
    {
      first = true;
      if (!widened_name_p (oprnd, stmt, &half_type, &def_stmt, false)
          || !gimple_bb (def_stmt)
          || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
          || !vinfo_for_stmt (def_stmt))
        return false;
    }

  /* Can we perform the operation on a smaller type?  */
  switch (code)
    {
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
    case BIT_AND_EXPR:
      if (!int_fits_type_p (const_oprnd, half_type))
        {
          /* HALF_TYPE is not enough.  Try a bigger type if possible.  */
          if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
            return false;

          interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
          if (!int_fits_type_p (const_oprnd, interm_type))
            return false;
        }

      break;

    case LSHIFT_EXPR:
      /* Try intermediate type - HALF_TYPE is not enough for sure.  */
      if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
        return false;

      /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
        (e.g., if the original value was char, the shift amount is at most 8
         if we want to use short).  */
      if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
        return false;

      interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

      if (!vect_supportable_shift (code, interm_type))
        return false;

      break;

    case RSHIFT_EXPR:
      if (vect_supportable_shift (code, half_type))
        break;

      /* Try intermediate type - HALF_TYPE is not supported.  */
      if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
        return false;

      interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

      if (!vect_supportable_shift (code, interm_type))
        return false;

      break;

    default:
      gcc_unreachable ();
    }

  /* There are four possible cases:
     1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
        the first statement in the sequence)
        a. The original, HALF_TYPE, is not enough - we replace the promotion
           from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
        b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
           promotion.
     2. OPRND is defined by a pattern statement we created.
        a. Its type is not sufficient for the operation, we create a new stmt:
           a type conversion for OPRND from HALF_TYPE to INTERM_TYPE.  We
           store this statement in NEW_DEF_STMT, and it is later put in
           STMT_VINFO_PATTERN_DEF_STMT of the pattern statement for STMT.
        b. OPRND is good to use in the new statement.  */
  if (first)
    {
      if (interm_type)
        {
          /* Replace the original type conversion HALF_TYPE->TYPE with
             HALF_TYPE->INTERM_TYPE.  */
          if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
            {
              new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
              /* Check if the already created pattern stmt is what we need.  */
              if (!is_gimple_assign (new_stmt)
                  || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
                  || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
                return false;

              VEC_safe_push (gimple, heap, *stmts, def_stmt);
              oprnd = gimple_assign_lhs (new_stmt);
            }
          else
            {
              /* Create NEW_OPRND = (INTERM_TYPE) OPRND.  */
              oprnd = gimple_assign_rhs1 (def_stmt);
              tmp = create_tmp_reg (interm_type, NULL);
              add_referenced_var (tmp);
              new_oprnd = make_ssa_name (tmp, NULL);
              new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                       oprnd, NULL_TREE);
              STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
              VEC_safe_push (gimple, heap, *stmts, def_stmt);
              oprnd = new_oprnd;
            }
        }
      else
        {
          /* Retrieve the operand before the type promotion.  */
          oprnd = gimple_assign_rhs1 (def_stmt);
        }
    }
  else
    {
      if (interm_type)
        {
          /* Create a type conversion HALF_TYPE->INTERM_TYPE.  */
          tmp = create_tmp_reg (interm_type, NULL);
          add_referenced_var (tmp);
          new_oprnd = make_ssa_name (tmp, NULL);
          new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                   oprnd, NULL_TREE);
          oprnd = new_oprnd;
          *new_def_stmt = new_stmt;
        }

      /* Otherwise, OPRND is already set.  */
    }

  if (interm_type)
    *new_type = interm_type;
  else
    *new_type = half_type;

  *op0 = oprnd;
  *op1 = fold_convert (*new_type, const_oprnd);

  return true;
}
/* Try to find a statement or a sequence of statements that can be performed
   on a smaller type:

     type x_t;
     TYPE x_T, res0_T, res1_T;
   loop:
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  res0_T = op (x_T, C0);
     S4  res1_T = op (res0_T, C1);
     S5  ... = () res1_T;  - type demotion

   where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
   constants.
   Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
   be 'type' or some intermediate type.  For now, we expect S5 to be a type
   demotion operation.  We also check that S3 and S4 have only one use.  */
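/* An illustrative case (a sketch, names invented): with 'unsigned char'
   elements,

     unsigned char a[N], b[N];
     for (i = 0; i < N; i++)
       b[i] = (a[i] << 2) >> 1;

   C integer promotion performs both shifts in 'int', but the result is
   demoted back to 'unsigned char' when stored, so the whole sequence can
   be done on a narrower (e.g. 16-bit) type and more elements fit in each
   vector.  */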
static gimple
vect_recog_over_widening_pattern (VEC (gimple, heap) **stmts,
                                  tree *type_in, tree *type_out)
{
  gimple stmt = VEC_pop (gimple, *stmts);
  gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
  tree op0, op1, vectype = NULL_TREE, lhs, use_lhs, use_type;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  int nuses = 0;
  tree var = NULL_TREE, new_type = NULL_TREE, tmp, new_oprnd;
  bool first;
  struct loop *loop = (gimple_bb (stmt))->loop_father;
  tree type = NULL;

  first = true;
  while (1)
    {
      if (!vinfo_for_stmt (stmt)
          || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
        return NULL;

      new_def_stmt = NULL;
      if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
                                             &op0, &op1, &new_def_stmt,
                                             stmts))
        {
          if (first)
            return NULL;
          else
            break;
        }

      /* STMT can be performed on a smaller type.  Check its uses.  */
      lhs = gimple_assign_lhs (stmt);
      nuses = 0;
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
        {
          if (is_gimple_debug (USE_STMT (use_p)))
            continue;
          use_stmt = USE_STMT (use_p);
          nuses++;
        }

      if (nuses != 1 || !is_gimple_assign (use_stmt)
          || !gimple_bb (use_stmt)
          || !flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
        return NULL;

      /* Create pattern statement for STMT.  */
      vectype = get_vectype_for_scalar_type (new_type);
      if (!vectype)
        return NULL;

      /* We want to collect all the statements for which we create pattern
         statements, except for the case when the last statement in the
         sequence doesn't have a corresponding pattern statement.  In such
         case we associate the last pattern statement with the last statement
         in the sequence.  Therefore, we only add the original statement to
         the list if we know that it is not the last.  */
      if (prev_stmt)
        VEC_safe_push (gimple, heap, *stmts, prev_stmt);

      var = vect_recog_temp_ssa_var (new_type, NULL);
      pattern_stmt
        = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt), var,
                                        op0, op1);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
      STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (stmt)) = new_def_stmt;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "created pattern stmt: ");
          print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
        }

      type = gimple_expr_type (stmt);
      prev_stmt = stmt;
      stmt = use_stmt;

      first = false;
    }

  /* We got a sequence.  We expect it to end with a type demotion operation.
     Otherwise, we quit (for now).  There are three possible cases: the
     conversion is to NEW_TYPE (we don't do anything), the conversion is to
     a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
     NEW_TYPE differs (we create a new conversion statement).  */
  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
    {
      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      /* Support only type promotion or signedness change.  Check that
         USE_TYPE is not bigger than the original type.  */
      if (!INTEGRAL_TYPE_P (use_type)
          || TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type)
          || TYPE_PRECISION (type) < TYPE_PRECISION (use_type))
        return NULL;

      if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
          || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
        {
          /* Create NEW_TYPE->USE_TYPE conversion.  */
          tmp = create_tmp_reg (use_type, NULL);
          add_referenced_var (tmp);
          new_oprnd = make_ssa_name (tmp, NULL);
          pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                       var, NULL_TREE);
          STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;

          *type_in = get_vectype_for_scalar_type (new_type);
          *type_out = get_vectype_for_scalar_type (use_type);

          /* We created a pattern statement for the last statement in the
             sequence, so we don't need to associate it with the pattern
             statement created for PREV_STMT.  Therefore, we add PREV_STMT
             to the list in order to mark it later in vect_pattern_recog_1. */
          if (prev_stmt)
            VEC_safe_push (gimple, heap, *stmts, prev_stmt);
        }
      else
        {
          if (prev_stmt)
            STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (use_stmt))
              = STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (prev_stmt));

          *type_in = vectype;
          *type_out = NULL_TREE;
        }

      VEC_safe_push (gimple, heap, *stmts, use_stmt);
    }
  else
    /* TODO: support general case, create a conversion to the correct type. */
    return NULL;

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_recog_over_widening_pattern: detected: ");
      print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
    }

  return pattern_stmt;
}
/* Detect widening shift pattern:

   type a_t;
   TYPE a_T, res_T;

   S1 a_t = ;
   S2 a_T = (TYPE) a_t;
   S3 res_T = a_T << CONST;

  where type 'TYPE' is at least double the size of type 'type'.

  Also detect unsigned cases:

  unsigned type a_t;
  unsigned TYPE u_res_T;
  TYPE a_T, res_T;

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
  S3 res_T = a_T << CONST;
  S4 u_res_T = (unsigned TYPE) res_T;

  And a case when 'TYPE' is 4 times bigger than 'type'.  In that case we
  create an additional pattern stmt for S2 to create a variable of an
  intermediate type, and perform widen-shift on the intermediate type:

  type a_t;
  interm_type a_it;
  TYPE a_T, res_T, res_T';

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
      '--> a_it = (interm_type) a_t;
  S3 res_T = a_T << CONST;
      '--> res_T' = a_it <<* CONST;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins.
    In case of unsigned widen-shift, the original stmt (S3) is replaced with
    S4 in STMTS.  When an intermediate type is used and a pattern statement is
    created for S2, we also put S2 here (before S3).

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the sequence of
    stmts that constitute the pattern.  In this case it will be:
    WIDEN_LSHIFT_EXPR <a_t, CONST>.  */
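/* For example (a sketch with invented names): a loop of the form

     unsigned short a[N];
     unsigned int r[N];
     for (i = 0; i < N; i++)
       r[i] = (unsigned int) a[i] << 8;

   promotes each element and then shifts it in the wider type, which is
   the S2/S3 shape this pattern looks for.  */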
static gimple
vect_recog_widen_shift_pattern (VEC (gimple, heap) **stmts,
                                tree *type_in, tree *type_out)
{
  gimple last_stmt = VEC_pop (gimple, *stmts);
  gimple def_stmt0;
  tree oprnd0, oprnd1;
  tree type, half_type0;
  gimple pattern_stmt, orig_stmt = NULL;
  tree vectype, vectype_out = NULL_TREE;
  tree dummy;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  VEC (tree, heap) * dummy_vec;
  gimple use_stmt = NULL;
  bool over_widen = false;

  if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
    return NULL;

  orig_stmt = last_stmt;
  if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
    {
      /* This statement was also detected as over-widening operation (it can't
         be any other pattern, because only over-widening detects shifts).
         LAST_STMT is the final type demotion statement, but its related
         statement is shift.  We analyze the related statement to catch cases:

         orig code:
          type a_t;
          itype res;
          TYPE a_T, res_T;

          S1 a_T = (TYPE) a_t;
          S2 res_T = a_T << CONST;
          S3 res = (itype)res_T;

          (size of type * 2 <= size of itype
           and size of itype * 2 <= size of TYPE)

         code after over-widening pattern detection:

          S1 a_T = (TYPE) a_t;
               --> a_it = (itype) a_t;
          S2 res_T = a_T << CONST;
          S3 res = (itype)res_T;  <--- LAST_STMT
               --> res = a_it << CONST;

         after widen_shift:

          S1 a_T = (TYPE) a_t;
               --> a_it = (itype) a_t; - redundant
          S2 res_T = a_T << CONST;
          S3 res = (itype)res_T;
               --> res = a_t w<< CONST;

      i.e., we replace the three statements with res = a_t w<< CONST.  */
      last_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_stmt));
      over_widen = true;
    }

  if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
    return NULL;

  /* Check operand 0: it has to be defined by a type promotion.  */
  if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false))
    return NULL;

  /* Check operand 1: has to be positive.  We check that it fits the type
     in vect_handle_widen_op_by_const ().  */
  if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (def_stmt0);
  type = gimple_expr_type (last_stmt);

  /* Check if this a widening operation.  */
  if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
                                      &oprnd0, stmts,
                                      type, &half_type0, def_stmt0))
    return NULL;

  /* Handle unsigned case.  Look for
     S4  u_res_T = (unsigned TYPE) res_T;
     Use unsigned TYPE as the type for WIDEN_LSHIFT_EXPR.  */
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
    {
      tree lhs = gimple_assign_lhs (last_stmt), use_lhs;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      int nuses = 0;
      tree use_type;

      if (over_widen)
        {
          /* In case of over-widening pattern, S4 should be ORIG_STMT itself.
             We check here that TYPE is the correct type for the operation,
             i.e., it's the type of the original result.  */
          tree orig_type = gimple_expr_type (orig_stmt);
          if ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (orig_type))
              || (TYPE_PRECISION (type) != TYPE_PRECISION (orig_type)))
            return NULL;
        }
      else
        {
          FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
            {
              if (is_gimple_debug (USE_STMT (use_p)))
                continue;
              use_stmt = USE_STMT (use_p);
              nuses++;
            }

          if (nuses != 1 || !is_gimple_assign (use_stmt)
              || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
            return NULL;

          use_lhs = gimple_assign_lhs (use_stmt);
          use_type = TREE_TYPE (use_lhs);

          if (!INTEGRAL_TYPE_P (use_type)
              || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
              || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
            return NULL;

          type = use_type;
        }
    }

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_recog_widen_shift_pattern: detected: ");

  /* Check target support.  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);

  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
                                          vectype_out, vectype,
                                          &dummy, &dummy, &dummy_code,
                                          &dummy_code, &dummy_int,
                                          &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt =
    gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR, var, oprnd0, oprnd1);

  if (vect_print_dump_info (REPORT_DETAILS))
    print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);

  if (use_stmt)
    last_stmt = use_stmt;
  else
    last_stmt = orig_stmt;

  VEC_safe_push (gimple, heap, *stmts, last_stmt);
  return pattern_stmt;
}
/* Detect a vector by vector shift pattern that wouldn't be otherwise
   vectorized:

   type a_t;
   TYPE b_T, res_T;

   S1 a_t = ;
   S2 b_T = ;
   S3 res_T = b_T op a_t;

  where type 'TYPE' is a type with different size than 'type',
  and op is <<, >> or rotate.

  Also detect cases:

   type a_t;
   TYPE b_T, c_T, res_T;

   S0 c_T = ;
   S1 a_t = (type) c_T;
   S2 b_T = ;
   S3 res_T = b_T op a_t;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins,
    i.e. the shift/rotate stmt.  The original stmt (S3) is replaced
    with a shift/rotate which has same type on both operands, in the
    second case just b_T op c_T, in the first case with added cast
    from a_t to c_T in STMT_VINFO_PATTERN_DEF_STMT.

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the shift/rotate
    S3 stmt.  */
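/* For instance (an illustrative sketch, names invented): in

     int b[N];
     char c[N];
     for (i = 0; i < N; i++)
       b[i] = b[i] << c[i];

   the shift amount has a narrower type than the shifted value; the
   pattern rewrites the statement so both operands have the same type,
   allowing the target's vector-by-vector shift to be used.  */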
static gimple
vect_recog_vector_vector_shift_pattern (VEC (gimple, heap) **stmts,
                                        tree *type_in, tree *type_out)
{
  gimple last_stmt = VEC_pop (gimple, *stmts);
  tree oprnd0, oprnd1, lhs, var;
  gimple pattern_stmt, def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  enum vect_def_type dt;
  tree def;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  lhs = gimple_assign_lhs (last_stmt);
  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TREE_CODE (oprnd1) != SSA_NAME
      || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
      || TYPE_PRECISION (TREE_TYPE (oprnd1))
         != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
      || TYPE_PRECISION (TREE_TYPE (lhs))
         != TYPE_PRECISION (TREE_TYPE (oprnd0)))
    return NULL;

  if (!vect_is_simple_use (oprnd1, loop_vinfo, NULL, &def_stmt, &def, &dt))
    return NULL;

  if (dt != vect_internal_def)
    return NULL;

  *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
  *type_out = *type_in;
  if (*type_in == NULL_TREE)
    return NULL;

  def = NULL_TREE;
  if (gimple_assign_cast_p (def_stmt))
    {
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
          && TYPE_PRECISION (TREE_TYPE (rhs1))
             == TYPE_PRECISION (TREE_TYPE (oprnd0)))
        def = rhs1;
    }

  if (def == NULL_TREE)
    {
      def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
      def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
                                               NULL_TREE);
      STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo) = def_stmt;
    }

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_recog_vector_vector_shift_pattern: detected: ");

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
  pattern_stmt = gimple_build_assign_with_ops (rhs_code, var, oprnd0, def);

  if (vect_print_dump_info (REPORT_DETAILS))
    print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);

  VEC_safe_push (gimple, heap, *stmts, last_stmt);
  return pattern_stmt;
}
/* Function vect_recog_mixed_size_cond_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE a_T, b_T, c_T;
   loop:
     S1  a_T = x_t CMP y_t ? b_T : c_T;

   where type 'TYPE' is an integral type which has different size
   from 'type'.  b_T and c_T are constants and if 'TYPE' is wider
   than 'type', the constants need to fit into an integer type
   with the same width as 'type'.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the pattern.
        Additionally a def_stmt is added.

        a_it = x_t CMP y_t ? b_it : c_it;
        a_T = (TYPE) a_it;  */
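/* As an example (a sketch, names invented): with 'char' comparison
   operands and an 'int' result,

     char x[N], y[N];
     int a[N];
     for (i = 0; i < N; i++)
       a[i] = x[i] < y[i] ? 3 : 7;

   the COND_EXPR is first evaluated in a 'char'-sized integer type and
   only then widened to 'int', so the comparison and the selection can
   be done in vectors with the same number of elements.  */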
static gimple
vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **stmts, tree *type_in,
                                    tree *type_out)
{
  gimple last_stmt = VEC_index (gimple, *stmts, 0);
  tree cond_expr, then_clause, else_clause;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
  tree type, vectype, comp_vectype, itype, vecitype;
  enum machine_mode cmpmode;
  gimple pattern_stmt, def_stmt;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

  if (!is_gimple_assign (last_stmt)
      || gimple_assign_rhs_code (last_stmt) != COND_EXPR
      || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;

  cond_expr = gimple_assign_rhs1 (last_stmt);
  then_clause = gimple_assign_rhs2 (last_stmt);
  else_clause = gimple_assign_rhs3 (last_stmt);

  if (TREE_CODE (then_clause) != INTEGER_CST
      || TREE_CODE (else_clause) != INTEGER_CST)
    return NULL;

  if (!COMPARISON_CLASS_P (cond_expr))
    return NULL;

  comp_vectype
    = get_vectype_for_scalar_type (TREE_TYPE (TREE_OPERAND (cond_expr, 0)));
  if (comp_vectype == NULL_TREE)
    return NULL;

  type = gimple_expr_type (last_stmt);
  cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));

  if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
    return NULL;

  vectype = get_vectype_for_scalar_type (type);
  if (vectype == NULL_TREE)
    return NULL;

  if (expand_vec_cond_expr_p (vectype, comp_vectype))
    return NULL;

  itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
                                          TYPE_UNSIGNED (type));
  if (itype == NULL_TREE
      || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
    return NULL;

  vecitype = get_vectype_for_scalar_type (itype);
  if (vecitype == NULL_TREE)
    return NULL;

  if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
    return NULL;

  if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
    {
      if (!int_fits_type_p (then_clause, itype)
          || !int_fits_type_p (else_clause, itype))
        return NULL;
    }

  def_stmt
    = gimple_build_assign_with_ops3 (COND_EXPR,
                                     vect_recog_temp_ssa_var (itype, NULL),
                                     unshare_expr (cond_expr),
                                     fold_convert (itype, then_clause),
                                     fold_convert (itype, else_clause));
  pattern_stmt
    = gimple_build_assign_with_ops (NOP_EXPR,
                                    vect_recog_temp_ssa_var (type, NULL),
                                    gimple_assign_lhs (def_stmt), NULL_TREE);

  STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo) = def_stmt;
  def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
  set_vinfo_for_stmt (def_stmt, def_stmt_info);
  STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
  *type_in = vecitype;
  *type_out = vectype;

  return pattern_stmt;
}
/* Helper function of vect_recog_bool_pattern.  Called recursively, return
   true if bool VAR can be optimized that way.  */

static bool
check_bool_pattern (tree var, loop_vec_info loop_vinfo)
{
  gimple def_stmt;
  enum vect_def_type dt;
  tree def, rhs1;
  enum tree_code rhs_code;

  if (!vect_is_simple_use (var, loop_vinfo, NULL, &def_stmt, &def, &dt))
    return false;

  if (dt != vect_internal_def)
    return false;

  if (!is_gimple_assign (def_stmt))
    return false;

  if (!has_single_use (def))
    return false;

  rhs1 = gimple_assign_rhs1 (def_stmt);
  rhs_code = gimple_assign_rhs_code (def_stmt);
  switch (rhs_code)
    {
    case SSA_NAME:
      return check_bool_pattern (rhs1, loop_vinfo);

    CASE_CONVERT:
      if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
           || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
          && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
        return false;
      return check_bool_pattern (rhs1, loop_vinfo);

    case BIT_NOT_EXPR:
      return check_bool_pattern (rhs1, loop_vinfo);

    case BIT_AND_EXPR:
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
      if (!check_bool_pattern (rhs1, loop_vinfo))
        return false;
      return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo);

    default:
      if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
        {
          tree vecitype, comp_vectype;

          comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
          if (comp_vectype == NULL_TREE)
            return false;

          if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
            {
              enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
              tree itype
                = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
              vecitype = get_vectype_for_scalar_type (itype);
              if (vecitype == NULL_TREE)
                return false;
            }
          else
            vecitype = comp_vectype;
          return expand_vec_cond_expr_p (vecitype, comp_vectype);
        }
      return false;
    }
}
/* Helper function of adjust_bool_pattern.  Add a cast to TYPE to a previous
   stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
   to PATTERN_DEF_STMT and adding a cast as RELATED_STMT.  */

static tree
adjust_bool_pattern_cast (tree type, tree var)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
  gimple cast_stmt, pattern_stmt;

  gcc_assert (!STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo));
  pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
  STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo) = pattern_stmt;
  cast_stmt
    = gimple_build_assign_with_ops (NOP_EXPR,
                                    vect_recog_temp_ssa_var (type, NULL),
                                    gimple_assign_lhs (pattern_stmt),
                                    NULL_TREE);
  STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
  return gimple_assign_lhs (cast_stmt);
}
/* Helper function of vect_recog_bool_pattern.  Do the actual transformations,
   recursively.  VAR is an SSA_NAME that should be transformed from bool
   to a wider integer type, OUT_TYPE is the desired final integer type of
   the whole pattern, TRUEVAL should be NULL unless optimizing
   BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
   in the then_clause, STMTS is where statements with added pattern stmts
   should be pushed to.  */
static tree
adjust_bool_pattern (tree var, tree out_type, tree trueval,
                     VEC (gimple, heap) **stmts)
{
  gimple stmt = SSA_NAME_DEF_STMT (var);
  enum tree_code rhs_code, def_rhs_code;
  tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
  location_t loc;
  gimple pattern_stmt, def_stmt;

  rhs1 = gimple_assign_rhs1 (stmt);
  rhs2 = gimple_assign_rhs2 (stmt);
  rhs_code = gimple_assign_rhs_code (stmt);
  loc = gimple_location (stmt);
  switch (rhs_code)
    {
    case SSA_NAME:
    CASE_CONVERT:
      irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
      itype = TREE_TYPE (irhs1);
      pattern_stmt
        = gimple_build_assign_with_ops (SSA_NAME,
                                        vect_recog_temp_ssa_var (itype, NULL),
                                        irhs1, NULL_TREE);
      break;

    case BIT_NOT_EXPR:
      irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
      itype = TREE_TYPE (irhs1);
      pattern_stmt
        = gimple_build_assign_with_ops (BIT_XOR_EXPR,
                                        vect_recog_temp_ssa_var (itype, NULL),
                                        irhs1, build_int_cst (itype, 1));
      break;

    case BIT_AND_EXPR:
      /* Try to optimize x = y & (a < b ? 1 : 0); into
         x = (a < b ? y : 0);

         E.g. for:
           bool a_b, b_b, c_b;
           TYPE d_T;

           S1  a_b = x1 CMP1 y1;
           S2  b_b = x2 CMP2 y2;
           S3  c_b = a_b & b_b;
           S4  d_T = (TYPE) c_b;

         we would normally emit:

           S1'  a_T = x1 CMP1 y1 ? 1 : 0;
           S2'  b_T = x2 CMP2 y2 ? 1 : 0;
           S3'  c_T = a_T & b_T;
           S4'  d_T = c_T;

         but we can save one stmt by using the
         result of one of the COND_EXPRs in the other COND_EXPR and leave
         BIT_AND_EXPR stmt out:

           S1'  a_T = x1 CMP1 y1 ? 1 : 0;
           S3'  c_T = x2 CMP2 y2 ? a_T : 0;
           S4'  d_T = c_T;

         At least when VEC_COND_EXPR is implemented using masks
         cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
         computes the comparison masks and ands it, in one case with
         all ones vector, in the other case with a vector register.
         Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
         often more expensive.  */
      def_stmt = SSA_NAME_DEF_STMT (rhs2);
      def_rhs_code = gimple_assign_rhs_code (def_stmt);
      if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
        {
          tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
          irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
          if (TYPE_PRECISION (TREE_TYPE (irhs1))
              == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
            {
              gimple tstmt;
              stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
              irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
              tstmt = VEC_pop (gimple, *stmts);
              gcc_assert (tstmt == def_stmt);
              VEC_quick_push (gimple, *stmts, stmt);
              STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
                = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
              gcc_assert (!STMT_VINFO_PATTERN_DEF_STMT (stmt_def_vinfo));
              STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
              return irhs2;
            }
          else
            irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
          goto and_ior_xor;
        }
      def_stmt = SSA_NAME_DEF_STMT (rhs1);
      def_rhs_code = gimple_assign_rhs_code (def_stmt);
      if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
        {
          tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
          irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
          if (TYPE_PRECISION (TREE_TYPE (irhs2))
              == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
            {
              gimple tstmt;
              stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
              irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
              tstmt = VEC_pop (gimple, *stmts);
              gcc_assert (tstmt == def_stmt);
              VEC_quick_push (gimple, *stmts, stmt);
              STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
                = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
              gcc_assert (!STMT_VINFO_PATTERN_DEF_STMT (stmt_def_vinfo));
              STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
              return irhs1;
            }
          else
            irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
          goto and_ior_xor;
        }
      /* FALLTHRU */

    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
      irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
      irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
    and_ior_xor:
      if (TYPE_PRECISION (TREE_TYPE (irhs1))
          != TYPE_PRECISION (TREE_TYPE (irhs2)))
        {
          int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
          int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
          int out_prec = TYPE_PRECISION (out_type);
          if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
            irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
          else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
            irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
          else
            {
              irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
              irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
            }
        }
      itype = TREE_TYPE (irhs1);
      pattern_stmt
        = gimple_build_assign_with_ops (rhs_code,
                                        vect_recog_temp_ssa_var (itype, NULL),
                                        irhs1, irhs2);
      break;

    default:
      gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
      if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
          || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
        {
          enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
          itype
            = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
        }
      else
        itype = TREE_TYPE (rhs1);
      cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
      if (trueval == NULL_TREE)
        trueval = build_int_cst (itype, 1);
      else
        gcc_checking_assert (useless_type_conversion_p (itype,
                                                        TREE_TYPE (trueval)));
      pattern_stmt
        = gimple_build_assign_with_ops3 (COND_EXPR,
                                         vect_recog_temp_ssa_var (itype, NULL),
                                         cond_expr, trueval,
                                         build_int_cst (itype, 0));
      break;
    }

  VEC_safe_push (gimple, heap, *stmts, stmt);
  gimple_set_location (pattern_stmt, loc);
  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
  return gimple_assign_lhs (pattern_stmt);
}
/* Function vect_recog_bool_pattern

   Try to find pattern like following:

     bool a_b, b_b, c_b, d_b, e_b;
     TYPE f_T;
   loop:
     S1  a_b = x1 CMP1 y1;
     S2  b_b = x2 CMP2 y2;
     S3  c_b = a_b & b_b;
     S4  d_b = x3 CMP3 y3;
     S5  e_b = c_b | d_b;
     S6  f_T = (TYPE) e_b;

   where type 'TYPE' is an integral type.

   Input:

   * LAST_STMT: A stmt at the end from which the pattern
                search begins, i.e. cast of a bool to
                an integer type.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the pattern.

        Assuming size of TYPE is the same as size of all comparisons
        (otherwise some casts would be added where needed), the above
        sequence we create related pattern stmts:
        S1'  a_T = x1 CMP1 y1 ? 1 : 0;
        S3'  c_T = x2 CMP2 y2 ? a_T : 0;
        S4'  d_T = x3 CMP3 y3 ? 1 : 0;
        S5'  e_T = c_T | d_T;
        S6'  f_T = e_T;

        Instead of the above S3' we could emit:
        S2'  b_T = x2 CMP2 y2 ? 1 : 0;
        S3'  c_T = a_T & b_T;
        but the above is more efficient.  */
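/* As a sketch (invented names): a bool built from comparisons and then
   widened to an integer,

     bool b_b;
     int r[N];
     for (i = 0; i < N; i++)
       {
         b_b = (x[i] < y[i]) & (y[i] < z[i]);
         r[i] = (int) b_b;
       }

   is rewritten into COND_EXPRs on integer types (e.g. x[i] < y[i] ? 1 : 0)
   that can be vectorized as VEC_COND_EXPRs.  */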
2024 vect_recog_bool_pattern (VEC (gimple
, heap
) **stmts
, tree
*type_in
,
2027 gimple last_stmt
= VEC_pop (gimple
, *stmts
);
2028 enum tree_code rhs_code
;
2029 tree var
, lhs
, rhs
, vectype
;
2030 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
2031 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
2032 gimple pattern_stmt
;
2034 if (!is_gimple_assign (last_stmt
))
2037 var
= gimple_assign_rhs1 (last_stmt
);
2038 lhs
= gimple_assign_lhs (last_stmt
);
2040 if ((TYPE_PRECISION (TREE_TYPE (var
)) != 1
2041 || !TYPE_UNSIGNED (TREE_TYPE (var
)))
2042 && TREE_CODE (TREE_TYPE (var
)) != BOOLEAN_TYPE
)
2045 rhs_code
= gimple_assign_rhs_code (last_stmt
);
2046 if (CONVERT_EXPR_CODE_P (rhs_code
))
2048 if (TREE_CODE (TREE_TYPE (lhs
)) != INTEGER_TYPE
2049 || TYPE_PRECISION (TREE_TYPE (lhs
)) == 1)
2051 vectype
= get_vectype_for_scalar_type (TREE_TYPE (lhs
));
2052 if (vectype
== NULL_TREE
)
2055 if (!check_bool_pattern (var
, loop_vinfo
))
2058 rhs
= adjust_bool_pattern (var
, TREE_TYPE (lhs
), NULL_TREE
, stmts
);
2059 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
2060 if (useless_type_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (rhs
)))
2062 = gimple_build_assign_with_ops (SSA_NAME
, lhs
, rhs
, NULL_TREE
);
2065 = gimple_build_assign_with_ops (NOP_EXPR
, lhs
, rhs
, NULL_TREE
);
2066 *type_out
= vectype
;
2068 VEC_safe_push (gimple
, heap
, *stmts
, last_stmt
);
2069 return pattern_stmt
;
  else if (rhs_code == SSA_NAME
           && STMT_VINFO_DATA_REF (stmt_vinfo))
    {
      stmt_vec_info pattern_stmt_info;
      vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      gcc_assert (vectype != NULL_TREE);
      if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
        return NULL;
      if (!check_bool_pattern (var, loop_vinfo))
        return NULL;

      rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
      lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
      if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
        {
          tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
          gimple cast_stmt
            = gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs, NULL_TREE);
          STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo) = cast_stmt;
          rhs = rhs2;
        }
      pattern_stmt
        = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
      pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
      STMT_VINFO_DATA_REF (pattern_stmt_info)
        = STMT_VINFO_DATA_REF (stmt_vinfo);
      STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
        = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
      STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
      STMT_VINFO_DR_OFFSET (pattern_stmt_info)
        = STMT_VINFO_DR_OFFSET (stmt_vinfo);
      STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
      STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
        = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
      DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
      *type_out = vectype;
      *type_in = vectype;
      VEC_safe_push (gimple, heap, *stmts, last_stmt);
      return pattern_stmt;
    }
  else
    return NULL;
}
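
/* The SSA_NAME arm above covers stores of a bool result straight into
   memory, e.g. (a hypothetical snippet):

     bool b[N];
     ...
     b[i] = a[i] < 16;

   Here the memory destination is viewed in the vector element type
   via VIEW_CONVERT_EXPR, the adjusted value is NOP-cast to it when
   needed, and the data-reference info of the original store is
   transferred to the new pattern stmt.  */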
/* Mark statements that are involved in a pattern.  */

static inline void
vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
                         tree pattern_vectype)
{
  stmt_vec_info pattern_stmt_info, def_stmt_info;
  stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
  gimple def_stmt;

  pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
  if (pattern_stmt_info == NULL)
    {
      pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
    }
  gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));

  STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
  STMT_VINFO_DEF_TYPE (pattern_stmt_info)
    = STMT_VINFO_DEF_TYPE (orig_stmt_info);
  STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
  STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
  STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
  STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info)
    = STMT_VINFO_PATTERN_DEF_STMT (orig_stmt_info);
  if (STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info))
    {
      def_stmt = STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info);
      def_stmt_info = vinfo_for_stmt (def_stmt);
      if (def_stmt_info == NULL)
        {
          def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
          set_vinfo_for_stmt (def_stmt, def_stmt_info);
        }
      gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
      STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
      STMT_VINFO_DEF_TYPE (def_stmt_info)
        = STMT_VINFO_DEF_TYPE (orig_stmt_info);
      if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
        STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
    }
}
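
/* For instance (hypothetical stmts S4 and S6, in the notation of the
   tables in the vect_pattern_recog comment below), after

     vect_mark_pattern_stmts (S4, S6, VT);

   S4 has in_pattern_p == true and related_stmt == S6, while S6 has
   related_stmt == S4 and vectype == VT, i.e. the original stmt and
   the pattern stmt always point at each other.  */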
/* Function vect_pattern_recog_1

   Input:
   PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
        computation pattern.
   STMT: A stmt from which the pattern search should start.

   If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
   expression that computes the same functionality and can be used to
   replace the sequence of stmts that are involved in the pattern.

   Output:
   This function checks if the expression returned by PATTERN_RECOG_FUNC is
   supported in vector form by the target.  We use 'TYPE_IN' to obtain the
   relevant vector type.  If 'TYPE_IN' is already a vector type, then this
   indicates that target support has already been checked by
   PATTERN_RECOG_FUNC.  If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC,
   we check that it fits the available target pattern.

   This function also does some bookkeeping, as explained in the documentation
   for vect_pattern_recog.  */
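
/* As a hypothetical walk-through of the target-support check below:
   if a recognizer hands back a pattern stmt whose rhs code is
   MULT_EXPR, with scalar *TYPE_IN 'short int' and *TYPE_OUT 'int', we
   first map both to their vector types via
   get_vectype_for_scalar_type, then query optab_for_tree_code for
   MULT_EXPR on the input vector mode, and give up unless
   optab_handler finds an insn whose result operand mode matches the
   mode of the output vector type.  */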
static void
vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
                      gimple_stmt_iterator si,
                      VEC (gimple, heap) **stmts_to_replace)
{
  gimple stmt = gsi_stmt (si), pattern_stmt;
  stmt_vec_info stmt_info;
  loop_vec_info loop_vinfo;
  tree pattern_vectype;
  tree type_in, type_out;
  enum tree_code code;
  int i;
  gimple next;

  VEC_truncate (gimple, *stmts_to_replace, 0);
  VEC_quick_push (gimple, *stmts_to_replace, stmt);
  pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
  if (!pattern_stmt)
    return;

  stmt = VEC_last (gimple, *stmts_to_replace);
  stmt_info = vinfo_for_stmt (stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);

  if (VECTOR_MODE_P (TYPE_MODE (type_in)))
    {
      /* No need to check target support (already checked by the pattern
         recognition function).  */
      pattern_vectype = type_out ? type_out : type_in;
    }
  else
    {
      enum machine_mode vec_mode;
      enum insn_code icode;
      optab optab;

      /* Check target support.  */
      type_in = get_vectype_for_scalar_type (type_in);
      if (!type_in)
        return;
      if (type_out)
        type_out = get_vectype_for_scalar_type (type_out);
      else
        type_out = type_in;
      if (!type_out)
        return;
      pattern_vectype = type_out;

      if (is_gimple_assign (pattern_stmt))
        code = gimple_assign_rhs_code (pattern_stmt);
      else
        {
          gcc_assert (is_gimple_call (pattern_stmt));
          code = CALL_EXPR;
        }

      optab = optab_for_tree_code (code, type_in, optab_default);
      vec_mode = TYPE_MODE (type_in);
      if (!optab
          || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
          || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
        return;
    }

  /* Found a vectorizable pattern.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "pattern recognized: ");
      print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
    }

  /* Mark the stmts that are involved in the pattern.  */
  vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);

  /* Patterns cannot be vectorized using SLP, because they change the order of
     computation.  */
  FOR_EACH_VEC_ELT (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
    if (next == stmt)
      VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i);

  /* It is possible that additional pattern stmts are created and inserted in
     STMTS_TO_REPLACE.  We create a stmt_info for each of them, and mark the
     relevant statements.  */
  for (i = 0; VEC_iterate (gimple, *stmts_to_replace, i, stmt)
              && (unsigned) i < (VEC_length (gimple, *stmts_to_replace) - 1);
       i++)
    {
      stmt_info = vinfo_for_stmt (stmt);
      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "additional pattern stmt: ");
          print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
        }

      vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
    }
}
/* Function vect_pattern_recog

   Input:
   LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
        computation idioms.

   Output - for each computation idiom that is detected we create a new stmt
        that provides the same functionality and that can be vectorized.  We
        also record some information in the struct_stmt_info of the relevant
        stmts, as explained below:

   At the entry to this function we have the following stmts, with the
   following initial value in the STMT_VINFO fields:

         stmt                     in_pattern_p  related_stmt    vec_stmt
         S1: a_i = ....                 -       -               -
         S2: a_2 = ..use(a_i)..         -       -               -
         S3: a_1 = ..use(a_2)..         -       -               -
         S4: a_0 = ..use(a_1)..         -       -               -
         S5: ... = ..use(a_0)..         -       -               -

   Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
   represented by a single stmt.  We then:
   - create a new stmt S6 equivalent to the pattern (the stmt is not
     inserted into the code)
   - fill in the STMT_VINFO fields as follows:

                                  in_pattern_p  related_stmt    vec_stmt
         S1: a_i = ....                 -       -               -
         S2: a_2 = ..use(a_i)..         -       -               -
         S3: a_1 = ..use(a_2)..         -       -               -
         S4: a_0 = ..use(a_1)..         true    S6              -
          '---> S6: a_new = ....        -       S4              -
         S5: ... = ..use(a_0)..         -       -               -

   (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
   to each other through the RELATED_STMT field).

   S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
   of S4 because it will replace all its uses.  Stmts {S1,S2,S3} will
   remain irrelevant unless used by stmts other than S4.

   If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
   (because they are marked as irrelevant).  It will vectorize S6, and record
   a pointer to the new vector stmt VS6 from S6 (as usual).
   S4 will be skipped, and S5 will be vectorized as usual:

                                  in_pattern_p  related_stmt    vec_stmt
         S1: a_i = ....                 -       -               -
         S2: a_2 = ..use(a_i)..         -       -               -
         S3: a_1 = ..use(a_2)..         -       -               -
       > VS6: va_new = ....             -       -               -
         S4: a_0 = ..use(a_1)..         true    S6              VS6
          '---> S6: a_new = ....        -       S4              VS6
       > VS5: ... = ..vuse(va_new)..    -       -               -
         S5: ... = ..use(a_0)..         -       -               -

   DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
   elsewhere), and we'll end up with:

        VS6: va_new = ....
        VS5: ... = ..vuse(va_new)..

   In case of more than one pattern statement, e.g., widen-mult with
   intermediate type:

     S1  a_t = ;
     S2  a_T = (TYPE) a_t;
           '--> S3: a_it = (interm_type) a_t;
     S4  prod_T = a_T * CONST;
           '--> S5: prod_T' = a_it w* CONST;

   there may be other users of a_T outside the pattern.  In that case S2 will
   be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
   and vectorized.  The vector stmt VS2 will be recorded in S2, and VS3 will
   be recorded in S3.  */
void
vect_pattern_recog (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  unsigned int i, j;
  vect_recog_func_ptr vect_recog_func;
  VEC (gimple, heap) *stmts_to_replace = VEC_alloc (gimple, heap, 1);

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_pattern_recog ===");

  /* Scan through the loop stmts, applying the pattern recognition
     functions starting at each stmt visited:  */
  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          /* Scan over all generic vect_recog_xxx_pattern functions.  */
          for (j = 0; j < NUM_PATTERNS; j++)
            {
              vect_recog_func = vect_vect_recog_func_ptrs[j];
              vect_pattern_recog_1 (vect_recog_func, si, &stmts_to_replace);
            }
        }
    }

  VEC_free (gimple, heap, stmts_to_replace);
}
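
/* Note: as the comment above implies, vect_pattern_recog runs once per
   loop before the relevance analysis in vect_mark_stmts_to_be_vectorized,
   so that the pattern stmts created here, rather than the original stmts
   they replace, take part in the subsequent vectorization analysis.  */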