/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "tree-data-ref.h"
#include "tree-vectorizer.h"
#include "diagnostic-core.h"
/* Pattern recognition functions  */
static gimple vect_recog_widen_sum_pattern (VEC (gimple, heap) **, tree *,
                                            tree *);
static gimple vect_recog_widen_mult_pattern (VEC (gimple, heap) **, tree *,
                                             tree *);
static gimple vect_recog_dot_prod_pattern (VEC (gimple, heap) **, tree *,
                                           tree *);
static gimple vect_recog_pow_pattern (VEC (gimple, heap) **, tree *, tree *);
static gimple vect_recog_over_widening_pattern (VEC (gimple, heap) **, tree *,
                                                tree *);
static gimple vect_recog_widen_shift_pattern (VEC (gimple, heap) **,
                                              tree *, tree *);
static gimple vect_recog_vector_vector_shift_pattern (VEC (gimple, heap) **,
                                                      tree *, tree *);
static gimple vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **,
                                                  tree *, tree *);
static gimple vect_recog_bool_pattern (VEC (gimple, heap) **, tree *, tree *);

static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
	vect_recog_widen_mult_pattern,
	vect_recog_widen_sum_pattern,
	vect_recog_dot_prod_pattern,
	vect_recog_pow_pattern,
	vect_recog_over_widening_pattern,
	vect_recog_widen_shift_pattern,
	vect_recog_vector_vector_shift_pattern,
	vect_recog_mixed_size_cond_pattern,
	vect_recog_bool_pattern};
/* Function widened_name_p

   Check whether NAME, an ssa-name used in USE_STMT,
   is a result of a type-promotion, such that:
     DEF_STMT: NAME = NOP (name0)
   where the type of name0 (HALF_TYPE) is smaller than the type of NAME.
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.  */
static bool
widened_name_p (tree name, gimple use_stmt, tree *half_type, gimple *def_stmt,
                bool check_sign)
{
  tree dummy;
  gimple dummy_gimple;
  loop_vec_info loop_vinfo;
  stmt_vec_info stmt_vinfo;
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;
  tree def;

  stmt_vinfo = vinfo_for_stmt (use_stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

  if (!vect_is_simple_use (name, loop_vinfo, NULL, def_stmt, &def, &dt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (!*def_stmt)
    return false;

  if (!is_gimple_assign (*def_stmt))
    return false;

  if (gimple_assign_rhs_code (*def_stmt) != NOP_EXPR)
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *half_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*half_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*half_type)) && check_sign)
      || (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 2)))
    return false;

  if (!vect_is_simple_use (oprnd0, loop_vinfo, NULL, &dummy_gimple, &dummy,
                           &dt))
    return false;

  return true;
}
/* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
   is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */

static tree
vect_recog_temp_ssa_var (tree type, gimple stmt)
{
  tree var = create_tmp_var (type, "patt");

  add_referenced_var (var);
  var = make_ssa_name (var, stmt);
  return var;
}
/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE1 prod;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
   same size of 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */
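/* For illustration, a scalar source loop of the kind this function is meant
   to recognize could look like this (a sketch; array names and bound are
   hypothetical, not taken from this file):

     short a[N], b[N];
     int sum = 0;
     for (i = 0; i < N; i++)
       sum += a[i] * b[i];      (becomes S3..S7 above after gimplification)

   Targets with a dot-product instruction can then compute the reduction
   without materializing every intermediate 'int' product.  */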
static gimple
vect_recog_dot_prod_pattern (VEC (gimple, heap) **stmts, tree *type_in,
                             tree *type_out)
{
  gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0);
  tree oprnd0, oprnd1;
  tree oprnd00, oprnd01;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  tree prod_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_info);
  tree var;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DPROD = DX * DY;
          DDPROD = (TYPE2) DPROD;
          sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type
     - sum is the same size of DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (oprnd0);
    }
  else
    {
      gimple def_stmt;

      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (last_stmt);
      oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), type)
          || !types_compatible_p (TREE_TYPE (oprnd1), type))
        return NULL;
      stmt = last_stmt;

      if (widened_name_p (oprnd0, stmt, &half_type, &def_stmt, true))
        {
          stmt = def_stmt;
          oprnd0 = gimple_assign_rhs1 (stmt);
        }
      else
        half_type = type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr  */
  if (TREE_CODE (oprnd0) != SSA_NAME)
    return NULL;

  prod_type = half_type;
  stmt = SSA_NAME_DEF_STMT (oprnd0);

  /* It could not be the dot_prod pattern if the stmt is outside the loop.  */
  if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (stmt))
    return NULL;
  stmt_vinfo = vinfo_for_stmt (stmt);
  gcc_assert (stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
    return NULL;
  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as a widening multiplication?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
        return NULL;
      stmt_vinfo = vinfo_for_stmt (stmt);
      gcc_assert (stmt_vinfo);
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
      oprnd00 = gimple_assign_rhs1 (stmt);
      oprnd01 = gimple_assign_rhs2 (stmt);
    }
  else
    {
      tree half_type0, half_type1;
      gimple def_stmt;
      tree oprnd0, oprnd1;

      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
          || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
        return NULL;
      if (!widened_name_p (oprnd0, stmt, &half_type0, &def_stmt, true))
        return NULL;
      oprnd00 = gimple_assign_rhs1 (def_stmt);
      if (!widened_name_p (oprnd1, stmt, &half_type1, &def_stmt, true))
        return NULL;
      oprnd01 = gimple_assign_rhs1 (def_stmt);
      if (!types_compatible_p (half_type0, half_type1))
        return NULL;
      if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
        return NULL;
    }

  half_type = TREE_TYPE (oprnd00);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops3 (DOT_PROD_EXPR, var,
                                                oprnd00, oprnd01, oprnd1);

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_recog_dot_prod_pattern: detected: ");
      print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}
/* Handle widening operation by a constant.  At the moment we support
   MULT_EXPR and LSHIFT_EXPR.

   For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
   we check that CONST_OPRND is less than or equal to the size of HALF_TYPE.

   Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
   HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
   that satisfies the above restrictions, we can perform a widening operation
   from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
   with a_it = (interm_type) a_t;  */
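/* For example (a sketch, not code from this file): with

     char a_t;  int res_T;
     a_T = (int) a_t;  res_T = a_T << 10;

   'int' is 4 times wider than 'char'; the shift amount 10 does not fit
   'char' (10 > 8) but does fit the intermediate 'short' (10 <= 16), so the
   promotion is replaced by a_it = (short) a_t and the widening shift is
   done from 'short' to 'int'.  */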
static bool
vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
                               tree const_oprnd, tree *oprnd,
                               VEC (gimple, heap) **stmts, tree type,
                               tree *half_type, gimple def_stmt)
{
  tree new_type, new_oprnd, tmp;
  gimple new_stmt;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt));
  struct loop *loop = LOOP_VINFO_LOOP (loop_info);

  if (code != MULT_EXPR && code != LSHIFT_EXPR)
    return false;

  if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
        || (code == LSHIFT_EXPR
            && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
               != 1))
      && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
    {
      /* CONST_OPRND is a constant of HALF_TYPE.  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      return true;
    }

  if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4)
      || !gimple_bb (def_stmt)
      || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
      || !vinfo_for_stmt (def_stmt))
    return false;

  /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
     a type 2 times bigger than HALF_TYPE.  */
  new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
                                             TYPE_UNSIGNED (type));
  if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
      || (code == LSHIFT_EXPR
          && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
    return false;

  /* Use NEW_TYPE for widening operation.  */
  if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
    {
      new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
      /* Check if the already created pattern stmt is what we need.  */
      if (!is_gimple_assign (new_stmt)
          || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
          || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
        return false;

      VEC_safe_push (gimple, heap, *stmts, def_stmt);
      *oprnd = gimple_assign_lhs (new_stmt);
    }
  else
    {
      /* Create a_T = (NEW_TYPE) a_t;  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      tmp = create_tmp_var (new_type, NULL);
      add_referenced_var (tmp);
      new_oprnd = make_ssa_name (tmp, NULL);
      new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd,
                                               NULL_TREE);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
      VEC_safe_push (gimple, heap, *stmts, def_stmt);
      *oprnd = new_oprnd;
    }

  *half_type = new_type;
  return true;
}
/* Function vect_recog_widen_mult_pattern

   Try to find the following pattern:

     type a_t, b_t;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;

   where type 'TYPE' is at least double the size of type 'type'.

   Also detect unsigned cases:

     unsigned type a_t, b_t;
     unsigned TYPE u_prod_T;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;
     S6  u_prod_T = (unsigned TYPE) prod_T;

   and multiplication by constants:

     type a_t;
     TYPE a_T, prod_T;

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
     S5  prod_T = a_T * CONST;

   A special case of multiplication by constants is when 'TYPE' is 4 times
   bigger than 'type', but CONST fits an intermediate type 2 times smaller
   than 'TYPE'.  In that case we create an additional pattern stmt for S3
   to create a variable of the intermediate type, and perform widen-mult
   on the intermediate type as well:

     type a_t;
     interm_type a_it;
     TYPE a_T, prod_T, prod_T';

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
           '--> a_it = (interm_type) a_t;
     S5  prod_T = a_T * CONST;
           '--> prod_T' = a_it w* CONST;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
   is detected.  In case of unsigned widen-mult, the original stmt (S5) is
   replaced with S6 in STMTS.  In case of multiplication by a constant
   of an intermediate type (the last case above), STMTS also contains S3
   (inserted before S5).

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_MULT <a_t, b_t>
*/
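/* A typical scalar source that gimplifies to the first form above
   (an illustrative sketch only):

     short a[N], b[N];
     int c[N];
     for (i = 0; i < N; i++)
       c[i] = a[i] * b[i];      (promotions of a[i] and b[i] give S3-S5)

   On targets with widening-multiply support the vectorizer can then emit
   one WIDEN_MULT per vector pair instead of unpacking both operands
   first.  */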
static gimple
vect_recog_widen_mult_pattern (VEC (gimple, heap) **stmts,
                               tree *type_in, tree *type_out)
{
  gimple last_stmt = VEC_pop (gimple, *stmts);
  gimple def_stmt0, def_stmt1;
  tree oprnd0, oprnd1;
  tree type, half_type0, half_type1;
  gimple pattern_stmt;
  tree vectype, vectype_out = NULL_TREE;
  tree dummy;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  VEC (tree, heap) *dummy_vec;
  bool op1_ok;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* Check argument 0.  */
  if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false))
    return NULL;
  /* Check argument 1.  */
  op1_ok = widened_name_p (oprnd1, last_stmt, &half_type1, &def_stmt1, false);

  if (op1_ok)
    {
      oprnd0 = gimple_assign_rhs1 (def_stmt0);
      oprnd1 = gimple_assign_rhs1 (def_stmt1);
    }
  else
    {
      if (TREE_CODE (oprnd1) == INTEGER_CST
          && TREE_CODE (half_type0) == INTEGER_TYPE
          && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
                                            &oprnd0, stmts, type,
                                            &half_type0, def_stmt0))
        half_type1 = half_type0;
      else
        return NULL;
    }

  /* Handle unsigned case.  Look for
     S6  u_prod_T = (unsigned TYPE) prod_T;
     Use unsigned TYPE as the type for WIDEN_MULT_EXPR.  */
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
    {
      tree lhs = gimple_assign_lhs (last_stmt), use_lhs;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      int nuses = 0;
      gimple use_stmt = NULL;
      tree use_type;

      if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
        return NULL;

      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
        {
          if (is_gimple_debug (USE_STMT (use_p)))
            continue;
          use_stmt = USE_STMT (use_p);
          nuses++;
        }

      if (nuses != 1 || !is_gimple_assign (use_stmt)
          || gimple_assign_rhs_code (use_stmt) != NOP_EXPR)
        return NULL;

      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      if (!INTEGRAL_TYPE_P (use_type)
          || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
          || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
        return NULL;

      type = use_type;
      last_stmt = use_stmt;
    }

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_recog_widen_mult_pattern: detected: ");

  /* Check target support  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);
  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
                                          vectype_out, vectype,
                                          &dummy, &dummy, &dummy_code,
                                          &dummy_code, &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, var, oprnd0,
                                               oprnd1);

  if (vect_print_dump_info (REPORT_DETAILS))
    print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);

  VEC_safe_push (gimple, heap, *stmts, last_stmt);
  return pattern_stmt;
}
/* Function vect_recog_pow_pattern

   Try to find the following pattern:

     x = POW (y, N);

   with POW being one of pow, powf, powi, powif and N being
   either 2 or 0.5.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        x = x * x
   or
        x = sqrt (x)
*/
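/* E.g., the call in (a hypothetical source loop)

     for (i = 0; i < N; i++)
       a[i] = pow (b[i], 2.0);

   is rewritten as a[i] = b[i] * b[i], while an exponent of 0.5 would become
   sqrt (b[i]), for which the target may provide a vectorized builtin.  */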
static gimple
vect_recog_pow_pattern (VEC (gimple, heap) **stmts, tree *type_in,
                        tree *type_out)
{
  gimple last_stmt = VEC_index (gimple, *stmts, 0);
  tree fn, base, exp = NULL;
  gimple stmt;
  tree var;

  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  fn = gimple_call_fndecl (last_stmt);
  if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
    return NULL;

  switch (DECL_FUNCTION_CODE (fn))
    {
    case BUILT_IN_POWIF:
    case BUILT_IN_POWI:
    case BUILT_IN_POWF:
    case BUILT_IN_POW:
      base = gimple_call_arg (last_stmt, 0);
      exp = gimple_call_arg (last_stmt, 1);
      if (TREE_CODE (exp) != REAL_CST
          && TREE_CODE (exp) != INTEGER_CST)
        return NULL;
      break;

    default:
      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  *type_out = NULL_TREE;

  /* Catch squaring.  */
  if ((host_integerp (exp, 0)
       && tree_low_cst (exp, 0) == 2)
      || (TREE_CODE (exp) == REAL_CST
          && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
    {
      *type_in = TREE_TYPE (base);

      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign_with_ops (MULT_EXPR, var, base, base);
      return stmt;
    }

  /* Catch square root.  */
  if (TREE_CODE (exp) == REAL_CST
      && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
    {
      tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
      *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
      if (*type_in)
        {
          gimple stmt = gimple_build_call (newfn, 1, base);
          if (vectorizable_function (stmt, *type_in, *type_in)
              != NULL_TREE)
            {
              var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
              gimple_call_set_lhs (stmt, var);
              return stmt;
            }
        }
    }

  return NULL;
}
/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

     type x_t;
     TYPE x_T, sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e - we're
   summing elements of type 'type' into an accumulator of type 'TYPE'.  This
   is a special case of a reduction computation.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.  In the example,
   when this function is called with S3, the pattern {S2,S3} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */
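/* For illustration (a sketch; names are hypothetical):

     short a[N];
     int sum = 0;
     for (i = 0; i < N; i++)
       sum += a[i];             (S2 and S3 above after gimplification)

   can be vectorized with WIDEN_SUM, summing pairs of 'short' elements
   directly into 'int' accumulators.  */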
static gimple
vect_recog_widen_sum_pattern (VEC (gimple, heap) **stmts, tree *type_in,
                              tree *type_out)
{
  gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0);
  tree oprnd0, oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_info);
  tree var;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE) X;
          sum_1 = DX + sum_0;
     In which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a cast from type 'type' to type
     'TYPE'.  */

  if (!widened_name_p (oprnd0, last_stmt, &half_type, &stmt, true))
    return NULL;

  oprnd0 = gimple_assign_rhs1 (stmt);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_SUM_EXPR, var,
                                               oprnd0, oprnd1);

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_recog_widen_sum_pattern: detected: ");
      print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}
/* Return TRUE if the operation in STMT can be performed on a smaller type.

   Input:
   STMT - a statement to check.
   DEF - we support operations with two operands, one of which is constant.
         The other operand can be defined by a demotion operation, or by a
         previous statement in a sequence of over-promoted operations.  In the
         latter case DEF is used to replace that operand.  (It is defined by a
         pattern statement we created for the previous statement in the
         sequence).

   Input/output:
   NEW_TYPE - Output: a smaller type that we are trying to use.  Input: if not
         NULL, it's the type of DEF.
   STMTS - additional pattern statements.  If a pattern statement (type
         conversion) is created in this function, its original statement is
         added to STMTS.

   Output:
   OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
         operands to use in the new pattern statement for STMT (will be
         created in vect_recog_over_widening_pattern ()).
   NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
         statements for STMT: the first one is a type promotion and the second
         one is the operation itself.  We return the type promotion statement
         in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_STMT
         of the second pattern statement.  */
static bool
vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
                                  tree *op0, tree *op1, gimple *new_def_stmt,
                                  VEC (gimple, heap) **stmts)
{
  enum tree_code code;
  tree const_oprnd, oprnd;
  tree interm_type = NULL_TREE, half_type, tmp, new_oprnd, type;
  gimple def_stmt, new_stmt;
  bool first = false;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt));
  struct loop *loop = LOOP_VINFO_LOOP (loop_info);

  *new_def_stmt = NULL;

  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
      && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
    return false;

  oprnd = gimple_assign_rhs1 (stmt);
  const_oprnd = gimple_assign_rhs2 (stmt);
  type = gimple_expr_type (stmt);

  if (TREE_CODE (oprnd) != SSA_NAME
      || TREE_CODE (const_oprnd) != INTEGER_CST)
    return false;

  /* If we are in the middle of a sequence, we use DEF from a previous
     statement.  Otherwise, OPRND has to be a result of type promotion.  */
  if (*new_type)
    {
      half_type = *new_type;
      oprnd = def;
    }
  else
    {
      first = true;
      if (!widened_name_p (oprnd, stmt, &half_type, &def_stmt, false)
          || !gimple_bb (def_stmt)
          || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
          || !vinfo_for_stmt (def_stmt))
        return false;
    }

  /* Can we perform the operation on a smaller type?  */
  switch (code)
    {
      case BIT_IOR_EXPR:
      case BIT_XOR_EXPR:
      case BIT_AND_EXPR:
        if (!int_fits_type_p (const_oprnd, half_type))
          {
            /* HALF_TYPE is not enough.  Try a bigger type if possible.  */
            if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
              return false;

            interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
            if (!int_fits_type_p (const_oprnd, interm_type))
              return false;
          }

        break;

      case LSHIFT_EXPR:
        /* Try intermediate type - HALF_TYPE is not enough for sure.  */
        if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
          return false;

        /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
          (e.g., if the original value was char, the shift amount is at most 8
           if we want to use short).  */
        if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
          return false;

        interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
        if (!vect_supportable_shift (code, interm_type))
          return false;

        break;

      case RSHIFT_EXPR:
        if (vect_supportable_shift (code, half_type))
          break;

        /* Try intermediate type - HALF_TYPE is not supported.  */
        if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
          return false;

        interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
        if (!vect_supportable_shift (code, interm_type))
          return false;

        break;

      default:
        gcc_unreachable ();
    }

  /* There are four possible cases:
     1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
        the first statement in the sequence)
        a. The original, HALF_TYPE, is not enough - we replace the promotion
           from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
        b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
           promotion.
     2. OPRND is defined by a pattern statement we created.
        a. Its type is not sufficient for the operation, we create a new stmt:
           a type conversion for OPRND from HALF_TYPE to INTERM_TYPE.  We store
           this statement in NEW_DEF_STMT, and it is later put in
           STMT_VINFO_PATTERN_DEF_STMT of the pattern statement for STMT.
        b. OPRND is good to use in the new statement.  */
  if (first)
    {
      if (interm_type)
        {
          /* Replace the original type conversion HALF_TYPE->TYPE with
             HALF_TYPE->INTERM_TYPE.  */
          if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
            {
              new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
              /* Check if the already created pattern stmt is what we need.  */
              if (!is_gimple_assign (new_stmt)
                  || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
                  || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
                return false;

              VEC_safe_push (gimple, heap, *stmts, def_stmt);
              oprnd = gimple_assign_lhs (new_stmt);
            }
          else
            {
              /* Create NEW_OPRND = (INTERM_TYPE) OPRND.  */
              oprnd = gimple_assign_rhs1 (def_stmt);
              tmp = create_tmp_reg (interm_type, NULL);
              add_referenced_var (tmp);
              new_oprnd = make_ssa_name (tmp, NULL);
              new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                       oprnd, NULL_TREE);
              STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
              VEC_safe_push (gimple, heap, *stmts, def_stmt);
              oprnd = new_oprnd;
            }
        }
      else
        {
          /* Retrieve the operand before the type promotion.  */
          oprnd = gimple_assign_rhs1 (def_stmt);
        }
    }
  else
    {
      if (interm_type)
        {
          /* Create a type conversion HALF_TYPE->INTERM_TYPE.  */
          tmp = create_tmp_reg (interm_type, NULL);
          add_referenced_var (tmp);
          new_oprnd = make_ssa_name (tmp, NULL);
          new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                   oprnd, NULL_TREE);
          oprnd = new_oprnd;
          *new_def_stmt = new_stmt;
        }

      /* Otherwise, OPRND is already set.  */
    }

  if (interm_type)
    *new_type = interm_type;
  else
    *new_type = half_type;

  *op0 = oprnd;
  *op1 = fold_convert (*new_type, const_oprnd);

  return true;
}
/* Try to find a statement or a sequence of statements that can be performed
   on a smaller type:

     type x_t;
     TYPE x_T, res0_T, res1_T;
   loop:
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  res0_T = op (x_T, C0);
     S4  res1_T = op (res0_T, C1);
     S5  ... = () res1_T;  - type demotion

   where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
   constants.  Check if S3 and S4 can be done on a smaller type than 'TYPE',
   it can either be 'type' or some intermediate type.  For now, we expect S5
   to be a type demotion operation.  We also check that S3 and S4 have only
   one use.  */
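/* A scalar source that produces such a sequence (an illustrative sketch):

     unsigned char in[N], out[N];
     for (i = 0; i < N; i++)
       out[i] = (in[i] << 2) | 3;    (C promotes the operations to 'int')

   The shift and the ior are over-promoted to 'int' by the front end, but
   both can safely be performed in a narrower type, letting the vectorizer
   process more elements per vector.  */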
static gimple
vect_recog_over_widening_pattern (VEC (gimple, heap) **stmts,
                                  tree *type_in, tree *type_out)
{
  gimple stmt = VEC_pop (gimple, *stmts);
  gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
  tree op0, op1, vectype = NULL_TREE, lhs, use_lhs, use_type;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  int nuses = 0;
  tree var = NULL_TREE, new_type = NULL_TREE, tmp, new_oprnd;
  bool first;
  struct loop *loop = (gimple_bb (stmt))->loop_father;

  first = true;
  while (1)
    {
      if (!vinfo_for_stmt (stmt)
          || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
        return NULL;

      new_def_stmt = NULL;
      if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
                                             &op0, &op1, &new_def_stmt,
                                             stmts))
        {
          if (first)
            return NULL;
          else
            break;
        }

      /* STMT can be performed on a smaller type.  Check its uses.  */
      lhs = gimple_assign_lhs (stmt);
      nuses = 0;
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
        {
          if (is_gimple_debug (USE_STMT (use_p)))
            continue;
          use_stmt = USE_STMT (use_p);
          nuses++;
        }

      if (nuses != 1 || !is_gimple_assign (use_stmt)
          || !gimple_bb (use_stmt)
          || !flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
        return NULL;

      /* Create pattern statement for STMT.  */
      vectype = get_vectype_for_scalar_type (new_type);
      if (!vectype)
        return NULL;

      /* We want to collect all the statements for which we create pattern
         statements, except for the case when the last statement in the
         sequence doesn't have a corresponding pattern statement.  In such
         case we associate the last pattern statement with the last statement
         in the sequence.  Therefore, we only add the original statement to
         the list if we know that it is not the last.  */
      if (prev_stmt)
        VEC_safe_push (gimple, heap, *stmts, prev_stmt);

      var = vect_recog_temp_ssa_var (new_type, NULL);
      pattern_stmt
        = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt), var,
                                        op0, op1);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
      STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (stmt)) = new_def_stmt;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "created pattern stmt: ");
          print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
        }

      prev_stmt = stmt;
      stmt = use_stmt;

      first = false;
    }

  /* We got a sequence.  We expect it to end with a type demotion operation.
     Otherwise, we quit (for now).  There are three possible cases: the
     conversion is to NEW_TYPE (we don't do anything), the conversion is to
     a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
     NEW_TYPE differs (we create a new conversion statement).  */
  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
    {
      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      /* Support only type promotion or signedness change.  */
      if (!INTEGRAL_TYPE_P (use_type)
          || TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
        return NULL;

      if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
          || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
        {
          /* Create NEW_TYPE->USE_TYPE conversion.  */
          tmp = create_tmp_reg (use_type, NULL);
          add_referenced_var (tmp);
          new_oprnd = make_ssa_name (tmp, NULL);
          pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                       var, NULL_TREE);
          STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;

          *type_in = get_vectype_for_scalar_type (new_type);
          *type_out = get_vectype_for_scalar_type (use_type);

          /* We created a pattern statement for the last statement in the
             sequence, so we don't need to associate it with the pattern
             statement created for PREV_STMT.  Therefore, we add PREV_STMT
             to the list in order to mark it later in vect_pattern_recog_1.  */
          if (prev_stmt)
            VEC_safe_push (gimple, heap, *stmts, prev_stmt);
        }
      else
        {
          if (prev_stmt)
            STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (use_stmt))
              = STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (prev_stmt));

          *type_in = vectype;
          *type_out = NULL_TREE;
        }

      VEC_safe_push (gimple, heap, *stmts, use_stmt);
    }
  else
    /* TODO: support general case, create a conversion to the correct type.  */
    return NULL;

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_recog_over_widening_pattern: detected: ");
      print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
    }

  return pattern_stmt;
}
/* Detect widening shift pattern:

   type a_t;
   TYPE a_T, res_T;

   S1 a_t = ;
   S2 a_T = (TYPE) a_t;
   S3 res_T = a_T << CONST;

  where type 'TYPE' is at least double the size of type 'type'.

  Also detect unsigned cases:

  unsigned type a_t;
  unsigned TYPE u_res_T;
  TYPE a_T, res_T;

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
  S3 res_T = a_T << CONST;
  S4 u_res_T = (unsigned TYPE) res_T;

  And a case when 'TYPE' is 4 times bigger than 'type'.  In that case we
  create an additional pattern stmt for S2 to create a variable of an
  intermediate type, and perform widen-shift on the intermediate type:

  type a_t;
  interm_type a_it;
  TYPE a_T, res_T, res_T';

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
      '--> a_it = (interm_type) a_t;
  S3 res_T = a_T << CONST;
      '--> res_T' = a_it <<* CONST;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins.
    In case of unsigned widen-shift, the original stmt (S3) is replaced with
    S4 in STMTS.  When an intermediate type is used and a pattern statement
    is created for S2, we also put S2 here (before S3).

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the sequence of
    stmts that constitute the pattern.  In this case it will be:
    WIDEN_LSHIFT_EXPR <a_t, CONST>.  */
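/* For illustration (a hypothetical source loop):

     short a[N];
     int b[N];
     for (i = 0; i < N; i++)
       b[i] = a[i] << 3;        (S2/S3 above after gimplification)

   becomes a single WIDEN_LSHIFT_EXPR per vector on targets that support
   the widening shift.  */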
static gimple
vect_recog_widen_shift_pattern (VEC (gimple, heap) **stmts,
                                tree *type_in, tree *type_out)
{
  gimple last_stmt = VEC_pop (gimple, *stmts);
  gimple def_stmt0;
  tree oprnd0, oprnd1;
  tree type, half_type0;
  gimple pattern_stmt, orig_stmt = NULL;
  tree vectype, vectype_out = NULL_TREE;
  tree dummy;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  VEC (tree, heap) * dummy_vec;
  gimple use_stmt = NULL;
  bool over_widen = false;

  if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
    return NULL;

  orig_stmt = last_stmt;
  if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
    {
      /* This statement was also detected as over-widening operation (it can't
         be any other pattern, because only over-widening detects shifts).
         LAST_STMT is the final type demotion statement, but its related
         statement is shift.  We analyze the related statement to catch cases:

         orig code:
          type a_t;
          itype res;
          TYPE a_T, res_T;

          S1 a_T = (TYPE) a_t;
          S2 res_T = a_T << CONST;
          S3 res = (itype)res_T;

          (size of type * 2 <= size of itype
           and size of itype * 2 <= size of TYPE)

         code after over-widening pattern detection:

          S1 a_T = (TYPE) a_t;
               --> a_it = (itype) a_t;
          S2 res_T = a_T << CONST;
          S3 res = (itype)res_T;  <--- LAST_STMT
               --> res = a_it << CONST;

         after widen_shift:

          S1 a_T = (TYPE) a_t;
               --> a_it = (itype) a_t; - redundant
          S2 res_T = a_T << CONST;
          S3 res = (itype)res_T;
               --> res = a_t w<< CONST;

      i.e., we replace the three statements with res = a_t w<< CONST.  */
      last_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_stmt));
      over_widen = true;
    }

  if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
    return NULL;

  /* Check operand 0: it has to be defined by a type promotion.  */
  if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false))
    return NULL;

  /* Check operand 1: has to be positive.  We check that it fits the type
     in vect_handle_widen_op_by_const ().  */
  if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (def_stmt0);
  type = gimple_expr_type (last_stmt);

  /* Check if this is a widening operation.  */
  if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
                                      &oprnd0, stmts,
                                      type, &half_type0, def_stmt0))
    return NULL;

  /* Handle unsigned case.  Look for
     S4  u_res_T = (unsigned TYPE) res_T;
     Use unsigned TYPE as the type for WIDEN_LSHIFT_EXPR.  */
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
    {
      tree lhs = gimple_assign_lhs (last_stmt), use_lhs;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      int nuses = 0;
      tree use_type;

      if (over_widen)
        {
          /* In case of over-widening pattern, S4 should be ORIG_STMT itself.
             We check here that TYPE is the correct type for the operation,
             i.e., it's the type of the original result.  */
          tree orig_type = gimple_expr_type (orig_stmt);
          if ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (orig_type))
              || (TYPE_PRECISION (type) != TYPE_PRECISION (orig_type)))
            return NULL;
        }
      else
        {
          FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
            {
              if (is_gimple_debug (USE_STMT (use_p)))
                continue;
              use_stmt = USE_STMT (use_p);
              nuses++;
            }

          if (nuses != 1 || !is_gimple_assign (use_stmt)
              || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
            return NULL;

          use_lhs = gimple_assign_lhs (use_stmt);
          use_type = TREE_TYPE (use_lhs);

          if (!INTEGRAL_TYPE_P (use_type)
              || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
              || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
            return NULL;

          type = use_type;
        }
    }

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_recog_widen_shift_pattern: detected: ");

  /* Check target support.  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);

  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
                                          vectype_out, vectype,
                                          &dummy, &dummy, &dummy_code,
                                          &dummy_code, &dummy_int,
                                          &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt =
    gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR, var, oprnd0, oprnd1);

  if (vect_print_dump_info (REPORT_DETAILS))
    print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);

  if (use_stmt)
    last_stmt = use_stmt;
  else
    last_stmt = orig_stmt;

  VEC_safe_push (gimple, heap, *stmts, last_stmt);
  return pattern_stmt;
}
/* Detect a vector by vector shift pattern that wouldn't be otherwise
   vectorized:

   type a_t;
   TYPE b_T, res_T;

   S1 a_t = ;
   S2 b_T = ;
   S3 res_T = b_T op a_t;

  where type 'TYPE' is a type with different size than 'type',
  and op is <<, >> or rotate.

  Also detect cases:

  type a_t;
  TYPE b_T, c_T, res_T;

  S0 c_T = ;
  S1 a_t = (type) c_T;
  S2 b_T = ;
  S3 res_T = b_T op a_t;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins,
    i.e. the shift/rotate stmt.  The original stmt (S3) is replaced
    with a shift/rotate which has same type on both operands, in the
    second case just b_T op c_T, in the first case with added cast
    from a_t to c_T in STMT_VINFO_PATTERN_DEF_STMT.

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the shift/rotate
    S3 stmt.  */
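/* E.g. (a sketch; not from this file):

     int b[N], r[N];
     char s[N];
     for (i = 0; i < N; i++)
       r[i] = b[i] << s[i];     (shift amount has a narrower type)

   The shift amount is converted (or its original wide value reused) so that
   both operands of the vector shift have the same mode.  */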
static gimple
vect_recog_vector_vector_shift_pattern (VEC (gimple, heap) **stmts,
                                        tree *type_in, tree *type_out)
{
  gimple last_stmt = VEC_pop (gimple, *stmts);
  tree oprnd0, oprnd1, lhs, var;
  gimple pattern_stmt, def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  enum vect_def_type dt;
  tree def;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  lhs = gimple_assign_lhs (last_stmt);
  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TREE_CODE (oprnd1) != SSA_NAME
      || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
      || TYPE_PRECISION (TREE_TYPE (oprnd1))
         != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
      || TYPE_PRECISION (TREE_TYPE (lhs))
         != TYPE_PRECISION (TREE_TYPE (oprnd0)))
    return NULL;

  if (!vect_is_simple_use (oprnd1, loop_vinfo, NULL, &def_stmt, &def, &dt))
    return NULL;

  if (dt != vect_internal_def)
    return NULL;

  *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
  *type_out = *type_in;
  if (*type_in == NULL_TREE)
    return NULL;

  def = NULL_TREE;
  if (gimple_assign_cast_p (def_stmt))
    {
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
          && TYPE_PRECISION (TREE_TYPE (rhs1))
             == TYPE_PRECISION (TREE_TYPE (oprnd0)))
        def = rhs1;
    }

  if (def == NULL_TREE)
    {
      def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
      def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
                                               NULL_TREE);
      STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo) = def_stmt;
    }

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_recog_vector_vector_shift_pattern: detected: ");

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
  pattern_stmt = gimple_build_assign_with_ops (rhs_code, var, oprnd0, def);

  if (vect_print_dump_info (REPORT_DETAILS))
    print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);

  VEC_safe_push (gimple, heap, *stmts, last_stmt);
  return pattern_stmt;
}
/* Function vect_recog_mixed_size_cond_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE a_T, b_T, c_T;
   loop:
     S1  a_T = x_t CMP y_t ? b_T : c_T;

   where type 'TYPE' is an integral type which has different size
   from 'type'.  b_T and c_T are constants and if 'TYPE' is wider
   than 'type', the constants need to fit into an integer type
   with the same width as 'type'.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the pattern.
        Additionally a def_stmt is added.

        a_it = x_t CMP y_t ? b_it : c_it;
        a_T = (TYPE) a_it;  */
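/* For illustration (hypothetical source):

     char x[N], y[N];
     int a[N];
     for (i = 0; i < N; i++)
       a[i] = x[i] < y[i] ? 1 : 31;

   The comparison is done on chars but the result is an int; doing the
   COND_EXPR in a char-sized integer type first and widening afterwards lets
   both parts use a natural vector shape.  */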
static gimple
vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **stmts, tree *type_in,
                                    tree *type_out)
{
  gimple last_stmt = VEC_index (gimple, *stmts, 0);
  tree cond_expr, then_clause, else_clause;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
  tree type, vectype, comp_vectype, itype, vecitype;
  enum machine_mode cmpmode;
  gimple pattern_stmt, def_stmt;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

  if (!is_gimple_assign (last_stmt)
      || gimple_assign_rhs_code (last_stmt) != COND_EXPR
      || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;

  cond_expr = gimple_assign_rhs1 (last_stmt);
  then_clause = gimple_assign_rhs2 (last_stmt);
  else_clause = gimple_assign_rhs3 (last_stmt);

  if (TREE_CODE (then_clause) != INTEGER_CST
      || TREE_CODE (else_clause) != INTEGER_CST)
    return NULL;

  if (!COMPARISON_CLASS_P (cond_expr))
    return NULL;

  comp_vectype
    = get_vectype_for_scalar_type (TREE_TYPE (TREE_OPERAND (cond_expr, 0)));
  if (comp_vectype == NULL_TREE)
    return NULL;

  type = gimple_expr_type (last_stmt);
  cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));

  if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
    return NULL;

  vectype = get_vectype_for_scalar_type (type);
  if (vectype == NULL_TREE)
    return NULL;

  if (expand_vec_cond_expr_p (vectype, comp_vectype))
    return NULL;

  itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
                                          TYPE_UNSIGNED (type));
  if (itype == NULL_TREE
      || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
    return NULL;

  vecitype = get_vectype_for_scalar_type (itype);
  if (vecitype == NULL_TREE)
    return NULL;

  if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
    return NULL;

  if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
    {
      if (!int_fits_type_p (then_clause, itype)
          || !int_fits_type_p (else_clause, itype))
        return NULL;
    }

  def_stmt
    = gimple_build_assign_with_ops3 (COND_EXPR,
                                     vect_recog_temp_ssa_var (itype, NULL),
                                     unshare_expr (cond_expr),
                                     fold_convert (itype, then_clause),
                                     fold_convert (itype, else_clause));
  pattern_stmt
    = gimple_build_assign_with_ops (NOP_EXPR,
                                    vect_recog_temp_ssa_var (type, NULL),
                                    gimple_assign_lhs (def_stmt), NULL_TREE);

  STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo) = def_stmt;
  def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
  set_vinfo_for_stmt (def_stmt, def_stmt_info);
  STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
  *type_in = vecitype;
  *type_out = vectype;

  return pattern_stmt;
}
/* Helper function of vect_recog_bool_pattern.  Called recursively, return
   true if bool VAR can be optimized that way.  */
static bool
check_bool_pattern (tree var, loop_vec_info loop_vinfo)
{
  gimple def_stmt;
  enum vect_def_type dt;
  tree def, rhs1;
  enum tree_code rhs_code;

  if (!vect_is_simple_use (var, loop_vinfo, NULL, &def_stmt, &def, &dt))
    return false;

  if (dt != vect_internal_def)
    return false;

  if (!is_gimple_assign (def_stmt))
    return false;

  if (!has_single_use (def))
    return false;

  rhs1 = gimple_assign_rhs1 (def_stmt);
  rhs_code = gimple_assign_rhs_code (def_stmt);
  switch (rhs_code)
    {
    case SSA_NAME:
      return check_bool_pattern (rhs1, loop_vinfo);

    CASE_CONVERT:
      if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
           || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
          && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
        return false;
      return check_bool_pattern (rhs1, loop_vinfo);

    case BIT_NOT_EXPR:
      return check_bool_pattern (rhs1, loop_vinfo);

    case BIT_AND_EXPR:
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
      if (!check_bool_pattern (rhs1, loop_vinfo))
        return false;
      return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo);

    default:
      if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
        {
          tree vecitype, comp_vectype;

          comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
          if (comp_vectype == NULL_TREE)
            return false;

          if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
            {
              enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
              tree itype
                = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
              vecitype = get_vectype_for_scalar_type (itype);
              if (vecitype == NULL_TREE)
                return false;
            }
          else
            vecitype = comp_vectype;
          return expand_vec_cond_expr_p (vecitype, comp_vectype);
        }
      return false;
    }
}
/* Helper function of adjust_bool_pattern.  Add a cast to TYPE to a previous
   stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
   to PATTERN_DEF_STMT and adding a cast as RELATED_STMT.  */
static tree
adjust_bool_pattern_cast (tree type, tree var)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
  gimple cast_stmt, pattern_stmt;

  gcc_assert (!STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo));
  pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
  STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo) = pattern_stmt;
  cast_stmt
    = gimple_build_assign_with_ops (NOP_EXPR,
                                    vect_recog_temp_ssa_var (type, NULL),
                                    gimple_assign_lhs (pattern_stmt),
                                    NULL_TREE);
  STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
  return gimple_assign_lhs (cast_stmt);
}
/* Helper function of vect_recog_bool_pattern.  Do the actual transformations,
   recursively.  VAR is an SSA_NAME that should be transformed from bool
   to a wider integer type, OUT_TYPE is the desired final integer type of
   the whole pattern, TRUEVAL should be NULL unless optimizing
   BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
   in the then_clause, STMTS is where statements with added pattern stmts
   should be pushed to.  */
static tree
adjust_bool_pattern (tree var, tree out_type, tree trueval,
                     VEC (gimple, heap) **stmts)
{
  gimple stmt = SSA_NAME_DEF_STMT (var);
  enum tree_code rhs_code, def_rhs_code;
  tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
  location_t loc;
  gimple pattern_stmt, def_stmt;

  rhs1 = gimple_assign_rhs1 (stmt);
  rhs2 = gimple_assign_rhs2 (stmt);
  rhs_code = gimple_assign_rhs_code (stmt);
  loc = gimple_location (stmt);
  switch (rhs_code)
    {
    case SSA_NAME:
    CASE_CONVERT:
      irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
      itype = TREE_TYPE (irhs1);
      pattern_stmt
        = gimple_build_assign_with_ops (SSA_NAME,
                                        vect_recog_temp_ssa_var (itype, NULL),
                                        irhs1, NULL_TREE);
      break;

    case BIT_NOT_EXPR:
      irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
      itype = TREE_TYPE (irhs1);
      pattern_stmt
        = gimple_build_assign_with_ops (BIT_XOR_EXPR,
                                        vect_recog_temp_ssa_var (itype, NULL),
                                        irhs1, build_int_cst (itype, 1));
      break;

    case BIT_AND_EXPR:
      /* Try to optimize x = y & (a < b ? 1 : 0); into
         x = (a < b ? y : 0);

         E.g. for:
           bool a_b, b_b, c_b;
           TYPE d_T;

           S1  a_b = x1 CMP1 y1;
           S2  b_b = x2 CMP2 y2;
           S3  c_b = a_b & b_b;
           S4  d_T = (TYPE) c_b;

         we would normally emit:

           S1'  a_T = x1 CMP1 y1 ? 1 : 0;
           S2'  b_T = x2 CMP2 y2 ? 1 : 0;
           S3'  c_T = a_T & b_T;
           S4'  d_T = c_T;

         but we can save one stmt by using the
         result of one of the COND_EXPRs in the other COND_EXPR and leave
         BIT_AND_EXPR stmt out:

           S1'  a_T = x1 CMP1 y1 ? 1 : 0;
           S3'  c_T = x2 CMP2 y2 ? a_T : 0;
           S4'  d_T = c_T;

         At least when VEC_COND_EXPR is implemented using masks
         cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
         computes the comparison masks and ands it, in one case with
         all ones vector, in the other case with a vector register.
         Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
         often more expensive.  */
      def_stmt = SSA_NAME_DEF_STMT (rhs2);
      def_rhs_code = gimple_assign_rhs_code (def_stmt);
      if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
        {
          tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
          irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
          if (TYPE_PRECISION (TREE_TYPE (irhs1))
              == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
            {
              gimple tstmt;
              stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
              irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
              tstmt = VEC_pop (gimple, *stmts);
              gcc_assert (tstmt == def_stmt);
              VEC_quick_push (gimple, *stmts, stmt);
              STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
                = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
              gcc_assert (!STMT_VINFO_PATTERN_DEF_STMT (stmt_def_vinfo));
              STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
              return irhs2;
            }
          else
            irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
          goto and_ior_xor;
        }
      def_stmt = SSA_NAME_DEF_STMT (rhs1);
      def_rhs_code = gimple_assign_rhs_code (def_stmt);
      if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
        {
          tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
          irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
          if (TYPE_PRECISION (TREE_TYPE (irhs2))
              == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
            {
              gimple tstmt;
              stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
              irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
              tstmt = VEC_pop (gimple, *stmts);
              gcc_assert (tstmt == def_stmt);
              VEC_quick_push (gimple, *stmts, stmt);
              STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
                = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
              gcc_assert (!STMT_VINFO_PATTERN_DEF_STMT (stmt_def_vinfo));
              STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
              return irhs1;
            }
          else
            irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
          goto and_ior_xor;
        }
      /* FALLTHRU */
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
      irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
      irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
    and_ior_xor:
      if (TYPE_PRECISION (TREE_TYPE (irhs1))
          != TYPE_PRECISION (TREE_TYPE (irhs2)))
        {
          int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
          int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
          int out_prec = TYPE_PRECISION (out_type);
          if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
            irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
          else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
            irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
          else
            {
              irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
              irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
            }
        }
      itype = TREE_TYPE (irhs1);
      pattern_stmt
        = gimple_build_assign_with_ops (rhs_code,
                                        vect_recog_temp_ssa_var (itype, NULL),
                                        irhs1, irhs2);
      break;

    default:
      gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
      if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
          || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
        {
          enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
          itype
            = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
        }
      else
        itype = TREE_TYPE (rhs1);
      cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
      if (trueval == NULL_TREE)
        trueval = build_int_cst (itype, 1);
      else
        gcc_checking_assert (useless_type_conversion_p (itype,
                                                        TREE_TYPE (trueval)));
      pattern_stmt
        = gimple_build_assign_with_ops3 (COND_EXPR,
                                         vect_recog_temp_ssa_var (itype, NULL),
                                         cond_expr, trueval,
                                         build_int_cst (itype, 0));
      break;
    }

  VEC_safe_push (gimple, heap, *stmts, stmt);
  gimple_set_location (pattern_stmt, loc);
  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
  return gimple_assign_lhs (pattern_stmt);
}
/* Function vect_recog_bool_pattern

   Try to find pattern like following:

     bool a_b, b_b, c_b, d_b, e_b;
     TYPE f_T;
   loop:
     S1  a_b = x1 CMP1 y1;
     S2  b_b = x2 CMP2 y2;
     S3  c_b = a_b & b_b;
     S4  d_b = x3 CMP3 y3;
     S5  e_b = c_b | d_b;
     S6  f_T = (TYPE) e_b;

   where type 'TYPE' is an integral type.

   Input:

   * LAST_STMT: A stmt at the end from which the pattern
                search begins, i.e. cast of a bool to
                an integer type.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the pattern.

        Assuming size of TYPE is the same as size of all comparisons
        (otherwise some casts would be added where needed), for the above
        sequence we create related pattern stmts:
        S1'  a_T = x1 CMP1 y1 ? 1 : 0;
        S3'  c_T = x2 CMP2 y2 ? a_T : 0;
        S4'  d_T = x3 CMP3 y3 ? 1 : 0;
        S5'  e_T = c_T | d_T;
        S6'  f_T = e_T;

        Instead of the above S3' we could emit:
        S2'  b_T = x2 CMP2 y2 ? 1 : 0;
        S3'  c_T = a_T | b_T;
        but the above is more efficient.  */
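/* For illustration (hypothetical source):

     int x1[N], y1[N], x2[N], y2[N], f[N];
     for (i = 0; i < N; i++)
       f[i] = (x1[i] < y1[i]) & (x2[i] < y2[i]);

   gimplifies to roughly S1-S6 above; the pattern turns the bool operations
   into integer COND_EXPRs and bitwise ops that map onto vector comparison
   masks.  */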
static gimple
vect_recog_bool_pattern (VEC (gimple, heap) **stmts, tree *type_in,
			 tree *type_out)
{
  gimple last_stmt = VEC_pop (gimple, *stmts);
  enum tree_code rhs_code;
  tree var, lhs, rhs, vectype;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  gimple pattern_stmt;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  var = gimple_assign_rhs1 (last_stmt);
  lhs = gimple_assign_lhs (last_stmt);

  if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
       || !TYPE_UNSIGNED (TREE_TYPE (var)))
      && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  if (CONVERT_EXPR_CODE_P (rhs_code))
    {
      if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE)
	return NULL;
      vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
      if (vectype == NULL_TREE)
	return NULL;

      if (!check_bool_pattern (var, loop_vinfo))
	return NULL;

      rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
      if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
	pattern_stmt
	  = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
      else
	pattern_stmt
	  = gimple_build_assign_with_ops (NOP_EXPR, lhs, rhs, NULL_TREE);
      *type_out = vectype;
      *type_in = vectype;
      VEC_safe_push (gimple, heap, *stmts, last_stmt);
      return pattern_stmt;
    }
  else if (rhs_code == SSA_NAME
	   && STMT_VINFO_DATA_REF (stmt_vinfo))
    {
      stmt_vec_info pattern_stmt_info;
      vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      gcc_assert (vectype != NULL_TREE);
      if (!check_bool_pattern (var, loop_vinfo))
	return NULL;

      rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
      lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
      if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
	{
	  tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
	  gimple cast_stmt
	    = gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs, NULL_TREE);
	  STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo) = cast_stmt;
	  rhs = rhs2;
	}
      pattern_stmt
	= gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
      pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
      STMT_VINFO_DATA_REF (pattern_stmt_info)
	= STMT_VINFO_DATA_REF (stmt_vinfo);
      STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
	= STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
      STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
      STMT_VINFO_DR_OFFSET (pattern_stmt_info)
	= STMT_VINFO_DR_OFFSET (stmt_vinfo);
      STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
      STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
	= STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
      *type_out = vectype;
      *type_in = vectype;
      VEC_safe_push (gimple, heap, *stmts, last_stmt);
      return pattern_stmt;
    }
  else
    return NULL;
}
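
/* Illustrative note for the second branch above (assumed names): a bool
   result stored directly to memory, e.g.

     bool b[N];
     ...
     b[i] = a_b;

   keeps the original store's data reference, while the stored value is
   computed in the element type of the store's vectype and assigned
   through a VIEW_CONVERT_EXPR of the original lhs.  */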
/* Mark statements that are involved in a pattern.  */

static inline void
vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
			 tree pattern_vectype)
{
  stmt_vec_info pattern_stmt_info, def_stmt_info;
  stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
  gimple def_stmt;

  pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
  if (pattern_stmt_info == NULL)
    {
      pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
    }
  gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));

  STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
  STMT_VINFO_DEF_TYPE (pattern_stmt_info)
    = STMT_VINFO_DEF_TYPE (orig_stmt_info);
  STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
  STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
  STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
  STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info)
    = STMT_VINFO_PATTERN_DEF_STMT (orig_stmt_info);
  if (STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info))
    {
      def_stmt = STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info);
      def_stmt_info = vinfo_for_stmt (def_stmt);
      if (def_stmt_info == NULL)
	{
	  def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
	  set_vinfo_for_stmt (def_stmt, def_stmt_info);
	}
      gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
      STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
      STMT_VINFO_DEF_TYPE (def_stmt_info)
	= STMT_VINFO_DEF_TYPE (orig_stmt_info);
      if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
	STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
    }
}
/* Function vect_pattern_recog_1

   Input:
   PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
	computation pattern.
   STMT: A stmt from which the pattern search should start.

   If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
   expression that computes the same functionality and can be used to
   replace the sequence of stmts that are involved in the pattern.

   Output:
   This function checks if the expression returned by PATTERN_RECOG_FUNC is
   supported in vector form by the target.  We use 'TYPE_IN' to obtain the
   relevant vector type.  If 'TYPE_IN' is already a vector type, then this
   indicates that target support had already been checked by
   PATTERN_RECOG_FUNC.  If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC,
   we check that it fits to the available target pattern.

   This function also does some bookkeeping, as explained in the
   documentation for vect_pattern_recog.  */
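
/* Sketch of the two protocols a recognition function may use (assumed
   values, for illustration).  A function that has already verified
   target support returns a vector type:

     *type_in = vectype;	<- VECTOR_MODE_P (TYPE_MODE (*type_in))
     *type_out = NULL_TREE;	<- pattern_vectype = type_in below

   A function that leaves the check to this driver returns scalar types
   instead, and the optab lookup below must then find a target insn for
   the pattern stmt's code at the corresponding vector mode.  */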
static void
vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
		      gimple_stmt_iterator si,
		      VEC (gimple, heap) **stmts_to_replace)
{
  gimple stmt = gsi_stmt (si), pattern_stmt;
  stmt_vec_info stmt_info;
  loop_vec_info loop_vinfo;
  tree pattern_vectype;
  tree type_in, type_out;
  enum tree_code code;
  int i;
  gimple next;

  VEC_truncate (gimple, *stmts_to_replace, 0);
  VEC_quick_push (gimple, *stmts_to_replace, stmt);
  pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
  if (!pattern_stmt)
    return;

  stmt = VEC_last (gimple, *stmts_to_replace);
  stmt_info = vinfo_for_stmt (stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);

  if (VECTOR_MODE_P (TYPE_MODE (type_in)))
    {
      /* No need to check target support (already checked by the pattern
	 recognition function).  */
      pattern_vectype = type_out ? type_out : type_in;
    }
  else
    {
      enum machine_mode vec_mode;
      enum insn_code icode;
      optab optab;

      /* Check target support.  */
      type_in = get_vectype_for_scalar_type (type_in);
      if (!type_in)
	return;
      if (type_out)
	type_out = get_vectype_for_scalar_type (type_out);
      else
	type_out = type_in;
      if (!type_out)
	return;
      pattern_vectype = type_out;

      if (is_gimple_assign (pattern_stmt))
	code = gimple_assign_rhs_code (pattern_stmt);
      else
	{
	  gcc_assert (is_gimple_call (pattern_stmt));
	  code = CALL_EXPR;
	}

      optab = optab_for_tree_code (code, type_in, optab_default);
      vec_mode = TYPE_MODE (type_in);
      if (!optab
	  || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
	  || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
	return;
    }

  /* Found a vectorizable pattern.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "pattern recognized: ");
      print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
    }

  /* Mark the stmts that are involved in the pattern.  */
  vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);

  /* Patterns cannot be vectorized using SLP, because they change the order of
     computation.  */
  FOR_EACH_VEC_ELT (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
    if (next == stmt)
      VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i);

  /* It is possible that additional pattern stmts are created and inserted in
     STMTS_TO_REPLACE.  We create a stmt_info for each of them, and mark the
     relevant statements.  */
  for (i = 0; VEC_iterate (gimple, *stmts_to_replace, i, stmt)
	      && (unsigned) i < (VEC_length (gimple, *stmts_to_replace) - 1);
       i++)
    {
      stmt_info = vinfo_for_stmt (stmt);
      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
      if (vect_print_dump_info (REPORT_DETAILS))
	{
	  fprintf (vect_dump, "additional pattern stmt: ");
	  print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
	}

      vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
    }
}
/* Function vect_pattern_recog

   Input:
   LOOP_VINFO - the loop_vec_info of a loop in which we want to look for
	computation idioms.

   Output - for each computation idiom that is detected we create a new stmt
	that provides the same functionality and that can be vectorized.  We
	also record some information in the stmt_vec_info of the relevant
	stmts, as explained below:

   At the entry to this function we have the following stmts, with the
   following initial value in the STMT_VINFO fields:

	 stmt                     in_pattern_p  related_stmt    vec_stmt
	 S1: a_i = ....                 -       -               -
	 S2: a_2 = ..use(a_i)..         -       -               -
	 S3: a_1 = ..use(a_2)..         -       -               -
	 S4: a_0 = ..use(a_1)..         -       -               -
	 S5: ... = ..use(a_0)..         -       -               -

   Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
   represented by a single stmt.  We then:
   - create a new stmt S6 equivalent to the pattern (the stmt is not
     inserted into the code)
   - fill in the STMT_VINFO fields as follows:

				  in_pattern_p  related_stmt    vec_stmt
	 S1: a_i = ....                 -       -               -
	 S2: a_2 = ..use(a_i)..         -       -               -
	 S3: a_1 = ..use(a_2)..         -       -               -
	 S4: a_0 = ..use(a_1)..         true    S6              -
	   '---> S6: a_new = ....       -       S4              -
	 S5: ... = ..use(a_0)..         -       -               -

   (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
   to each other through the RELATED_STMT field).

   S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
   of S4 because it will replace all its uses.  Stmts {S1,S2,S3} will
   remain irrelevant unless used by stmts other than S4.

   If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
   (because they are marked as irrelevant).  It will vectorize S6, and record
   a pointer to the new vector stmt VS6 from S6 (as usual).
   S4 will be skipped, and S5 will be vectorized as usual:

				  in_pattern_p  related_stmt    vec_stmt
	 S1: a_i = ....                 -       -               -
	 S2: a_2 = ..use(a_i)..         -       -               -
	 S3: a_1 = ..use(a_2)..         -       -               -
       > VS6: va_new = ....             -       -               -
	 S4: a_0 = ..use(a_1)..         true    S6              VS6
	   '---> S6: a_new = ....       -       S4              VS6
       > VS5: ... = ..vuse(va_new)..    -       -               -
	 S5: ... = ..use(a_0)..         -       -               -

   DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
   elsewhere), and we'll end up with:

	 VS6: va_new = ....
	 VS5: ... = ..vuse(va_new)..

   In case of more than one pattern statement, e.g., widen-mult with
   intermediate type:

     S1  a_t = ...;
     S2  a_T = (TYPE) a_t;
	   '--> S3: a_it = (interm_type) a_t;
     S4  prod_T = a_T * CONST;
	   '--> S5: prod_T' = a_it w* CONST;

   there may be other users of a_T outside the pattern.  In that case S2 will
   be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
   and vectorized.  The vector stmt VS2 will be recorded in S2, and VS3 will
   be recorded in S3.  */
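
/* A concrete source-level sketch of the multi-statement case (assumed
   types and names, for illustration): with 8-bit chars and 32-bit ints,

     unsigned char a_t;
     unsigned int prod_T;
     ...
     prod_T = (unsigned int) a_t * 200;

   may be recognized as a widening multiply through a 16-bit intermediate
   type, creating both the S3 cast stmt and the S5 widening-mult stmt
   shown above.  */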
void
vect_pattern_recog (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  unsigned int i, j;
  vect_recog_func_ptr vect_recog_func;
  VEC (gimple, heap) *stmts_to_replace = VEC_alloc (gimple, heap, 1);

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_pattern_recog ===");

  /* Scan through the loop stmts, applying the pattern recognition
     functions starting at each stmt visited:  */
  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  /* Scan over all generic vect_recog_xxx_pattern functions.  */
	  for (j = 0; j < NUM_PATTERNS; j++)
	    {
	      vect_recog_func = vect_vect_recog_func_ptrs[j];
	      vect_pattern_recog_1 (vect_recog_func, si,
				    &stmts_to_replace);
	    }
	}
    }

  VEC_free (gimple, heap, stmts_to_replace);
}