/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006-2014 Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "coretypes.h"
#include "stor-layout.h"
#include "hard-reg-set.h"
#include "dominance.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-expr.h"
#include "gimple-iterator.h"
#include "gimple-ssa.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-data-ref.h"
#include "tree-vectorizer.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "diagnostic-core.h"

/* Pattern recognition functions  */
static gimple vect_recog_widen_sum_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_widen_mult_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_dot_prod_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_sad_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_pow_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_over_widening_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_widen_shift_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_rotate_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_vector_vector_shift_pattern (vec<gimple> *, tree *,
						      tree *);
static gimple vect_recog_divmod_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_mixed_size_cond_pattern (vec<gimple> *, tree *,
						  tree *);
static gimple vect_recog_bool_pattern (vec<gimple> *, tree *, tree *);

static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
	vect_recog_widen_mult_pattern,
	vect_recog_widen_sum_pattern,
	vect_recog_dot_prod_pattern,
	vect_recog_sad_pattern,
	vect_recog_pow_pattern,
	vect_recog_widen_shift_pattern,
	vect_recog_over_widening_pattern,
	vect_recog_rotate_pattern,
	vect_recog_vector_vector_shift_pattern,
	vect_recog_divmod_pattern,
	vect_recog_mixed_size_cond_pattern,
	vect_recog_bool_pattern};
static inline void
append_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
				      stmt);
}

static inline void
new_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
  append_pattern_def_seq (stmt_info, stmt);
}
/* Check whether STMT2 is in the same loop or basic block as STMT1.
   Which of the two applies depends on whether we're currently doing
   loop-based or basic-block-based vectorization, as determined by
   the vinfo_for_stmt for STMT1 (which must be defined).

   If this returns true, vinfo_for_stmt for STMT2 is guaranteed
   to be defined as well.  */

static bool
vect_same_loop_or_bb_p (gimple stmt1, gimple stmt2)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);

  if (!gimple_bb (stmt2))
    return false;

  if (loop_vinfo)
    {
      struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt2)))
	return false;
    }
  else
    {
      if (gimple_bb (stmt2) != BB_VINFO_BB (bb_vinfo)
	  || gimple_code (stmt2) == GIMPLE_PHI)
	return false;
    }

  gcc_assert (vinfo_for_stmt (stmt2));
  return true;
}
/* If the LHS of DEF_STMT has a single use, and that statement is
   in the same loop or basic block, return it.  */

static gimple
vect_single_imm_use (gimple def_stmt)
{
  tree lhs = gimple_assign_lhs (def_stmt);
  use_operand_p use_p;
  gimple use_stmt;

  if (!single_imm_use (lhs, &use_p, &use_stmt))
    return NULL;

  if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
    return NULL;

  return use_stmt;
}
/* Check whether NAME, an ssa-name used in USE_STMT,
   is a result of a type promotion, such that:
     DEF_STMT: NAME = NOP (name0)
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.  */

static bool
type_conversion_p (tree name, gimple use_stmt, bool check_sign,
		   tree *orig_type, gimple *def_stmt, bool *promotion)
{
  tree dummy;
  gimple dummy_gimple;
  loop_vec_info loop_vinfo;
  stmt_vec_info stmt_vinfo;
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;
  tree def;
  bb_vec_info bb_vinfo;

  stmt_vinfo = vinfo_for_stmt (use_stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  if (!vect_is_simple_use (name, use_stmt, loop_vinfo, bb_vinfo, def_stmt,
			   &def, &dt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (!*def_stmt)
    return false;

  if (!is_gimple_assign (*def_stmt))
    return false;

  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *orig_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
    return false;

  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
    *promotion = true;
  else
    *promotion = false;

  if (!vect_is_simple_use (oprnd0, *def_stmt, loop_vinfo,
			   bb_vinfo, &dummy_gimple, &dummy, &dt))
    return false;

  return true;
}
/* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
   is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var.  */

static tree
vect_recog_temp_ssa_var (tree type, gimple stmt)
{
  return make_temp_ssa_name (type, stmt, "patt");
}
/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE1 prod;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
   same size of 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */
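
/* Editor's illustration (not part of the original GCC sources): a source
   loop of the shape below, assuming hypothetical arrays a[] and b[] with
   16-bit elements accumulated into a 32-bit sum, is the kind of input this
   recognizer is meant to match:

     short a[N], b[N];
     int sum = 0;
     for (int i = 0; i < N; i++)
       sum += (int) a[i] * (int) b[i];    // S3, S4, S5, S7 above

   On detection, the widening multiply-and-accumulate is represented by a
   single DOT_PROD_EXPR <a[i], b[i], sum_0>.  */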
static gimple
vect_recog_dot_prod_pattern (vec<gimple> *stmts, tree *type_in,
			     tree *type_out)
{
  gimple stmt, last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  tree oprnd00, oprnd01;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  tree prod_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DPROD = DX * DY;
          DDPROD = (TYPE2) DPROD;
          sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type
     - sum is the same size of DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (oprnd0);
    }
  else
    {
      gimple def_stmt;

      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (last_stmt);
      oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), type)
	  || !types_compatible_p (TREE_TYPE (oprnd1), type))
        return NULL;
      stmt = last_stmt;

      if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
			     &promotion)
	  && promotion)
        {
          stmt = def_stmt;
          oprnd0 = gimple_assign_rhs1 (stmt);
        }
      else
        half_type = type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr  */
  if (TREE_CODE (oprnd0) != SSA_NAME)
    return NULL;

  prod_type = half_type;
  stmt = SSA_NAME_DEF_STMT (oprnd0);

  /* It could not be the dot_prod pattern if the stmt is outside the loop.  */
  if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
     inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (stmt))
    return NULL;
  stmt_vinfo = vinfo_for_stmt (stmt);
  gcc_assert (stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
    return NULL;
  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as a widening multiplication?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
        return NULL;
      stmt_vinfo = vinfo_for_stmt (stmt);
      gcc_assert (stmt_vinfo);
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
      oprnd00 = gimple_assign_rhs1 (stmt);
      oprnd01 = gimple_assign_rhs2 (stmt);
      STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (last_stmt))
	  = STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
    }
  else
    {
      tree half_type0, half_type1;
      gimple def_stmt;
      tree oprnd0, oprnd1;

      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
          || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
        return NULL;
      if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
			      &promotion)
	  || !promotion)
        return NULL;
      oprnd00 = gimple_assign_rhs1 (def_stmt);
      if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
			      &promotion)
	  || !promotion)
        return NULL;
      oprnd01 = gimple_assign_rhs1 (def_stmt);
      if (!types_compatible_p (half_type0, half_type1))
        return NULL;
      if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
	return NULL;
    }

  half_type = TREE_TYPE (oprnd00);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (DOT_PROD_EXPR, var,
					       oprnd00, oprnd01, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_dot_prod_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}
/* Function vect_recog_sad_pattern

   Try to find the following Sum of Absolute Difference (SAD) pattern:

     unsigned type x_t, y_t;
     signed TYPE1 diff, abs_diff;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  diff = x_T - y_T;
     S6  abs_diff = ABS_EXPR <diff>;
     [S7  abs_diff = (TYPE2) abs_diff;  #optional]
     S8  sum_1 = abs_diff + sum_0;

   where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
   same size of 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S8, the pattern
   {S3,S4,S5,S6,S7,S8} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        SAD_EXPR <x_t, y_t, sum_0>
  */
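
/* Editor's illustration (not part of the original GCC sources): the classic
   SAD kernel this recognizer targets, assuming hypothetical unsigned char
   inputs x[] and y[] and an int accumulator:

     unsigned char x[N], y[N];
     int sad = 0;
     for (int i = 0; i < N; i++)
       sad += abs ((int) x[i] - (int) y[i]);   // S3-S8 above

   which, once detected, is replaced by SAD_EXPR <x[i], y[i], sad_0>.  */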
static gimple
vect_recog_sad_pattern (vec<gimple> *stmts, tree *type_in,
			tree *type_out)
{
  gimple last_stmt = (*stmts)[0];
  tree sad_oprnd0, sad_oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree half_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  tree sum_type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DDIFF = DX - DY;
          DAD = ABS_EXPR <DDIFF>;
          DDPROD = (TYPE2) DPROD;
          sum_1 = DAD + sum_0;
     In which
     - DX is at least double the size of X
     - DY is at least double the size of Y
     - DX, DY, DDIFF, DAD all have the same type
     - sum is the same size of DAD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD w+ sum_0;    #widen summation
     or
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD + sum_0;     #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  tree plus_oprnd0, plus_oprnd1;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      gimple stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      sum_type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      plus_oprnd0 = gimple_assign_rhs1 (stmt);
      plus_oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (plus_oprnd0);
    }
  else
    {
      gimple def_stmt;

      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
        return NULL;
      plus_oprnd0 = gimple_assign_rhs1 (last_stmt);
      plus_oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (plus_oprnd0), sum_type)
	  || !types_compatible_p (TREE_TYPE (plus_oprnd1), sum_type))
        return NULL;

      /* The type conversion could be promotion, demotion,
         or just signed -> unsigned.  */
      if (type_conversion_p (plus_oprnd0, last_stmt, false,
                             &half_type, &def_stmt, &promotion))
        plus_oprnd0 = gimple_assign_rhs1 (def_stmt);
      else
        half_type = sum_type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that plus_oprnd1 is the reduction variable (defined by a
     loop-header phi), and plus_oprnd0 is an ssa-name defined by a stmt in the
     loop body.  Then check that plus_oprnd0 is defined by an abs_expr.  */

  if (TREE_CODE (plus_oprnd0) != SSA_NAME)
    return NULL;

  tree abs_type = half_type;
  gimple abs_stmt = SSA_NAME_DEF_STMT (plus_oprnd0);

  /* It could not be the sad pattern if the abs_stmt is outside the loop.  */
  if (!gimple_bb (abs_stmt)
      || !flow_bb_inside_loop_p (loop, gimple_bb (abs_stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
     inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (abs_stmt))
    return NULL;

  stmt_vec_info abs_stmt_vinfo = vinfo_for_stmt (abs_stmt);
  gcc_assert (abs_stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (abs_stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR)
    return NULL;

  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
  if (!types_compatible_p (TREE_TYPE (abs_oprnd), abs_type))
    return NULL;
  if (TYPE_UNSIGNED (abs_type))
    return NULL;

  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */

  if (TREE_CODE (abs_oprnd) != SSA_NAME)
    return NULL;

  gimple diff_stmt = SSA_NAME_DEF_STMT (abs_oprnd);

  /* It could not be the sad pattern if the diff_stmt is outside the loop.  */
  if (!gimple_bb (diff_stmt)
      || !flow_bb_inside_loop_p (loop, gimple_bb (diff_stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
     inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (diff_stmt))
    return NULL;

  stmt_vec_info diff_stmt_vinfo = vinfo_for_stmt (diff_stmt);
  gcc_assert (diff_stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (diff_stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (diff_stmt) != MINUS_EXPR)
    return NULL;

  tree half_type0, half_type1;
  gimple def_stmt;

  tree minus_oprnd0 = gimple_assign_rhs1 (diff_stmt);
  tree minus_oprnd1 = gimple_assign_rhs2 (diff_stmt);

  if (!types_compatible_p (TREE_TYPE (minus_oprnd0), abs_type)
      || !types_compatible_p (TREE_TYPE (minus_oprnd1), abs_type))
    return NULL;
  if (!type_conversion_p (minus_oprnd0, diff_stmt, false,
                          &half_type0, &def_stmt, &promotion)
      || !promotion)
    return NULL;
  sad_oprnd0 = gimple_assign_rhs1 (def_stmt);

  if (!type_conversion_p (minus_oprnd1, diff_stmt, false,
                          &half_type1, &def_stmt, &promotion)
      || !promotion)
    return NULL;
  sad_oprnd1 = gimple_assign_rhs1 (def_stmt);

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;
  if (TYPE_PRECISION (abs_type) < TYPE_PRECISION (half_type0) * 2
      || TYPE_PRECISION (sum_type) < TYPE_PRECISION (half_type0) * 2)
    return NULL;

  *type_in = TREE_TYPE (sad_oprnd0);
  *type_out = sum_type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  tree var = vect_recog_temp_ssa_var (sum_type, NULL);
  gimple pattern_stmt = gimple_build_assign_with_ops
                          (SAD_EXPR, var, sad_oprnd0, sad_oprnd1, plus_oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_sad_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}
/* Handle widening operation by a constant.  At the moment we support
   MULT_EXPR and LSHIFT_EXPR.

   For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
   we check that CONST_OPRND is less or equal to the size of HALF_TYPE.

   Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
   HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
   that satisfies the above restrictions, we can perform a widening operation
   from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
   with a_it = (interm_type) a_t;  */
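
/* Editor's illustration (not part of the original GCC sources): with
   HALF_TYPE unsigned char and TYPE int (four times wider), a shift like

     int res = (int) c << 9;     // 9 > 8 == TYPE_PRECISION (unsigned char)

   does not satisfy the HALF_TYPE check, but 9 does fit the intermediate
   16-bit type, so the promotion is rewritten as a char->short conversion
   followed by a short->int widening shift.  */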
static bool
vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
			       tree const_oprnd, tree *oprnd,
			       vec<gimple> *stmts, tree type,
			       tree *half_type, gimple def_stmt)
{
  tree new_type, new_oprnd;
  gimple new_stmt;

  if (code != MULT_EXPR && code != LSHIFT_EXPR)
    return false;

  if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
        || (code == LSHIFT_EXPR
            && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
	       != 1))
      && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
    {
      /* CONST_OPRND is a constant of HALF_TYPE.  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      return true;
    }

  if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
    return false;

  if (!vect_same_loop_or_bb_p (stmt, def_stmt))
    return false;

  /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
     a type 2 times bigger than HALF_TYPE.  */
  new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
                                             TYPE_UNSIGNED (type));
  if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
      || (code == LSHIFT_EXPR
          && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
    return false;

  /* Use NEW_TYPE for widening operation.  */
  if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
    {
      new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
      /* Check if the already created pattern stmt is what we need.  */
      if (!is_gimple_assign (new_stmt)
          || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
          || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
        return false;

      stmts->safe_push (def_stmt);
      *oprnd = gimple_assign_lhs (new_stmt);
    }
  else
    {
      /* Create a_T = (NEW_TYPE) a_t;  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      new_oprnd = make_ssa_name (new_type, NULL);
      new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd,
					       NULL_TREE);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
      stmts->safe_push (def_stmt);
      *oprnd = new_oprnd;
    }

  *half_type = new_type;
  return true;
}
/* Function vect_recog_widen_mult_pattern

   Try to find the following pattern:

     type1 a_t;
     type2 b_t;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;

   where type 'TYPE' is at least double the size of type 'type1' and 'type2'.

   Also detect unsigned cases:

     unsigned type1 a_t;
     unsigned type2 b_t;
     unsigned TYPE u_prod_T;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;
     S6  u_prod_T = (unsigned TYPE) prod_T;

   and multiplication by constants:

     type a_t;
     TYPE a_T, prod_T;

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
     S5  prod_T = a_T * CONST;

   A special case of multiplication by constants is when 'TYPE' is 4 times
   bigger than 'type', but CONST fits an intermediate type 2 times smaller
   than 'TYPE'.  In that case we create an additional pattern stmt for S3
   to create a variable of the intermediate type, and perform widen-mult
   on the intermediate type as well:

     type a_t;
     interm_type a_it;
     TYPE a_T, prod_T, prod_T';

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
           '--> a_it = (interm_type) a_t;
     S5  prod_T = a_T * CONST;
           '--> prod_T' = a_it w* CONST;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
   is detected.  In case of unsigned widen-mult, the original stmt (S5) is
   replaced with S6 in STMTS.  In case of multiplication by a constant
   of an intermediate type (the last case above), STMTS also contains S3
   (inserted before S5).

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_MULT <a_t, b_t>
   If the result of WIDEN_MULT needs to be converted to a larger type, the
   returned stmt will be this type conversion stmt.
*/
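
/* Editor's illustration (not part of the original GCC sources), assuming
   hypothetical short inputs a[] and b[] and an int output c[]:

     short a[N], b[N];
     int c[N];
     for (int i = 0; i < N; i++)
       c[i] = (int) a[i] * (int) b[i];    // S3, S4, S5 above

   is rewritten to use WIDEN_MULT <a[i], b[i]>, so a target can use its
   widening multiply instructions instead of unpacking and multiplying.  */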
static gimple
vect_recog_widen_mult_pattern (vec<gimple> *stmts,
                               tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  gimple def_stmt0, def_stmt1;
  tree oprnd0, oprnd1;
  tree type, half_type0, half_type1;
  gimple new_stmt = NULL, pattern_stmt = NULL;
  tree vectype, vecitype;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  bool op1_ok;
  bool promotion;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* Check argument 0.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
			  &promotion)
      || !promotion)
    return NULL;
  /* Check argument 1.  */
  op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
			      &def_stmt1, &promotion);

  if (op1_ok && promotion)
    {
      oprnd0 = gimple_assign_rhs1 (def_stmt0);
      oprnd1 = gimple_assign_rhs1 (def_stmt1);
    }
  else
    {
      if (TREE_CODE (oprnd1) == INTEGER_CST
          && TREE_CODE (half_type0) == INTEGER_TYPE
          && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
		                            &oprnd0, stmts, type,
					    &half_type0, def_stmt0))
	{
	  half_type1 = half_type0;
	  oprnd1 = fold_convert (half_type1, oprnd1);
	}
      else
        return NULL;
    }

  /* If the two arguments have different sizes, convert the one with
     the smaller type into the larger type.  */
  if (TYPE_PRECISION (half_type0) != TYPE_PRECISION (half_type1))
    {
      tree *oprnd = NULL;
      gimple def_stmt = NULL;

      if (TYPE_PRECISION (half_type0) < TYPE_PRECISION (half_type1))
	{
	  def_stmt = def_stmt0;
	  half_type0 = half_type1;
	  oprnd = &oprnd0;
	}
      else
	{
	  def_stmt = def_stmt1;
	  half_type1 = half_type0;
	  oprnd = &oprnd1;
	}

      tree old_oprnd = gimple_assign_rhs1 (def_stmt);
      tree new_oprnd = make_ssa_name (half_type0, NULL);
      new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
					       old_oprnd, NULL_TREE);
      *oprnd = new_oprnd;
    }

  /* Handle unsigned case.  Look for
     S6  u_prod_T = (unsigned TYPE) prod_T;
     Use unsigned TYPE as the type for WIDEN_MULT_EXPR.  */
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
    {
      gimple use_stmt;
      tree use_lhs;
      tree use_type;

      if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
        return NULL;

      use_stmt = vect_single_imm_use (last_stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt)
	  || gimple_assign_rhs_code (use_stmt) != NOP_EXPR)
        return NULL;

      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      if (!INTEGRAL_TYPE_P (use_type)
          || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
          || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
        return NULL;

      type = use_type;
      last_stmt = use_stmt;
    }

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;

  /* If TYPE is more than twice larger than HALF_TYPE, we use WIDEN_MULT
     to get an intermediate result of type ITYPE.  In this case we need
     to build a statement to convert this intermediate result to type TYPE.  */
  tree itype = type;
  if (TYPE_PRECISION (type) > TYPE_PRECISION (half_type0) * 2)
    itype = build_nonstandard_integer_type
              (GET_MODE_BITSIZE (TYPE_MODE (half_type0)) * 2,
               TYPE_UNSIGNED (type));

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_recog_widen_mult_pattern: detected:\n");

  /* Check target support  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vecitype = get_vectype_for_scalar_type (itype);
  if (!vectype
      || !vecitype
      || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
					  vecitype, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = get_vectype_for_scalar_type (type);

  /* Pattern supported.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (itype, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, var, oprnd0,
					       oprnd1);

  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;

  /* If the original two operands have different sizes, we may need to convert
     the smaller one into the larger type.  If this is the case, at this point
     the new stmt is already built.  */
  if (new_stmt)
    {
      append_pattern_def_seq (stmt_vinfo, new_stmt);
      stmt_vec_info new_stmt_info
        = new_stmt_vec_info (new_stmt, loop_vinfo, bb_vinfo);
      set_vinfo_for_stmt (new_stmt, new_stmt_info);
      STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
    }

  /* If ITYPE is not TYPE, we need to build a type conversion stmt to convert
     the result of the widen-mult operation into type TYPE.  */
  if (itype != type)
    {
      append_pattern_def_seq (stmt_vinfo, pattern_stmt);
      stmt_vec_info pattern_stmt_info
        = new_stmt_vec_info (pattern_stmt, loop_vinfo, bb_vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
      STMT_VINFO_VECTYPE (pattern_stmt_info) = vecitype;
      pattern_stmt
        = gimple_build_assign_with_ops (NOP_EXPR,
					vect_recog_temp_ssa_var (type, NULL),
					gimple_assign_lhs (pattern_stmt),
					NULL_TREE);
    }

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}
/* Function vect_recog_pow_pattern

   Try to find the following pattern:

     x = POW (y, N);

   with POW being one of pow, powf, powi, powif and N being
   either 2 or 0.5.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        x = x * x
   or
	x = sqrt (x)
*/
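
/* Editor's illustration (not part of the original GCC sources): only calls
   with a literal exponent of 2 or 0.5 are handled, e.g.

     double d = pow (x, 2.0);     // becomes d = x * x
     float  f = powf (y, 0.5f);   // becomes a sqrt call, if vectorizable

   Any other exponent leaves the call untouched.  */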
static gimple
vect_recog_pow_pattern (vec<gimple> *stmts, tree *type_in,
			tree *type_out)
{
  gimple last_stmt = (*stmts)[0];
  tree fn, base, exp = NULL;
  gimple stmt;
  tree var;

  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  fn = gimple_call_fndecl (last_stmt);
  if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
    return NULL;

  switch (DECL_FUNCTION_CODE (fn))
    {
    case BUILT_IN_POWIF:
    case BUILT_IN_POWI:
    case BUILT_IN_POWF:
    case BUILT_IN_POW:
      base = gimple_call_arg (last_stmt, 0);
      exp = gimple_call_arg (last_stmt, 1);
      if (TREE_CODE (exp) != REAL_CST
	  && TREE_CODE (exp) != INTEGER_CST)
        return NULL;
      break;

    default:
      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  *type_out = NULL_TREE;

  /* Catch squaring.  */
  if ((tree_fits_shwi_p (exp)
       && tree_to_shwi (exp) == 2)
      || (TREE_CODE (exp) == REAL_CST
          && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
    {
      *type_in = TREE_TYPE (base);

      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign_with_ops (MULT_EXPR, var, base, base);
      return stmt;
    }

  /* Catch square root.  */
  if (TREE_CODE (exp) == REAL_CST
      && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
    {
      tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
      *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
      if (*type_in)
	{
	  gimple stmt = gimple_build_call (newfn, 1, base);
	  if (vectorizable_function (stmt, *type_in, *type_in)
	      != NULL_TREE)
	    {
	      var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
	      gimple_call_set_lhs (stmt, var);
	      return stmt;
	    }
	}
    }

  return NULL;
}
/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

     type x_t;
     TYPE x_T, sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e - we're
   summing elements of type 'type' into an accumulator of type 'TYPE'.  This is
   a special case of a reduction computation.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.  In the example,
   when this function is called with S3, the pattern {S2,S3} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
	 vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
	 intermediate results) rather than all N results.  Therefore, we
	 cannot allow this pattern when we want to get all the results and in
	 the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */
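
/* Editor's illustration (not part of the original GCC sources), assuming a
   hypothetical short array a[] summed into an int accumulator:

     short a[N];
     int sum = 0;
     for (int i = 0; i < N; i++)
       sum += (int) a[i];        // S2, S3 above

   is replaced by WIDEN_SUM <a[i], sum_0>.  */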
static gimple
vect_recog_widen_sum_pattern (vec<gimple> *stmts, tree *type_in,
			      tree *type_out)
{
  gimple stmt, last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE) X;
          sum_1 = DX + sum_0;
     In which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a cast from type 'type' to type
     'TYPE'.  */

  if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
			  &promotion)
      || !promotion)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (stmt);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_SUM_EXPR, var,
					       oprnd0, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_widen_sum_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}
/* Return TRUE if the operation in STMT can be performed on a smaller type.

   Input:
   STMT - a statement to check.
   DEF - we support operations with two operands, one of which is constant.
         The other operand can be defined by a demotion operation, or by a
         previous statement in a sequence of over-promoted operations.  In the
         latter case DEF is used to replace that operand.  (It is defined by a
         pattern statement we created for the previous statement in the
         sequence).

   Input/output:
   NEW_TYPE - Output: a smaller type that we are trying to use.  Input: if not
         NULL, it's the type of DEF.
   STMTS - additional pattern statements.  If a pattern statement (type
         conversion) is created in this function, its original statement is
         added to STMTS.

   Output:
   OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
         operands to use in the new pattern statement for STMT (will be created
         in vect_recog_over_widening_pattern ()).
   NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
         statements for STMT: the first one is a type promotion and the second
         one is the operation itself.  We return the type promotion statement
	 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
         the second pattern statement.  */
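
/* Editor's illustration (not part of the original GCC sources): for a
   promoted unsigned char operand, x_T << 7 cannot be done in unsigned char
   (the result needs up to 15 bits), but it fits the intermediate unsigned
   short, so this helper would return NEW_TYPE = unsigned short and, when
   the operand comes from an earlier pattern statement, a char->short
   conversion in NEW_DEF_STMT.  */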
static bool
vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
                                  tree *op0, tree *op1, gimple *new_def_stmt,
                                  vec<gimple> *stmts)
{
  enum tree_code code;
  tree const_oprnd, oprnd;
  tree interm_type = NULL_TREE, half_type, new_oprnd, type;
  gimple def_stmt, new_stmt;
  bool first = false;
  bool promotion;

  *op0 = NULL_TREE;
  *op1 = NULL_TREE;
  *new_def_stmt = NULL;

  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
      && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
    return false;

  oprnd = gimple_assign_rhs1 (stmt);
  const_oprnd = gimple_assign_rhs2 (stmt);
  type = gimple_expr_type (stmt);

  if (TREE_CODE (oprnd) != SSA_NAME
      || TREE_CODE (const_oprnd) != INTEGER_CST)
    return false;

  /* If oprnd has other uses besides that in stmt we cannot mark it
     as being part of a pattern only.  */
  if (!has_single_use (oprnd))
    return false;

  /* If we are in the middle of a sequence, we use DEF from a previous
     statement.  Otherwise, OPRND has to be a result of type promotion.  */
  if (*new_type)
    {
      half_type = *new_type;
      oprnd = def;
    }
  else
    {
      first = true;
      if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
			      &promotion)
	  || !promotion
	  || !vect_same_loop_or_bb_p (stmt, def_stmt))
        return false;
    }

  /* Can we perform the operation on a smaller type?  */
  switch (code)
    {
      case BIT_IOR_EXPR:
      case BIT_XOR_EXPR:
      case BIT_AND_EXPR:
        if (!int_fits_type_p (const_oprnd, half_type))
          {
            /* HALF_TYPE is not enough.  Try a bigger type if possible.  */
            if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
              return false;

            interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
            if (!int_fits_type_p (const_oprnd, interm_type))
              return false;
          }

        break;

      case LSHIFT_EXPR:
        /* Try intermediate type - HALF_TYPE is not enough for sure.  */
        if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
          return false;

        /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
          (e.g., if the original value was char, the shift amount is at most 8
           if we want to use short).  */
        if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
          return false;

        interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

        if (!vect_supportable_shift (code, interm_type))
          return false;

        break;

      case RSHIFT_EXPR:
        if (vect_supportable_shift (code, half_type))
          break;

        /* Try intermediate type - HALF_TYPE is not supported.  */
        if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
          return false;

        interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

        if (!vect_supportable_shift (code, interm_type))
          return false;

        break;

      default:
        gcc_unreachable ();
    }

  /* There are four possible cases:
     1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
        the first statement in the sequence)
        a. The original, HALF_TYPE, is not enough - we replace the promotion
           from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
        b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
           promotion.
     2. OPRND is defined by a pattern statement we created.
        a. Its type is not sufficient for the operation, we create a new stmt:
           a type conversion for OPRND from HALF_TYPE to INTERM_TYPE.  We store
           this statement in NEW_DEF_STMT, and it is later put in
	   STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
        b. OPRND is good to use in the new statement.  */
  if (first)
    {
      if (interm_type)
        {
          /* Replace the original type conversion HALF_TYPE->TYPE with
             HALF_TYPE->INTERM_TYPE.  */
          if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
            {
              new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
              /* Check if the already created pattern stmt is what we need.  */
              if (!is_gimple_assign (new_stmt)
                  || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
                  || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
                return false;

	      stmts->safe_push (def_stmt);
              oprnd = gimple_assign_lhs (new_stmt);
            }
          else
            {
              /* Create NEW_OPRND = (INTERM_TYPE) OPRND.  */
              oprnd = gimple_assign_rhs1 (def_stmt);
	      new_oprnd = make_ssa_name (interm_type, NULL);
	      new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
						       oprnd, NULL_TREE);
              STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
              stmts->safe_push (def_stmt);
              oprnd = new_oprnd;
            }
        }
      else
        {
          /* Retrieve the operand before the type promotion.  */
          oprnd = gimple_assign_rhs1 (def_stmt);
        }
    }
  else
    {
      if (interm_type)
        {
          /* Create a type conversion HALF_TYPE->INTERM_TYPE.  */
	  new_oprnd = make_ssa_name (interm_type, NULL);
	  new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
						   oprnd, NULL_TREE);
          oprnd = new_oprnd;
          *new_def_stmt = new_stmt;
        }

      /* Otherwise, OPRND is already set.  */
    }

  if (interm_type)
    *new_type = interm_type;
  else
    *new_type = half_type;

  *op0 = oprnd;
  *op1 = fold_convert (*new_type, const_oprnd);

  return true;
}
/* Try to find a statement or a sequence of statements that can be performed
   on a smaller type:

     type x_t;
     TYPE x_T, res0_T, res1_T;
   loop:
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  res0_T = op (x_T, C0);
     S4  res1_T = op (res0_T, C1);
     S5  ... = () res1_T;  - type demotion

   where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
   constants.
   Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
   be 'type' or some intermediate type.  For now, we expect S5 to be a type
   demotion operation.  We also check that S3 and S4 have only one use.  */
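
/* Editor's illustration (not part of the original GCC sources), assuming
   hypothetical unsigned char arrays a[] and b[]:

     unsigned char a[N], b[N];
     for (int i = 0; i < N; i++)
       b[i] = ((int) a[i] >> 1) & 0x7f;   // S2-S5 above

   Both the shift and the mask can be performed directly on unsigned char
   elements, so the widening to int and the final demotion are avoided.  */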
static gimple
vect_recog_over_widening_pattern (vec<gimple> *stmts,
                                  tree *type_in, tree *type_out)
{
  gimple stmt = stmts->pop ();
  gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL,
	 use_stmt = NULL;
  tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
  tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
  bool first;
  tree type = NULL;

  first = true;
  while (1)
    {
      if (!vinfo_for_stmt (stmt)
          || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
        return NULL;

      new_def_stmt = NULL;
      if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
                                             &op0, &op1, &new_def_stmt,
                                             stmts))
        {
          if (first)
            return NULL;
          else
            break;
        }

      /* STMT can be performed on a smaller type.  Check its uses.  */
      use_stmt = vect_single_imm_use (stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt))
        return NULL;

      /* Create pattern statement for STMT.  */
      vectype = get_vectype_for_scalar_type (new_type);
      if (!vectype)
        return NULL;

      /* We want to collect all the statements for which we create pattern
         statements, except for the case when the last statement in the
         sequence doesn't have a corresponding pattern statement.  In such
         case we associate the last pattern statement with the last statement
         in the sequence.  Therefore, we only add the original statement to
         the list if we know that it is not the last.  */
      if (prev_stmt)
        stmts->safe_push (prev_stmt);

      var = vect_recog_temp_ssa_var (new_type, NULL);
      pattern_stmt
	= gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt), var,
					op0, op1);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
      new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "created pattern stmt: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
          dump_printf (MSG_NOTE, "\n");
        }

      type = gimple_expr_type (stmt);
      prev_stmt = stmt;
      stmt = use_stmt;

      first = false;
    }

  /* We got a sequence.  We expect it to end with a type demotion operation.
     Otherwise, we quit (for now).  There are three possible cases: the
     conversion is to NEW_TYPE (we don't do anything), the conversion is to
     a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
     NEW_TYPE differs (we create a new conversion statement).  */
  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
    {
      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      /* Support only type demotion or signedness change.  */
      if (!INTEGRAL_TYPE_P (use_type)
	  || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
        return NULL;

      /* Check that NEW_TYPE is not bigger than the conversion result.  */
      if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
	return NULL;

      if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
          || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
        {
          /* Create NEW_TYPE->USE_TYPE conversion.  */
	  new_oprnd = make_ssa_name (use_type, NULL);
	  pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
						       var, NULL_TREE);
          STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;

          *type_in = get_vectype_for_scalar_type (new_type);
          *type_out = get_vectype_for_scalar_type (use_type);

          /* We created a pattern statement for the last statement in the
             sequence, so we don't need to associate it with the pattern
             statement created for PREV_STMT.  Therefore, we add PREV_STMT
             to the list in order to mark it later in vect_pattern_recog_1.  */
          if (prev_stmt)
            stmts->safe_push (prev_stmt);
        }
      else
        {
          if (prev_stmt)
	    STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
	       = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));

          *type_in = vectype;
          *type_out = NULL_TREE;
        }

      stmts->safe_push (use_stmt);
    }
  else
    /* TODO: support general case, create a conversion to the correct type.  */
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_over_widening_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  return pattern_stmt;
}
/* Detect widening shift pattern:

   type a_t;
   TYPE a_T, res_T;

   S1 a_t = ;
   S2 a_T = (TYPE) a_t;
   S3 res_T = a_T << CONST;

  where type 'TYPE' is at least double the size of type 'type'.

  Also detect cases where the shift result is immediately converted
  to another type 'result_type' that is no larger in size than 'TYPE'.
  In those cases we perform a widen-shift that directly results in
  'result_type', to avoid a possible over-widening situation:

  type a_t;
  TYPE a_T, res_T;
  result_type res_result;

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
  S3 res_T = a_T << CONST;
  S4 res_result = (result_type) res_T;
      '--> res_result' = a_t w<< CONST;

  And a case when 'TYPE' is 4 times bigger than 'type'.  In that case we
  create an additional pattern stmt for S2 to create a variable of an
  intermediate type, and perform widen-shift on the intermediate type:

  type a_t;
  interm_type a_it;
  TYPE a_T, res_T, res_T';

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
      '--> a_it = (interm_type) a_t;
  S3 res_T = a_T << CONST;
      '--> res_T' = a_it <<* CONST;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins.
    In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
    in STMTS.  When an intermediate type is used and a pattern statement is
    created for S2, we also put S2 here (before S3).

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the sequence of
    stmts that constitute the pattern.  In this case it will be:
    WIDEN_LSHIFT_EXPR <a_t, CONST>.  */
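
/* Editor's illustration (not part of the original GCC sources), assuming a
   hypothetical unsigned char input a[] and unsigned short output r[]:

     unsigned char a[N];
     unsigned short r[N];
     for (int i = 0; i < N; i++)
       r[i] = (unsigned short) a[i] << 4;   // S2, S3 above

   becomes WIDEN_LSHIFT_EXPR <a[i], 4> when the target provides a widening
   shift instruction.  */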
static gimple
vect_recog_widen_shift_pattern (vec<gimple> *stmts,
				tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  gimple def_stmt0;
  tree oprnd0, oprnd1;
  tree type, half_type0;
  gimple pattern_stmt;
  tree vectype, vectype_out = NULL_TREE;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  gimple use_stmt;
  bool promotion;

  if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
    return NULL;

  if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
    return NULL;

  /* Check operand 0: it has to be defined by a type promotion.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
			  &promotion)
      || !promotion)
    return NULL;

  /* Check operand 1: has to be positive.  We check that it fits the type
     in vect_handle_widen_op_by_const ().  */
  if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (def_stmt0);
  type = gimple_expr_type (last_stmt);

  /* Check for subsequent conversion to another type.  */
  use_stmt = vect_single_imm_use (last_stmt);
  if (use_stmt && is_gimple_assign (use_stmt)
      && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
      && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
    {
      tree use_lhs = gimple_assign_lhs (use_stmt);
      tree use_type = TREE_TYPE (use_lhs);

      if (INTEGRAL_TYPE_P (use_type)
	  && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
	{
	  last_stmt = use_stmt;
	  type = use_type;
	}
    }

  /* Check if this a widening operation.  */
  if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
				      &oprnd0, stmts,
				      type, &half_type0, def_stmt0))
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_recog_widen_shift_pattern: detected:\n");

  /* Check target support.  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);

  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
					  vectype_out, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt =
    gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR, var, oprnd0, oprnd1);

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}
/* Detect a rotate pattern that wouldn't be otherwise vectorized:

   type a_t, b_t, c_t;

   S0 a_t = b_t r<< c_t;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins,
    i.e. the shift/rotate stmt.  The original stmt (S0) is replaced
    with a sequence:

   S1 d_t = -c_t;
   S2 e_t = d_t & (B - 1);
   S3 f_t = b_t << c_t;
   S4 g_t = b_t >> e_t;
   S0 a_t = f_t | g_t;

    where B is element bitsize of type.

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the rotate
    S0 stmt.  */
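
/* Editor's illustration (not part of the original GCC sources): for a
   32-bit unsigned x and a variable amount n, the rotate

     x r<< n

   is open-coded, when no vector rotate instruction exists, as

     (x << n) | (x >> ((-n) & 31))

   following the S1-S4 sequence above, so that only vector shifts and a
   bitwise OR are required.  */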
static gimple
vect_recog_rotate_pattern (vec<gimple> *stmts, tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
  gimple pattern_stmt, def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  enum vect_def_type dt;
  optab optab1, optab2;
  edge ext_def = NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  lhs = gimple_assign_lhs (last_stmt);
  oprnd0 = gimple_assign_rhs1 (last_stmt);
  type = TREE_TYPE (oprnd0);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
      || !INTEGRAL_TYPE_P (type)
      || !TYPE_UNSIGNED (type))
    return NULL;

  if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
			   &def, &dt))
    return NULL;

  if (dt != vect_internal_def
      && dt != vect_constant_def
      && dt != vect_external_def)
    return NULL;

  vectype = get_vectype_for_scalar_type (type);
  if (vectype == NULL_TREE)
    return NULL;

  /* If vector/vector or vector/scalar rotate is supported by the target,
     don't do anything here.  */
  optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
  if (optab1
      && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
    return NULL;

  if (bb_vinfo != NULL || dt != vect_internal_def)
    {
      optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
      if (optab2
	  && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
	return NULL;
    }

  /* If vector/vector or vector/scalar shifts aren't supported by the target,
     don't do anything here either.  */
  optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
  optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
  if (!optab1
      || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
      || !optab2
      || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      if (bb_vinfo == NULL && dt == vect_internal_def)
	return NULL;
      optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
      optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
      if (!optab1
	  || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
	  || !optab2
	  || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
	return NULL;
    }

  *type_in = vectype;
  *type_out = vectype;
  if (*type_in == NULL_TREE)
    return NULL;

  if (dt == vect_external_def
      && TREE_CODE (oprnd1) == SSA_NAME
      && loop_vinfo)
    {
      struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      ext_def = loop_preheader_edge (loop);
      if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
	{
	  basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
	  if (bb == NULL
	      || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
	    ext_def = NULL;
	}
    }

  def = NULL_TREE;
  if (TREE_CODE (oprnd1) == INTEGER_CST
      || TYPE_MODE (TREE_TYPE (oprnd1)) == TYPE_MODE (type))
    def = oprnd1;
  else if (def_stmt && gimple_assign_cast_p (def_stmt))
    {
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (type)
	  && TYPE_PRECISION (TREE_TYPE (rhs1))
	     == TYPE_PRECISION (type))
	def = rhs1;
    }

  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
  if (def == NULL_TREE)
    {
      def = vect_recog_temp_ssa_var (type, NULL);
      def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
					       NULL_TREE);
      if (ext_def)
	{
	  basic_block new_bb
	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
	  gcc_assert (!new_bb);
	}
      else
	append_pattern_def_seq (stmt_vinfo, def_stmt);
    }
  stype = TREE_TYPE (def);

  if (TREE_CODE (def) == INTEGER_CST)
    {
      if (!tree_fits_uhwi_p (def)
	  || tree_to_uhwi (def) >= GET_MODE_PRECISION (TYPE_MODE (type))
	  || integer_zerop (def))
	return NULL;
      def2 = build_int_cst (stype,
			    GET_MODE_PRECISION (TYPE_MODE (type))
			    - tree_to_uhwi (def));
    }
  else
    {
      tree vecstype = get_vectype_for_scalar_type (stype);
      stmt_vec_info def_stmt_vinfo;

      if (vecstype == NULL_TREE)
	return NULL;
      def2 = vect_recog_temp_ssa_var (stype, NULL);
      def_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, def2, def,
					       NULL_TREE);
      if (ext_def)
	{
	  basic_block new_bb
	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
	  gcc_assert (!new_bb);
	}
      else
	{
	  def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
	  set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
	  STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
	  append_pattern_def_seq (stmt_vinfo, def_stmt);
	}

      def2 = vect_recog_temp_ssa_var (stype, NULL);
      tree mask
	= build_int_cst (stype, GET_MODE_PRECISION (TYPE_MODE (stype)) - 1);
      def_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, def2,
					       gimple_assign_lhs (def_stmt),
					       mask);
      if (ext_def)
	{
	  basic_block new_bb
	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
	  gcc_assert (!new_bb);
	}
      else
	{
	  def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
	  set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
	  STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
	  append_pattern_def_seq (stmt_vinfo, def_stmt);
	}
    }

  var1 = vect_recog_temp_ssa_var (type, NULL);
  def_stmt = gimple_build_assign_with_ops (rhs_code == LROTATE_EXPR
					   ? LSHIFT_EXPR : RSHIFT_EXPR,
					   var1, oprnd0, def);
  append_pattern_def_seq (stmt_vinfo, def_stmt);

  var2 = vect_recog_temp_ssa_var (type, NULL);
  def_stmt = gimple_build_assign_with_ops (rhs_code == LROTATE_EXPR
					   ? RSHIFT_EXPR : LSHIFT_EXPR,
					   var2, oprnd0, def2);
  append_pattern_def_seq (stmt_vinfo, def_stmt);

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_recog_rotate_pattern: detected:\n");

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (BIT_IOR_EXPR, var, var1, var2);

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}
/* Detect a vector by vector shift pattern that wouldn't be otherwise
   vectorized:

   type a_t;
   TYPE b_T, res_T;

   S1 a_t = ;
   S2 b_T = ;
   S3 res_T = b_T op a_t;

  where type 'TYPE' is a type with different size than 'type',
  and op is <<, >> or rotate.

  Also detect cases:

   type a_t;
   TYPE b_T, c_T, res_T;

   S0 c_T = ;
   S1 a_t = (type) c_T;
   S2 b_T = ;
   S3 res_T = b_T op a_t;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins,
    i.e. the shift/rotate stmt.  The original stmt (S3) is replaced
    with a shift/rotate which has same type on both operands, in the
    second case just b_T op c_T, in the first case with added cast
    from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the shift/rotate
    S3 stmt.  */
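
/* Editor's illustration (not part of the original GCC sources), assuming a
   hypothetical int array b[] shifted by char amounts s[]:

     int b[N];
     char s[N];
     for (int i = 0; i < N; i++)
       b[i] = b[i] << s[i];   // S3 above: the shift amount is narrower

   The pattern re-expresses the shift amount in the same type as b[i]
   (reusing the cast that feeds it, or emitting a new NOP_EXPR) so that a
   vector-by-vector shift can be used.  */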
static gimple
vect_recog_vector_vector_shift_pattern (vec<gimple> *stmts,
					tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  tree oprnd0, oprnd1, lhs, var;
  gimple pattern_stmt, def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  enum vect_def_type dt;
  tree def;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  lhs = gimple_assign_lhs (last_stmt);
  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TREE_CODE (oprnd1) != SSA_NAME
      || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
      || TYPE_PRECISION (TREE_TYPE (oprnd1))
	 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
      || TYPE_PRECISION (TREE_TYPE (lhs))
	 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
    return NULL;

  if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
			   &def, &dt))
    return NULL;

  if (dt != vect_internal_def)
    return NULL;

  *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
  *type_out = *type_in;
  if (*type_in == NULL_TREE)
    return NULL;

  def = NULL_TREE;
  if (gimple_assign_cast_p (def_stmt))
    {
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
	  && TYPE_PRECISION (TREE_TYPE (rhs1))
	     == TYPE_PRECISION (TREE_TYPE (oprnd0)))
	def = rhs1;
    }

  if (def == NULL_TREE)
    {
      def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
      def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
					       NULL_TREE);
      new_pattern_def_seq (stmt_vinfo, def_stmt);
    }

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_recog_vector_vector_shift_pattern: detected:\n");

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
  pattern_stmt = gimple_build_assign_with_ops (rhs_code, var, oprnd0, def);

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}
/* Detect a signed division by a constant that wouldn't be
   otherwise vectorized:

     type a_t, b_t;

     S1 a_t = b_t / N;

   where type 'type' is an integral type and N is a constant.

   Similarly handle modulo by a constant:

     S4 a_t = b_t % N;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins,
     i.e. the division stmt.  If N is a power of two constant and
     type is signed, S1 is replaced by:
       S3  y_t = b_t < 0 ? N - 1 : 0;
       S2  x_t = b_t + y_t;
       S1' a_t = x_t >> log2 (N);

     If N is a power of two constant and type is signed, S4 is
     replaced by (where *_T temporaries have unsigned type):
       S9  y_T = b_t < 0 ? -1U : 0U;
       S8  z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
       S7  z_t = (type) z_T;
       S6  w_t = b_t + z_t;
       S5  x_t = w_t & (N - 1);
       S4' a_t = x_t - z_t;

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the division
     S1 or modulo S4 stmt.  */
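
/* As a concrete illustration (not part of the original comment), for

     int a_t, b_t;
     S1 a_t = b_t / 4;

   the replacement built below is

     S3  y_t = b_t < 0 ? 3 : 0;
     S2  x_t = b_t + y_t;
     S1' a_t = x_t >> 2;

   adding divisor-minus-one to negative dividends first, so that the
   arithmetic shift rounds towards zero as TRUNC_DIV_EXPR requires
   (e.g. b_t = -7 gives x_t = -4 and a_t = -1).  */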
static gimple
vect_recog_divmod_pattern (vec<gimple> *stmts,
                           tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  tree oprnd0, oprnd1, vectype, itype, cond;
  gimple pattern_stmt, def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  optab optab;
  tree q;
  int dummy_int, prec;
  stmt_vec_info def_stmt_vinfo;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case TRUNC_DIV_EXPR:
    case TRUNC_MOD_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  itype = TREE_TYPE (oprnd0);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TREE_CODE (oprnd1) != INTEGER_CST
      || TREE_CODE (itype) != INTEGER_TYPE
      || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
    return NULL;

  vectype = get_vectype_for_scalar_type (itype);
  if (vectype == NULL_TREE)
    return NULL;

  /* If the target can handle vectorized division or modulo natively,
     don't attempt to optimize this.  */
  optab = optab_for_tree_code (rhs_code, vectype, optab_default);
  if (optab != unknown_optab)
    {
      machine_mode vec_mode = TYPE_MODE (vectype);
      int icode = (int) optab_handler (optab, vec_mode);
      if (icode != CODE_FOR_nothing)
        return NULL;
    }

  prec = TYPE_PRECISION (itype);
  if (integer_pow2p (oprnd1))
    {
      if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
        return NULL;

      /* Pattern detected.  */
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_recog_divmod_pattern: detected:\n");
      cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
                     build_int_cst (itype, 0));
      if (rhs_code == TRUNC_DIV_EXPR)
        {
          tree var = vect_recog_temp_ssa_var (itype, NULL);
          tree shift;
          def_stmt
            = gimple_build_assign_with_ops (COND_EXPR, var, cond,
                                            fold_build2 (MINUS_EXPR, itype,
                                                         oprnd1,
                                                         build_int_cst (itype,
                                                                        1)),
                                            build_int_cst (itype, 0));
          new_pattern_def_seq (stmt_vinfo, def_stmt);
          var = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt
            = gimple_build_assign_with_ops (PLUS_EXPR, var, oprnd0,
                                            gimple_assign_lhs (def_stmt));
          append_pattern_def_seq (stmt_vinfo, def_stmt);

          shift = build_int_cst (itype, tree_log2 (oprnd1));
          pattern_stmt
            = gimple_build_assign_with_ops (RSHIFT_EXPR,
                                            vect_recog_temp_ssa_var (itype,
                                                                     NULL),
                                            var, shift);
        }
      else
        {
          tree signmask;
          STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
          if (compare_tree_int (oprnd1, 2) == 0)
            {
              signmask = vect_recog_temp_ssa_var (itype, NULL);
              def_stmt
                = gimple_build_assign_with_ops (COND_EXPR, signmask, cond,
                                                build_int_cst (itype, 1),
                                                build_int_cst (itype, 0));
              append_pattern_def_seq (stmt_vinfo, def_stmt);
            }
          else
            {
              tree utype
                = build_nonstandard_integer_type (prec, 1);
              tree vecutype = get_vectype_for_scalar_type (utype);
              tree shift
                = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
                                        - tree_log2 (oprnd1));
              tree var = vect_recog_temp_ssa_var (utype, NULL);

              def_stmt
                = gimple_build_assign_with_ops (COND_EXPR, var, cond,
                                                build_int_cst (utype, -1),
                                                build_int_cst (utype, 0));
              def_stmt_vinfo
                = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
              set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
              STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
              append_pattern_def_seq (stmt_vinfo, def_stmt);
              var = vect_recog_temp_ssa_var (utype, NULL);
              def_stmt
                = gimple_build_assign_with_ops (RSHIFT_EXPR, var,
                                                gimple_assign_lhs (def_stmt),
                                                shift);
              def_stmt_vinfo
                = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
              set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
              STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
              append_pattern_def_seq (stmt_vinfo, def_stmt);
              signmask = vect_recog_temp_ssa_var (itype, NULL);
              def_stmt
                = gimple_build_assign_with_ops (NOP_EXPR, signmask, var,
                                                NULL_TREE);
              append_pattern_def_seq (stmt_vinfo, def_stmt);
            }
          def_stmt
            = gimple_build_assign_with_ops (PLUS_EXPR,
                                            vect_recog_temp_ssa_var (itype,
                                                                     NULL),
                                            oprnd0, signmask);
          append_pattern_def_seq (stmt_vinfo, def_stmt);
          def_stmt
            = gimple_build_assign_with_ops (BIT_AND_EXPR,
                                            vect_recog_temp_ssa_var (itype,
                                                                     NULL),
                                            gimple_assign_lhs (def_stmt),
                                            fold_build2 (MINUS_EXPR, itype,
                                                         oprnd1,
                                                         build_int_cst (itype,
                                                                        1)));
          append_pattern_def_seq (stmt_vinfo, def_stmt);

          pattern_stmt
            = gimple_build_assign_with_ops (MINUS_EXPR,
                                            vect_recog_temp_ssa_var (itype,
                                                                     NULL),
                                            gimple_assign_lhs (def_stmt),
                                            signmask);
        }

      if (dump_enabled_p ())
        dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
                              0);

      stmts->safe_push (last_stmt);

      *type_in = vectype;
      *type_out = vectype;
      return pattern_stmt;
    }
  if (prec > HOST_BITS_PER_WIDE_INT
      || integer_zerop (oprnd1))
    return NULL;

  if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
    return NULL;

  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;

  if (TYPE_UNSIGNED (itype))
    {
      unsigned HOST_WIDE_INT mh, ml;
      int pre_shift, post_shift;
      unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
                                  & GET_MODE_MASK (TYPE_MODE (itype)));
      tree t1, t2, t3, t4;

      if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
        /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0.  */
        return NULL;

      /* Find a suitable multiplier and right shift count
         instead of multiplying with D.  */
      mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);

      /* If the suggested multiplier is more than SIZE bits, we can do better
         for even divisors, using an initial right shift.  */
      if (mh != 0 && (d & 1) == 0)
        {
          pre_shift = floor_log2 (d & -d);
          mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
                                  &ml, &post_shift, &dummy_int);
          gcc_assert (!mh);
        }
      else
        pre_shift = 0;

      if (mh != 0)
        {
          if (post_shift - 1 >= prec)
            return NULL;

          /* t1 = oprnd0 h* ml;
             t2 = oprnd0 - t1;
             t3 = t2 >> 1;
             t4 = t1 + t3;
             q = t4 >> (post_shift - 1);  */
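          /* E.g. (illustrative, under the assumption of a 32-bit unsigned
             division by 7): choose_multiplier yields ml = 0x24924925 with
             post_shift = 3, so the stmts built below compute
               t1 = oprnd0 h* 0x24924925;
               t2 = oprnd0 - t1;
               t3 = t2 >> 1;
               t4 = t1 + t3;
               q  = t4 >> 2;
             the classical highpart-multiply expansion of n / 7.  */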
          t1 = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt
            = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
                                            build_int_cst (itype, ml));
          append_pattern_def_seq (stmt_vinfo, def_stmt);

          t2 = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt
            = gimple_build_assign_with_ops (MINUS_EXPR, t2, oprnd0, t1);
          append_pattern_def_seq (stmt_vinfo, def_stmt);

          t3 = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt
            = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
                                            integer_one_node);
          append_pattern_def_seq (stmt_vinfo, def_stmt);

          t4 = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt
            = gimple_build_assign_with_ops (PLUS_EXPR, t4, t1, t3);

          if (post_shift != 1)
            {
              append_pattern_def_seq (stmt_vinfo, def_stmt);

              q = vect_recog_temp_ssa_var (itype, NULL);
              pattern_stmt
                = gimple_build_assign_with_ops (RSHIFT_EXPR, q, t4,
                                                build_int_cst (itype,
                                                               post_shift
                                                               - 1));
            }
          else
            {
              q = t4;
              pattern_stmt = def_stmt;
            }
        }
      else
        {
          if (pre_shift >= prec || post_shift >= prec)
            return NULL;

          /* t1 = oprnd0 >> pre_shift;
             t2 = t1 h* ml;
             q = t2 >> post_shift;  */
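          /* E.g. (illustrative, under the assumption of a 32-bit unsigned
             division by 14): pre_shift = 1 and the remaining division by 7
             fits the multiplier into the word, giving ml = 0x92492493 with
             post_shift = 2:
               t1 = oprnd0 >> 1;
               t2 = t1 h* 0x92492493;
               q  = t2 >> 2;  */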
          if (pre_shift)
            {
              t1 = vect_recog_temp_ssa_var (itype, NULL);
              def_stmt
                = gimple_build_assign_with_ops (RSHIFT_EXPR, t1, oprnd0,
                                                build_int_cst (NULL,
                                                               pre_shift));
              append_pattern_def_seq (stmt_vinfo, def_stmt);
            }
          else
            t1 = oprnd0;

          t2 = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt
            = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t2, t1,
                                            build_int_cst (itype, ml));

          if (post_shift)
            {
              append_pattern_def_seq (stmt_vinfo, def_stmt);

              q = vect_recog_temp_ssa_var (itype, NULL);
              def_stmt
                = gimple_build_assign_with_ops (RSHIFT_EXPR, q, t2,
                                                build_int_cst (itype,
                                                               post_shift));
            }
          else
            q = t2;

          pattern_stmt = def_stmt;
        }
    }
  else
    {
      unsigned HOST_WIDE_INT ml;
      int post_shift;
      HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
      unsigned HOST_WIDE_INT abs_d;
      bool add = false;
      tree t1, t2, t3, t4;

      /* Give up for -1.  */
      if (d == -1)
        return NULL;

      /* Since d might be INT_MIN, we have to cast to
         unsigned HOST_WIDE_INT before negating to avoid
         undefined signed overflow.  */
      abs_d = (d >= 0
               ? (unsigned HOST_WIDE_INT) d
               : - (unsigned HOST_WIDE_INT) d);

      /* n rem d = n rem -d */
      if (rhs_code == TRUNC_MOD_EXPR && d < 0)
        {
          d = abs_d;
          oprnd1 = build_int_cst (itype, abs_d);
        }
      else if (HOST_BITS_PER_WIDE_INT >= prec
               && abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
        /* This case is not handled correctly below.  */
        return NULL;

      choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
      if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
        {
          add = true;
          ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
        }
      if (post_shift >= prec)
        return NULL;

      /* t1 = oprnd0 h* ml;  */
      t1 = vect_recog_temp_ssa_var (itype, NULL);
      def_stmt
        = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
                                        build_int_cst (itype, ml));

      if (add)
        {
          /* t2 = t1 + oprnd0;  */
          append_pattern_def_seq (stmt_vinfo, def_stmt);
          t2 = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt
            = gimple_build_assign_with_ops (PLUS_EXPR, t2, t1, oprnd0);
        }
      else
        t2 = t1;

      if (post_shift)
        {
          /* t3 = t2 >> post_shift;  */
          append_pattern_def_seq (stmt_vinfo, def_stmt);
          t3 = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt
            = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
                                            build_int_cst (itype, post_shift));
        }
      else
        t3 = t2;
      wide_int oprnd0_min, oprnd0_max;
      int msb = 1;
      if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
        {
          if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
            msb = 0;
          else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
            msb = -1;
        }

      if (msb == 0 && d >= 0)
        {
          /* q = t3;  */
          q = t3;
          pattern_stmt = def_stmt;
        }
      else
        {
          /* t4 = oprnd0 >> (prec - 1);
             or if we know from VRP that oprnd0 >= 0
             t4 = 0;
             or if we know from VRP that oprnd0 < 0
             t4 = -1;  */
          append_pattern_def_seq (stmt_vinfo, def_stmt);
          t4 = vect_recog_temp_ssa_var (itype, NULL);
          if (msb != 1)
            def_stmt
              = gimple_build_assign_with_ops (INTEGER_CST,
                                              t4, build_int_cst (itype, msb),
                                              NULL_TREE);
          else
            def_stmt
              = gimple_build_assign_with_ops (RSHIFT_EXPR, t4, oprnd0,
                                              build_int_cst (itype, prec - 1));
          append_pattern_def_seq (stmt_vinfo, def_stmt);

          /* q = t3 - t4;  or q = t4 - t3;  */
          q = vect_recog_temp_ssa_var (itype, NULL);
          pattern_stmt
            = gimple_build_assign_with_ops (MINUS_EXPR, q, d < 0 ? t4 : t3,
                                            d < 0 ? t3 : t4);
        }
    }

  if (rhs_code == TRUNC_MOD_EXPR)
    {
      tree r, t1;

      /* We divided.  Now finish by:
         t1 = q * oprnd1;
         r = oprnd0 - t1;  */
      append_pattern_def_seq (stmt_vinfo, pattern_stmt);

      t1 = vect_recog_temp_ssa_var (itype, NULL);
      def_stmt
        = gimple_build_assign_with_ops (MULT_EXPR, t1, q, oprnd1);
      append_pattern_def_seq (stmt_vinfo, def_stmt);

      r = vect_recog_temp_ssa_var (itype, NULL);
      pattern_stmt
        = gimple_build_assign_with_ops (MINUS_EXPR, r, oprnd0, t1);
    }

  /* Pattern detected.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_divmod_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  stmts->safe_push (last_stmt);

  *type_in = vectype;
  *type_out = vectype;
  return pattern_stmt;
}
/* Function vect_recog_mixed_size_cond_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE a_T, b_T, c_T;
   loop:
     S1  a_T = x_t CMP y_t ? b_T : c_T;

   where type 'TYPE' is an integral type which has different size
   from 'type'.  b_T and c_T are either constants (and if 'TYPE' is wider
   than 'type', the constants need to fit into an integer type
   with the same width as 'type') or results of conversion from 'type'.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the pattern.
     Additionally a def_stmt is added:

       a_it = x_t CMP y_t ? b_it : c_it;
       a_T = (TYPE) a_it;  */
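
/* For example (illustrative only), with

     short x_t, y_t;
     int a_T;
     S1 a_T = x_t < y_t ? 5 : 0;

   the stmts built below are

     a_it = x_t < y_t ? 5 : 0;
     a_T = (int) a_it;

   where a_it has a 16-bit integer type, so the COND_EXPR is evaluated in
   the same size as the comparison and only the result is widened.  */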
static gimple
vect_recog_mixed_size_cond_pattern (vec<gimple> *stmts, tree *type_in,
                                    tree *type_out)
{
  gimple last_stmt = (*stmts)[0];
  tree cond_expr, then_clause, else_clause;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
  tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
  machine_mode cmpmode;
  gimple pattern_stmt, def_stmt;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
  gimple def_stmt0 = NULL, def_stmt1 = NULL;
  bool promotion;
  tree comp_scalar_type;

  if (!is_gimple_assign (last_stmt)
      || gimple_assign_rhs_code (last_stmt) != COND_EXPR
      || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;

  cond_expr = gimple_assign_rhs1 (last_stmt);
  then_clause = gimple_assign_rhs2 (last_stmt);
  else_clause = gimple_assign_rhs3 (last_stmt);

  if (!COMPARISON_CLASS_P (cond_expr))
    return NULL;

  comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
  comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
  if (comp_vectype == NULL_TREE)
    return NULL;

  type = gimple_expr_type (last_stmt);
  if (types_compatible_p (type, comp_scalar_type)
      || ((TREE_CODE (then_clause) != INTEGER_CST
           || TREE_CODE (else_clause) != INTEGER_CST)
          && !INTEGRAL_TYPE_P (comp_scalar_type))
      || !INTEGRAL_TYPE_P (type))
    return NULL;

  if ((TREE_CODE (then_clause) != INTEGER_CST
       && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
                              &def_stmt0, &promotion))
      || (TREE_CODE (else_clause) != INTEGER_CST
          && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
                                 &def_stmt1, &promotion)))
    return NULL;

  if (orig_type0 && orig_type1
      && !types_compatible_p (orig_type0, orig_type1))
    return NULL;

  if (orig_type0)
    {
      if (!types_compatible_p (orig_type0, comp_scalar_type))
        return NULL;
      then_clause = gimple_assign_rhs1 (def_stmt0);
      itype = orig_type0;
    }

  if (orig_type1)
    {
      if (!types_compatible_p (orig_type1, comp_scalar_type))
        return NULL;
      else_clause = gimple_assign_rhs1 (def_stmt1);
      itype = orig_type1;
    }

  cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));

  if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
    return NULL;

  vectype = get_vectype_for_scalar_type (type);
  if (vectype == NULL_TREE)
    return NULL;

  if (expand_vec_cond_expr_p (vectype, comp_vectype))
    return NULL;

  if (itype == NULL_TREE)
    itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
                                            TYPE_UNSIGNED (type));

  if (itype == NULL_TREE
      || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
    return NULL;

  vecitype = get_vectype_for_scalar_type (itype);
  if (vecitype == NULL_TREE)
    return NULL;

  if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
    return NULL;

  if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
    {
      if ((TREE_CODE (then_clause) == INTEGER_CST
           && !int_fits_type_p (then_clause, itype))
          || (TREE_CODE (else_clause) == INTEGER_CST
              && !int_fits_type_p (else_clause, itype)))
        return NULL;
    }

  def_stmt
    = gimple_build_assign_with_ops (COND_EXPR,
                                    vect_recog_temp_ssa_var (itype, NULL),
                                    unshare_expr (cond_expr),
                                    fold_convert (itype, then_clause),
                                    fold_convert (itype, else_clause));
  pattern_stmt
    = gimple_build_assign_with_ops (NOP_EXPR,
                                    vect_recog_temp_ssa_var (type, NULL),
                                    gimple_assign_lhs (def_stmt), NULL_TREE);

  new_pattern_def_seq (stmt_vinfo, def_stmt);
  def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
  set_vinfo_for_stmt (def_stmt, def_stmt_info);
  STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
  *type_in = vecitype;
  *type_out = vectype;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_recog_mixed_size_cond_pattern: detected:\n");

  return pattern_stmt;
}
/* Helper function of vect_recog_bool_pattern.  Called recursively, return
   true if bool VAR can be optimized that way.  */

static bool
check_bool_pattern (tree var, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  gimple def_stmt;
  enum vect_def_type dt;
  tree def, rhs1;
  enum tree_code rhs_code;

  if (!vect_is_simple_use (var, NULL, loop_vinfo, bb_vinfo, &def_stmt, &def,
                           &dt))
    return false;

  if (dt != vect_internal_def)
    return false;

  if (!is_gimple_assign (def_stmt))
    return false;

  if (!has_single_use (def))
    return false;

  rhs1 = gimple_assign_rhs1 (def_stmt);
  rhs_code = gimple_assign_rhs_code (def_stmt);
  switch (rhs_code)
    {
    case SSA_NAME:
      return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);

    CASE_CONVERT:
      if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
           || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
          && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
        return false;
      return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);

    case BIT_NOT_EXPR:
      return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);

    case BIT_AND_EXPR:
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
      if (!check_bool_pattern (rhs1, loop_vinfo, bb_vinfo))
        return false;
      return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo,
                                 bb_vinfo);

    default:
      if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
        {
          tree vecitype, comp_vectype;

          /* If the comparison can throw, then is_gimple_condexpr will be
             false and we can't make a COND_EXPR/VEC_COND_EXPR out of it.  */
          if (stmt_could_throw_p (def_stmt))
            return false;

          comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
          if (comp_vectype == NULL_TREE)
            return false;

          if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
            {
              machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
              tree itype
                = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
              vecitype = get_vectype_for_scalar_type (itype);
              if (vecitype == NULL_TREE)
                return false;
            }
          else
            vecitype = comp_vectype;
          return expand_vec_cond_expr_p (vecitype, comp_vectype);
        }
      return false;
    }
}
/* Helper function of adjust_bool_pattern.  Add a cast to TYPE to a previous
   stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
   to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT.  */

static tree
adjust_bool_pattern_cast (tree type, tree var)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
  gimple cast_stmt, pattern_stmt;

  gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
  pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
  new_pattern_def_seq (stmt_vinfo, pattern_stmt);
  cast_stmt
    = gimple_build_assign_with_ops (NOP_EXPR,
                                    vect_recog_temp_ssa_var (type, NULL),
                                    gimple_assign_lhs (pattern_stmt),
                                    NULL_TREE);
  STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
  return gimple_assign_lhs (cast_stmt);
}
/* Helper function of vect_recog_bool_pattern.  Do the actual transformations,
   recursively.  VAR is an SSA_NAME that should be transformed from bool
   to a wider integer type, OUT_TYPE is the desired final integer type of
   the whole pattern, TRUEVAL should be NULL unless optimizing
   BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
   in the then_clause, STMTS is where statements with added pattern stmts
   should be pushed to.  */
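
/* E.g. (illustrative), for a comparison feeding the bool chain

     a_b = x_t < y_t;          with int x_t, y_t

   the tcc_comparison case below emits

     a_T = x_t < y_t ? 1 : 0;

   where a_T gets an unsigned integer type matching the mode of the
   comparison operands, so the result can be combined directly in the
   wider bitwise operations.  */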
static tree
adjust_bool_pattern (tree var, tree out_type, tree trueval,
                     vec<gimple> *stmts)
{
  gimple stmt = SSA_NAME_DEF_STMT (var);
  enum tree_code rhs_code, def_rhs_code;
  tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
  location_t loc;
  gimple pattern_stmt, def_stmt;

  rhs1 = gimple_assign_rhs1 (stmt);
  rhs2 = gimple_assign_rhs2 (stmt);
  rhs_code = gimple_assign_rhs_code (stmt);
  loc = gimple_location (stmt);
  switch (rhs_code)
    {
    case SSA_NAME:
    CASE_CONVERT:
      irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
      itype = TREE_TYPE (irhs1);
      pattern_stmt
        = gimple_build_assign_with_ops (SSA_NAME,
                                        vect_recog_temp_ssa_var (itype, NULL),
                                        irhs1, NULL_TREE);
      break;

    case BIT_NOT_EXPR:
      irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
      itype = TREE_TYPE (irhs1);
      pattern_stmt
        = gimple_build_assign_with_ops (BIT_XOR_EXPR,
                                        vect_recog_temp_ssa_var (itype, NULL),
                                        irhs1, build_int_cst (itype, 1));
      break;

    case BIT_AND_EXPR:
      /* Try to optimize x = y & (a < b ? 1 : 0); into
         x = (a < b ? y : 0);

         E.g. for:
           bool a_b, b_b, c_b;
           TYPE d_T;

           S1  a_b = x1 CMP1 y1;
           S2  b_b = x2 CMP2 y2;
           S3  c_b = a_b & b_b;
           S4  d_T = (TYPE) c_b;

         we would normally emit:

           S1'  a_T = x1 CMP1 y1 ? 1 : 0;
           S2'  b_T = x2 CMP2 y2 ? 1 : 0;
           S3'  c_T = a_T & b_T;
           S4'  d_T = c_T;

         but we can save one stmt by using the
         result of one of the COND_EXPRs in the other COND_EXPR and leave
         the BIT_AND_EXPR stmt out:

           S1'  a_T = x1 CMP1 y1 ? 1 : 0;
           S3'  c_T = x2 CMP2 y2 ? a_T : 0;
           S4'  d_T = c_T;

         At least when VEC_COND_EXPR is implemented using masks,
         cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases it
         computes the comparison mask and ANDs it, in one case with
         an all-ones vector, in the other case with a vector register.
         Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
         often more expensive.  */
      def_stmt = SSA_NAME_DEF_STMT (rhs2);
      def_rhs_code = gimple_assign_rhs_code (def_stmt);
      if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
        {
          tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
          irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
          if (TYPE_PRECISION (TREE_TYPE (irhs1))
              == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
            {
              gimple tstmt;
              stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
              irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
              tstmt = stmts->pop ();
              gcc_assert (tstmt == def_stmt);
              stmts->quick_push (stmt);
              STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
                = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
              gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
              STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
              return irhs2;
            }
          else
            irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
          goto and_ior_xor;
        }
      def_stmt = SSA_NAME_DEF_STMT (rhs1);
      def_rhs_code = gimple_assign_rhs_code (def_stmt);
      if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
        {
          tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
          irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
          if (TYPE_PRECISION (TREE_TYPE (irhs2))
              == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
            {
              gimple tstmt;
              stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
              irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
              tstmt = stmts->pop ();
              gcc_assert (tstmt == def_stmt);
              stmts->quick_push (stmt);
              STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
                = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
              gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
              STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
              return irhs1;
            }
          else
            irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
          goto and_ior_xor;
        }
      /* FALLTHRU */
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
      irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
      irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
    and_ior_xor:
      if (TYPE_PRECISION (TREE_TYPE (irhs1))
          != TYPE_PRECISION (TREE_TYPE (irhs2)))
        {
          int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
          int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
          int out_prec = TYPE_PRECISION (out_type);
          if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
            irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
          else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
            irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
          else
            {
              irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
              irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
            }
        }
      itype = TREE_TYPE (irhs1);
      pattern_stmt
        = gimple_build_assign_with_ops (rhs_code,
                                        vect_recog_temp_ssa_var (itype, NULL),
                                        irhs1, irhs2);
      break;
    default:
      gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
      if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
          || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
          || (TYPE_PRECISION (TREE_TYPE (rhs1))
              != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
        {
          machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
          itype
            = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
        }
      else
        itype = TREE_TYPE (rhs1);
      cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
      if (trueval == NULL_TREE)
        trueval = build_int_cst (itype, 1);
      else
        gcc_checking_assert (useless_type_conversion_p (itype,
                                                        TREE_TYPE (trueval)));
      pattern_stmt
        = gimple_build_assign_with_ops (COND_EXPR,
                                        vect_recog_temp_ssa_var (itype, NULL),
                                        cond_expr, trueval,
                                        build_int_cst (itype, 0));
      break;
    }

  stmts->safe_push (stmt);
  gimple_set_location (pattern_stmt, loc);
  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
  return gimple_assign_lhs (pattern_stmt);
}
/* Function vect_recog_bool_pattern

   Try to find a pattern like the following:

     bool a_b, b_b, c_b, d_b, e_b;
     TYPE f_T;
   loop:
     S1  a_b = x1 CMP1 y1;
     S2  b_b = x2 CMP2 y2;
     S3  c_b = a_b & b_b;
     S4  d_b = x3 CMP3 y3;
     S5  e_b = c_b | d_b;
     S6  f_T = (TYPE) e_b;

   where type 'TYPE' is an integral type.  Or a similar pattern
   ending in

     S6  f_Y = e_b ? r_Y : s_Y;

   as results from if-conversion of a complex condition.

   Input:

   * LAST_STMT: A stmt at the end from which the pattern
     search begins, i.e. a cast of a bool to an integer type.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the pattern.

     Assuming the size of TYPE is the same as the size of all comparisons
     (otherwise some casts would be added where needed), for the above
     sequence we create the related pattern stmts:
     S1'  a_T = x1 CMP1 y1 ? 1 : 0;
     S3'  c_T = x2 CMP2 y2 ? a_T : 0;
     S4'  d_T = x3 CMP3 y3 ? 1 : 0;
     S5'  e_T = c_T | d_T;
     S6'  f_T = e_T;

     Instead of the above S3' we could emit:
     S2'  b_T = x2 CMP2 y2 ? 1 : 0;
     S3'  c_T = a_T & b_T;
     but the above is more efficient.  */
static gimple
vect_recog_bool_pattern (vec<gimple> *stmts, tree *type_in,
                         tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  enum tree_code rhs_code;
  tree var, lhs, rhs, vectype;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  gimple pattern_stmt;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  var = gimple_assign_rhs1 (last_stmt);
  lhs = gimple_assign_lhs (last_stmt);

  if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
       || !TYPE_UNSIGNED (TREE_TYPE (var)))
      && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  if (CONVERT_EXPR_CODE_P (rhs_code))
    {
      if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
          || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
        return NULL;
      vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
      if (vectype == NULL_TREE)
        return NULL;

      if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
        return NULL;

      rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
      if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
        pattern_stmt
          = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
      else
        pattern_stmt
          = gimple_build_assign_with_ops (NOP_EXPR, lhs, rhs, NULL_TREE);
      *type_out = vectype;
      *type_in = vectype;
      stmts->safe_push (last_stmt);
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_recog_bool_pattern: detected:\n");

      return pattern_stmt;
    }
  else if (rhs_code == COND_EXPR
           && TREE_CODE (var) == SSA_NAME)
    {
      vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
      if (vectype == NULL_TREE)
        return NULL;

      /* Build a scalar type for the boolean result that when
         vectorized matches the vector type of the result in
         size and number of elements.  */
      unsigned prec
        = wi::udiv_trunc (TYPE_SIZE (vectype),
                          TYPE_VECTOR_SUBPARTS (vectype)).to_uhwi ();
      tree type
        = build_nonstandard_integer_type (prec,
                                          TYPE_UNSIGNED (TREE_TYPE (var)));
      if (get_vectype_for_scalar_type (type) == NULL_TREE)
        return NULL;

      if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
        return NULL;

      rhs = adjust_bool_pattern (var, type, NULL_TREE, stmts);
      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
      pattern_stmt
        = gimple_build_assign_with_ops (COND_EXPR, lhs,
                                        build2 (NE_EXPR, boolean_type_node,
                                                rhs, build_int_cst (type, 0)),
                                        gimple_assign_rhs2 (last_stmt),
                                        gimple_assign_rhs3 (last_stmt));
      *type_out = vectype;
      *type_in = vectype;
      stmts->safe_push (last_stmt);
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_recog_bool_pattern: detected:\n");

      return pattern_stmt;
    }
  else if (rhs_code == SSA_NAME
           && STMT_VINFO_DATA_REF (stmt_vinfo))
    {
      stmt_vec_info pattern_stmt_info;
      vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      gcc_assert (vectype != NULL_TREE);
      if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
        return NULL;
      if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
        return NULL;

      rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
      lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
      if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
        {
          tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
          gimple cast_stmt
            = gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs, NULL_TREE);
          new_pattern_def_seq (stmt_vinfo, cast_stmt);
          rhs = rhs2;
        }
      pattern_stmt
        = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
      pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
                                             bb_vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
      STMT_VINFO_DATA_REF (pattern_stmt_info)
        = STMT_VINFO_DATA_REF (stmt_vinfo);
      STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
        = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
      STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
      STMT_VINFO_DR_OFFSET (pattern_stmt_info)
        = STMT_VINFO_DR_OFFSET (stmt_vinfo);
      STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
      STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
        = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
      DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
      *type_out = vectype;
      *type_in = vectype;
      stmts->safe_push (last_stmt);
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_recog_bool_pattern: detected:\n");
      return pattern_stmt;
    }
  else
    return NULL;
}
/* Mark statements that are involved in a pattern.  */

static inline void
vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
                         tree pattern_vectype)
{
  stmt_vec_info pattern_stmt_info, def_stmt_info;
  stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (orig_stmt_info);
  gimple def_stmt;

  pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
  if (pattern_stmt_info == NULL)
    {
      pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
                                             bb_vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
    }
  gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));

  STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
  STMT_VINFO_DEF_TYPE (pattern_stmt_info)
    = STMT_VINFO_DEF_TYPE (orig_stmt_info);
  STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
  STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
  STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
  STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
    = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
  if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
    {
      gimple_stmt_iterator si;
      for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
           !gsi_end_p (si); gsi_next (&si))
        {
          def_stmt = gsi_stmt (si);
          def_stmt_info = vinfo_for_stmt (def_stmt);
          if (def_stmt_info == NULL)
            {
              def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo,
                                                 bb_vinfo);
              set_vinfo_for_stmt (def_stmt, def_stmt_info);
            }
          gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
          STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
          STMT_VINFO_DEF_TYPE (def_stmt_info) = vect_internal_def;
          if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
            STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
        }
    }
}
/* Function vect_pattern_recog_1

   Input:
   PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
        computation pattern.
   STMT: A stmt from which the pattern search should start.

   If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
   expression that computes the same functionality and can be used to
   replace the sequence of stmts that are involved in the pattern.

   Output:
   This function checks if the expression returned by PATTERN_RECOG_FUNC is
   supported in vector form by the target.  We use 'TYPE_IN' to obtain the
   relevant vector type.  If 'TYPE_IN' is already a vector type, then this
   indicates that target support had already been checked by
   PATTERN_RECOG_FUNC.  If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC,
   we check that it fits to the available target pattern.

   This function also does some bookkeeping, as explained in the documentation
   for vect_pattern_recog.  */
static void
vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
                      gimple_stmt_iterator si,
                      vec<gimple> *stmts_to_replace)
{
  gimple stmt = gsi_stmt (si), pattern_stmt;
  stmt_vec_info stmt_info;
  loop_vec_info loop_vinfo;
  tree pattern_vectype;
  tree type_in, type_out;
  enum tree_code code;
  int i;
  gimple next;

  stmts_to_replace->truncate (0);
  stmts_to_replace->quick_push (stmt);
  pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
  if (!pattern_stmt)
    return;

  stmt = stmts_to_replace->last ();
  stmt_info = vinfo_for_stmt (stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);

  if (VECTOR_MODE_P (TYPE_MODE (type_in)))
    {
      /* No need to check target support (already checked by the pattern
         recognition function).  */
      pattern_vectype = type_out ? type_out : type_in;
    }
  else
    {
      machine_mode vec_mode;
      enum insn_code icode;
      optab optab;

      /* Check target support  */
      type_in = get_vectype_for_scalar_type (type_in);
      if (!type_in)
        return;
      if (type_out)
        type_out = get_vectype_for_scalar_type (type_out);
      else
        type_out = type_in;
      if (!type_out)
        return;
      pattern_vectype = type_out;

      if (is_gimple_assign (pattern_stmt))
        code = gimple_assign_rhs_code (pattern_stmt);
      else
        {
          gcc_assert (is_gimple_call (pattern_stmt));
          code = CALL_EXPR;
        }

      optab = optab_for_tree_code (code, type_in, optab_default);
      vec_mode = TYPE_MODE (type_in);
      if (!optab
          || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
          || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
        return;
    }

  /* Found a vectorizable pattern.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "pattern recognized: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  /* Mark the stmts that are involved in the pattern. */
  vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);

  /* Patterns cannot be vectorized using SLP, because they change the order of
     computation.  */
  if (loop_vinfo)
    FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
      if (next == stmt)
        LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);

  /* It is possible that additional pattern stmts are created and inserted in
     STMTS_TO_REPLACE.  We create a stmt_info for each of them, and mark the
     relevant statements.  */
  for (i = 0; stmts_to_replace->iterate (i, &stmt)
              && (unsigned) i < (stmts_to_replace->length () - 1);
       i++)
    {
      stmt_info = vinfo_for_stmt (stmt);
      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "additional pattern stmt: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
          dump_printf (MSG_NOTE, "\n");
        }

      vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
    }
}
/* Function vect_pattern_recog

   Input:
   LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
        computation idioms.

   Output - for each computation idiom that is detected we create a new stmt
        that provides the same functionality and that can be vectorized.  We
        also record some information in the struct_stmt_info of the relevant
        stmts, as explained below:

   At the entry to this function we have the following stmts, with the
   following initial value in the STMT_VINFO fields:

         stmt                     in_pattern_p  related_stmt    vec_stmt
         S1: a_i = ....                 -       -               -
         S2: a_2 = ..use(a_i)..         -       -               -
         S3: a_1 = ..use(a_2)..         -       -               -
         S4: a_0 = ..use(a_1)..         -       -               -
         S5: ... = ..use(a_0)..         -       -               -

   Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
   represented by a single stmt.  We then:
   - create a new stmt S6 equivalent to the pattern (the stmt is not
     inserted into the code)
   - fill in the STMT_VINFO fields as follows:

                                  in_pattern_p  related_stmt    vec_stmt
         S1: a_i = ....                 -       -               -
         S2: a_2 = ..use(a_i)..         -       -               -
         S3: a_1 = ..use(a_2)..         -       -               -
         S4: a_0 = ..use(a_1)..         true    S6              -
           '---> S6: a_new = ....       -       S4              -
         S5: ... = ..use(a_0)..         -       -               -

   (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
   to each other through the RELATED_STMT field).

   S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
   of S4 because it will replace all its uses.  Stmts {S1,S2,S3} will
   remain irrelevant unless used by stmts other than S4.

   If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
   (because they are marked as irrelevant).  It will vectorize S6, and record
   a pointer to the new vector stmt VS6 from S6 (as usual).
   S4 will be skipped, and S5 will be vectorized as usual:

                                  in_pattern_p  related_stmt    vec_stmt
         S1: a_i = ....                 -       -               -
         S2: a_2 = ..use(a_i)..         -       -               -
         S3: a_1 = ..use(a_2)..         -       -               -
       > VS6: va_new = ....             -       -               -
         S4: a_0 = ..use(a_1)..         true    S6              VS6
           '---> S6: a_new = ....       -       S4              VS6
       > VS5: ... = ..vuse(va_new)..    -       -               -
         S5: ... = ..use(a_0)..         -       -               -

   DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
   elsewhere), and we'll end up with:

        VS6: va_new = ....
        VS5: ... = ..vuse(va_new)..

   In case of more than one pattern statements, e.g., widen-mult with
   intermediate type:

     S1  a_t = ;
     S2  a_T = (TYPE) a_t;
           '--> S3: a_it = (interm_type) a_t;
     S4  prod_T = a_T * CONST;
           '--> S5: prod_T' = a_it w* CONST;

   there may be other users of a_T outside the pattern.  In that case S2 will
   be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
   and vectorized.  The vector stmt VS2 will be recorded in S2, and VS3 will
   be recorded in S3.  */
void
vect_pattern_recog (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  struct loop *loop;
  basic_block *bbs;
  unsigned int nbbs;
  gimple_stmt_iterator si;
  unsigned int i, j;
  vect_recog_func_ptr vect_recog_func;
  auto_vec<gimple, 1> stmts_to_replace;
  gimple stmt;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_pattern_recog ===\n");

  if (loop_vinfo)
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      bbs = LOOP_VINFO_BBS (loop_vinfo);
      nbbs = loop->num_nodes;
    }
  else
    {
      bbs = &BB_VINFO_BB (bb_vinfo);
      nbbs = 1;
    }

  /* Scan through the loop stmts, applying the pattern recognition
     functions starting at each stmt visited:  */
  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          if (bb_vinfo && (stmt = gsi_stmt (si))
              && vinfo_for_stmt (stmt)
              && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
            continue;

          /* Scan over all generic vect_recog_xxx_pattern functions.  */
          for (j = 0; j < NUM_PATTERNS; j++)
            {
              vect_recog_func = vect_vect_recog_func_ptrs[j];
              vect_pattern_recog_1 (vect_recog_func, si,
                                    &stmts_to_replace);
            }
        }
    }
}