/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006-2014 Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "stor-layout.h"
#include "target.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimple-ssa.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "cfgloop.h"
#include "expr.h"
#include "optabs.h"
#include "params.h"
#include "tree-data-ref.h"
#include "tree-vectorizer.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "diagnostic-core.h"
#include "dumpfile.h"
/* Pattern recognition functions  */
static gimple vect_recog_widen_sum_pattern (vec<gimple> *, tree *,
					    tree *);
static gimple vect_recog_widen_mult_pattern (vec<gimple> *, tree *,
					     tree *);
static gimple vect_recog_dot_prod_pattern (vec<gimple> *, tree *,
					   tree *);
static gimple vect_recog_sad_pattern (vec<gimple> *, tree *,
				      tree *);
static gimple vect_recog_pow_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_over_widening_pattern (vec<gimple> *, tree *,
						tree *);
static gimple vect_recog_widen_shift_pattern (vec<gimple> *,
					      tree *, tree *);
static gimple vect_recog_rotate_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_vector_vector_shift_pattern (vec<gimple> *,
						      tree *, tree *);
static gimple vect_recog_divmod_pattern (vec<gimple> *,
					 tree *, tree *);
static gimple vect_recog_mixed_size_cond_pattern (vec<gimple> *,
						  tree *, tree *);
static gimple vect_recog_bool_pattern (vec<gimple> *, tree *, tree *);

static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
  vect_recog_widen_mult_pattern,
  vect_recog_widen_sum_pattern,
  vect_recog_dot_prod_pattern,
  vect_recog_sad_pattern,
  vect_recog_pow_pattern,
  vect_recog_widen_shift_pattern,
  vect_recog_over_widening_pattern,
  vect_recog_rotate_pattern,
  vect_recog_vector_vector_shift_pattern,
  vect_recog_divmod_pattern,
  vect_recog_mixed_size_cond_pattern,
  vect_recog_bool_pattern
};
static inline void
append_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
				      stmt);
}

static inline void
new_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
  append_pattern_def_seq (stmt_info, stmt);
}
/* Check whether STMT2 is in the same loop or basic block as STMT1.
   Which of the two applies depends on whether we're currently doing
   loop-based or basic-block-based vectorization, as determined by
   the vinfo_for_stmt for STMT1 (which must be defined).

   If this returns true, vinfo_for_stmt for STMT2 is guaranteed
   to be defined as well.  */

static bool
vect_same_loop_or_bb_p (gimple stmt1, gimple stmt2)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);

  if (!gimple_bb (stmt2))
    return false;

  if (loop_vinfo)
    {
      struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt2)))
	return false;
    }
  else
    {
      if (gimple_bb (stmt2) != BB_VINFO_BB (bb_vinfo)
	  || gimple_code (stmt2) == GIMPLE_PHI)
	return false;
    }

  gcc_assert (vinfo_for_stmt (stmt2));
  return true;
}
/* If the LHS of DEF_STMT has a single use, and that statement is
   in the same loop or basic block, return it.  */

static gimple
vect_single_imm_use (gimple def_stmt)
{
  tree lhs = gimple_assign_lhs (def_stmt);
  use_operand_p use_p;
  gimple use_stmt;

  if (!single_imm_use (lhs, &use_p, &use_stmt))
    return NULL;

  if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
    return NULL;

  return use_stmt;
}
/* Check whether NAME, an ssa-name used in USE_STMT,
   is a result of a type promotion, such that:
     DEF_STMT: NAME = NOP (name0)
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.  */

static bool
type_conversion_p (tree name, gimple use_stmt, bool check_sign,
		   tree *orig_type, gimple *def_stmt, bool *promotion)
{
  tree dummy;
  gimple dummy_gimple;
  loop_vec_info loop_vinfo;
  stmt_vec_info stmt_vinfo;
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;
  tree def;
  bb_vec_info bb_vinfo;

  stmt_vinfo = vinfo_for_stmt (use_stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  if (!vect_is_simple_use (name, use_stmt, loop_vinfo, bb_vinfo, def_stmt,
			   &def, &dt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (!*def_stmt)
    return false;

  if (!is_gimple_assign (*def_stmt))
    return false;

  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *orig_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
    return false;

  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
    *promotion = true;
  else
    *promotion = false;

  if (!vect_is_simple_use (oprnd0, *def_stmt, loop_vinfo,
			   bb_vinfo, &dummy_gimple, &dummy, &dt))
    return false;

  return true;
}
/* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
   is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var.  */

static tree
vect_recog_temp_ssa_var (tree type, gimple stmt)
{
  return make_temp_ssa_name (type, stmt, "patt");
}
/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE1 prod;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
   same size of 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */
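/* Worked example (illustrative only, not from the original sources; the
   concrete element types are assumptions):

       short x[256], y[256];
       int sum = 0;
       for (int i = 0; i < 256; i++)
         sum += x[i] * y[i];      // S3..S7 above; replaced by
                                  // DOT_PROD_EXPR <x[i], y[i], sum>
*/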
static gimple
vect_recog_dot_prod_pattern (vec<gimple> *stmts, tree *type_in,
			     tree *type_out)
{
  gimple stmt, last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  tree oprnd00, oprnd01;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  tree prod_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DPROD = DX * DY;
          DDPROD = (TYPE2) DPROD;
          sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type
     - sum is the same size of DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (oprnd0);
    }
  else
    {
      gimple def_stmt;

      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (last_stmt);
      oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), type)
	  || !types_compatible_p (TREE_TYPE (oprnd1), type))
        return NULL;
      stmt = last_stmt;

      if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
			     &promotion)
	  && promotion)
        {
          stmt = def_stmt;
          oprnd0 = gimple_assign_rhs1 (stmt);
        }
      else
        half_type = type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr  */
  if (TREE_CODE (oprnd0) != SSA_NAME)
    return NULL;

  prod_type = half_type;
  stmt = SSA_NAME_DEF_STMT (oprnd0);

  /* It could not be the dot_prod pattern if the stmt is outside the loop.  */
  if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
     inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (stmt))
    return NULL;
  stmt_vinfo = vinfo_for_stmt (stmt);
  gcc_assert (stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
    return NULL;
  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as a widening multiplication?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
        return NULL;
      stmt_vinfo = vinfo_for_stmt (stmt);
      gcc_assert (stmt_vinfo);
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
      oprnd00 = gimple_assign_rhs1 (stmt);
      oprnd01 = gimple_assign_rhs2 (stmt);
      STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (last_stmt))
	= STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
    }
  else
    {
      tree half_type0, half_type1;
      gimple def_stmt;
      tree oprnd0, oprnd1;

      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
          || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
        return NULL;
      if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
			      &promotion)
	  || !promotion)
        return NULL;
      oprnd00 = gimple_assign_rhs1 (def_stmt);
      if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
			      &promotion)
	  || !promotion)
        return NULL;
      oprnd01 = gimple_assign_rhs1 (def_stmt);
      if (!types_compatible_p (half_type0, half_type1))
        return NULL;
      if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
	return NULL;
    }

  half_type = TREE_TYPE (oprnd00);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (DOT_PROD_EXPR, var,
					       oprnd00, oprnd01, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_dot_prod_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}
/* Function vect_recog_sad_pattern

   Try to find the following Sum of Absolute Difference (SAD) pattern:

     unsigned type x_t, y_t;
     signed TYPE1 diff, abs_diff;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  diff = x_T - y_T;
     S6  abs_diff = ABS_EXPR <diff>;
     [S7  abs_diff = (TYPE2) abs_diff;  #optional]
     S8  sum_1 = abs_diff + sum_0;

   where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
   same size of 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S8, the pattern
   {S3,S4,S5,S6,S7,S8} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        SAD_EXPR <x_t, y_t, sum_0>
  */
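/* Worked example (illustrative only, not from the original sources; the
   concrete element types are assumptions):

       unsigned char a[256], b[256];
       int sad = 0;
       for (int i = 0; i < 256; i++)
         sad += abs (a[i] - b[i]);   // replaced by SAD_EXPR <a[i], b[i], sad>
*/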
static gimple
vect_recog_sad_pattern (vec<gimple> *stmts, tree *type_in,
			tree *type_out)
{
  gimple last_stmt = (*stmts)[0];
  tree sad_oprnd0, sad_oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree half_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  tree sum_type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DDIFF = DX - DY;
          DAD = ABS_EXPR <DDIFF>;
          DAD = (TYPE2) DAD;
          sum_1 = DAD + sum_0;
     In which
     - DX is at least double the size of X
     - DY is at least double the size of Y
     - DX, DY, DDIFF, DAD all have the same type
     - sum is the same size of DAD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD w+ sum_0;    #widen summation
     or
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD + sum_0;     #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  tree plus_oprnd0, plus_oprnd1;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      gimple stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      sum_type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      plus_oprnd0 = gimple_assign_rhs1 (stmt);
      plus_oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (plus_oprnd0);
    }
  else
    {
      gimple def_stmt;

      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
        return NULL;
      plus_oprnd0 = gimple_assign_rhs1 (last_stmt);
      plus_oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (plus_oprnd0), sum_type)
	  || !types_compatible_p (TREE_TYPE (plus_oprnd1), sum_type))
        return NULL;

      /* The type conversion could be promotion, demotion,
         or just signed -> unsigned.  */
      if (type_conversion_p (plus_oprnd0, last_stmt, false,
                             &half_type, &def_stmt, &promotion))
        plus_oprnd0 = gimple_assign_rhs1 (def_stmt);
      else
        half_type = sum_type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that plus_oprnd1 is the reduction variable (defined by a
     loop-header phi), and plus_oprnd0 is an ssa-name defined by a stmt in the
     loop body.
     Then check that plus_oprnd0 is defined by an abs_expr.  */

  if (TREE_CODE (plus_oprnd0) != SSA_NAME)
    return NULL;

  tree abs_type = half_type;
  gimple abs_stmt = SSA_NAME_DEF_STMT (plus_oprnd0);

  /* It could not be the sad pattern if the abs_stmt is outside the loop.  */
  if (!gimple_bb (abs_stmt)
      || !flow_bb_inside_loop_p (loop, gimple_bb (abs_stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
     inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (abs_stmt))
    return NULL;

  stmt_vec_info abs_stmt_vinfo = vinfo_for_stmt (abs_stmt);
  gcc_assert (abs_stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (abs_stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR)
    return NULL;

  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
  if (!types_compatible_p (TREE_TYPE (abs_oprnd), abs_type))
    return NULL;
  if (TYPE_UNSIGNED (abs_type))
    return NULL;

  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */

  if (TREE_CODE (abs_oprnd) != SSA_NAME)
    return NULL;

  gimple diff_stmt = SSA_NAME_DEF_STMT (abs_oprnd);

  /* It could not be the sad pattern if the diff_stmt is outside the loop.  */
  if (!gimple_bb (diff_stmt)
      || !flow_bb_inside_loop_p (loop, gimple_bb (diff_stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
     inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (diff_stmt))
    return NULL;

  stmt_vec_info diff_stmt_vinfo = vinfo_for_stmt (diff_stmt);
  gcc_assert (diff_stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (diff_stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (diff_stmt) != MINUS_EXPR)
    return NULL;

  tree half_type0, half_type1;
  gimple def_stmt;

  tree minus_oprnd0 = gimple_assign_rhs1 (diff_stmt);
  tree minus_oprnd1 = gimple_assign_rhs2 (diff_stmt);

  if (!types_compatible_p (TREE_TYPE (minus_oprnd0), abs_type)
      || !types_compatible_p (TREE_TYPE (minus_oprnd1), abs_type))
    return NULL;
  if (!type_conversion_p (minus_oprnd0, diff_stmt, false,
                          &half_type0, &def_stmt, &promotion)
      || !promotion)
    return NULL;
  sad_oprnd0 = gimple_assign_rhs1 (def_stmt);

  if (!type_conversion_p (minus_oprnd1, diff_stmt, false,
                          &half_type1, &def_stmt, &promotion)
      || !promotion)
    return NULL;
  sad_oprnd1 = gimple_assign_rhs1 (def_stmt);

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;
  if (TYPE_PRECISION (abs_type) < TYPE_PRECISION (half_type0) * 2
      || TYPE_PRECISION (sum_type) < TYPE_PRECISION (half_type0) * 2)
    return NULL;

  *type_in = TREE_TYPE (sad_oprnd0);
  *type_out = sum_type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  tree var = vect_recog_temp_ssa_var (sum_type, NULL);
  gimple pattern_stmt = gimple_build_assign_with_ops
			  (SAD_EXPR, var, sad_oprnd0, sad_oprnd1, plus_oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_sad_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}
/* Handle widening operation by a constant.  At the moment we support MULT_EXPR
   and LSHIFT_EXPR.

   For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
   we check that CONST_OPRND is less or equal to the size of HALF_TYPE.

   Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
   HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
   that satisfies the above restrictions, we can perform a widening operation
   from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
   with a_it = (interm_type) a_t;  */
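/* Worked example (illustrative only, not from the original sources; the
   concrete types are assumptions): with 8-bit 'char' and 32-bit 'int',
   TYPE is 4x HALF_TYPE, and a constant that does not fit 'char' but fits
   an intermediate 'short' still permits a widening multiply:

       char a_t;
       int prod = a_t * 300;    // 300 fits short: a_it = (short) a_t;
                                //                 prod = a_it w* 300;
*/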
static bool
vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
			       tree const_oprnd, tree *oprnd,
			       vec<gimple> *stmts, tree type,
			       tree *half_type, gimple def_stmt)
{
  tree new_type, new_oprnd;
  gimple new_stmt;

  if (code != MULT_EXPR && code != LSHIFT_EXPR)
    return false;

  if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
       || (code == LSHIFT_EXPR
	   && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
	      != 1))
      && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
    {
      /* CONST_OPRND is a constant of HALF_TYPE.  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      return true;
    }

  if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
    return false;

  if (!vect_same_loop_or_bb_p (stmt, def_stmt))
    return false;

  /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
     a type 2 times bigger than HALF_TYPE.  */
  new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
					     TYPE_UNSIGNED (type));
  if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
      || (code == LSHIFT_EXPR
	  && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
    return false;

  /* Use NEW_TYPE for widening operation.  */
  if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
    {
      new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
      /* Check if the already created pattern stmt is what we need.  */
      if (!is_gimple_assign (new_stmt)
	  || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
	  || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
	return false;

      stmts->safe_push (def_stmt);
      *oprnd = gimple_assign_lhs (new_stmt);
    }
  else
    {
      /* Create a_T = (NEW_TYPE) a_t;  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      new_oprnd = make_ssa_name (new_type, NULL);
      new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd,
					       NULL_TREE);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
      stmts->safe_push (def_stmt);
      *oprnd = new_oprnd;
    }

  *half_type = new_type;
  return true;
}
/* Function vect_recog_widen_mult_pattern

   Try to find the following pattern:

     type1 a_t;
     type2 b_t;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;

   where type 'TYPE' is at least double the size of type 'type1' and 'type2'.

   Also detect unsigned cases:

     unsigned type1 a_t;
     unsigned type2 b_t;
     unsigned TYPE u_prod_T;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;
     S6  u_prod_T = (unsigned TYPE) prod_T;

   and multiplication by constants:

     type a_t;
     TYPE a_T, prod_T;

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
     S5  prod_T = a_T * CONST;

   A special case of multiplication by constants is when 'TYPE' is 4 times
   bigger than 'type', but CONST fits an intermediate type 2 times smaller
   than 'TYPE'.  In that case we create an additional pattern stmt for S3
   to create a variable of the intermediate type, and perform widen-mult
   on the intermediate type as well:

     type a_t;
     interm_type a_it;
     TYPE a_T, prod_T, prod_T';

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
           '--> a_it = (interm_type) a_t;
     S5  prod_T = a_T * CONST;
           '--> prod_T' = a_it w* CONST;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
   is detected.  In case of unsigned widen-mult, the original stmt (S5) is
   replaced with S6 in STMTS.  In case of multiplication by a constant
   of an intermediate type (the last case above), STMTS also contains S3
   (inserted before S5).

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_MULT <a_t, b_t>
   If the result of WIDEN_MULT needs to be converted to a larger type, the
   returned stmt will be this type conversion stmt.
*/
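/* Worked example (illustrative only, not from the original sources; the
   concrete element types are assumptions):

       short a[256], b[256];
       int c[256];
       for (int i = 0; i < 256; i++)
         c[i] = a[i] * b[i];     // replaced by WIDEN_MULT_EXPR <a[i], b[i]>
*/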
static gimple
vect_recog_widen_mult_pattern (vec<gimple> *stmts,
			       tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  gimple def_stmt0, def_stmt1;
  tree oprnd0, oprnd1;
  tree type, half_type0, half_type1;
  gimple new_stmt = NULL, pattern_stmt = NULL;
  tree vectype, vecitype;
  tree var;
  tree itype;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  bool op1_ok;
  bool promotion;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* Check argument 0.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
			  &promotion)
      || !promotion)
    return NULL;
  /* Check argument 1.  */
  op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
			      &def_stmt1, &promotion);

  if (op1_ok && promotion)
    {
      oprnd0 = gimple_assign_rhs1 (def_stmt0);
      oprnd1 = gimple_assign_rhs1 (def_stmt1);
    }
  else
    {
      if (TREE_CODE (oprnd1) == INTEGER_CST
	  && TREE_CODE (half_type0) == INTEGER_TYPE
	  && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
					    &oprnd0, stmts, type,
					    &half_type0, def_stmt0))
	{
	  half_type1 = half_type0;
	  oprnd1 = fold_convert (half_type1, oprnd1);
	}
      else
	return NULL;
    }

  /* If the two arguments have different sizes, convert the one with
     the smaller type into the larger type.  */
  if (TYPE_PRECISION (half_type0) != TYPE_PRECISION (half_type1))
    {
      tree *oprnd = NULL;
      gimple def_stmt = NULL;

      if (TYPE_PRECISION (half_type0) < TYPE_PRECISION (half_type1))
	{
	  def_stmt = def_stmt0;
	  half_type0 = half_type1;
	  oprnd = &oprnd0;
	}
      else
	{
	  def_stmt = def_stmt1;
	  half_type1 = half_type0;
	  oprnd = &oprnd1;
	}

      tree old_oprnd = gimple_assign_rhs1 (def_stmt);
      tree new_oprnd = make_ssa_name (half_type0, NULL);
      new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
					       old_oprnd, NULL_TREE);
      *oprnd = new_oprnd;
    }

  /* Handle unsigned case.  Look for
     S6  u_prod_T = (unsigned TYPE) prod_T;
     Use unsigned TYPE as the type for WIDEN_MULT_EXPR.  */
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
    {
      gimple use_stmt;
      tree use_lhs;
      tree use_type;

      if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
	return NULL;

      use_stmt = vect_single_imm_use (last_stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt)
	  || gimple_assign_rhs_code (use_stmt) != NOP_EXPR)
	return NULL;

      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      if (!INTEGRAL_TYPE_P (use_type)
	  || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
	  || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
	return NULL;

      type = use_type;
      last_stmt = use_stmt;
    }

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;

  /* If TYPE is more than twice larger than HALF_TYPE, we use WIDEN_MULT
     to get an intermediate result of type ITYPE.  In this case we need
     to build a statement to convert this intermediate result to type TYPE.  */
  itype = type;
  if (TYPE_PRECISION (type) > TYPE_PRECISION (half_type0) * 2)
    itype = build_nonstandard_integer_type
	      (GET_MODE_BITSIZE (TYPE_MODE (half_type0)) * 2,
	       TYPE_UNSIGNED (type));

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_recog_widen_mult_pattern: detected:\n");

  /* Check target support  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vecitype = get_vectype_for_scalar_type (itype);
  if (!vectype
      || !vecitype
      || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
					  vecitype, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = get_vectype_for_scalar_type (type);

  /* Pattern supported.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (itype, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, var, oprnd0,
					       oprnd1);

  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;

  /* If the original two operands have different sizes, we may need to convert
     the smaller one into the larger type.  If this is the case, at this point
     the new stmt is already built.  */
  if (new_stmt)
    {
      append_pattern_def_seq (stmt_vinfo, new_stmt);
      stmt_vec_info new_stmt_info
	= new_stmt_vec_info (new_stmt, loop_vinfo, bb_vinfo);
      set_vinfo_for_stmt (new_stmt, new_stmt_info);
      STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
    }

  /* If ITYPE is not TYPE, we need to build a type conversion stmt to convert
     the result of the widen-mult operation into type TYPE.  */
  if (itype != type)
    {
      append_pattern_def_seq (stmt_vinfo, pattern_stmt);
      stmt_vec_info pattern_stmt_info
	= new_stmt_vec_info (pattern_stmt, loop_vinfo, bb_vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
      STMT_VINFO_VECTYPE (pattern_stmt_info) = vecitype;
      pattern_stmt
	= gimple_build_assign_with_ops (NOP_EXPR,
					vect_recog_temp_ssa_var (type, NULL),
					gimple_assign_lhs (pattern_stmt),
					NULL_TREE);
    }

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}
/* Function vect_recog_pow_pattern

   Try to find the following pattern:

     x = POW (y, N);

   with POW being one of pow, powf, powi, powif and N being
   either 2 or 0.5.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        x = x * x
   or
        x = sqrt (x)
*/
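/* Worked example (illustrative only, not from the original sources):

       double y = pow (x, 2.0);    // replaced by  y = x * x;
       double z = pow (x, 0.5);    // replaced by  z = sqrt (x);
*/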
static gimple
vect_recog_pow_pattern (vec<gimple> *stmts, tree *type_in,
			tree *type_out)
{
  gimple last_stmt = (*stmts)[0];
  tree fn, base, exp = NULL;
  gimple stmt;
  tree var;

  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  fn = gimple_call_fndecl (last_stmt);
  if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
    return NULL;

  switch (DECL_FUNCTION_CODE (fn))
    {
    case BUILT_IN_POWIF:
    case BUILT_IN_POWI:
    case BUILT_IN_POWF:
    case BUILT_IN_POW:
      base = gimple_call_arg (last_stmt, 0);
      exp = gimple_call_arg (last_stmt, 1);
      if (TREE_CODE (exp) != REAL_CST
	  && TREE_CODE (exp) != INTEGER_CST)
	return NULL;
      break;

    default:
      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  *type_out = NULL_TREE;

  /* Catch squaring.  */
  if ((tree_fits_shwi_p (exp)
       && tree_to_shwi (exp) == 2)
      || (TREE_CODE (exp) == REAL_CST
	  && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
    {
      *type_in = TREE_TYPE (base);

      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign_with_ops (MULT_EXPR, var, base, base);
      return stmt;
    }

  /* Catch square root.  */
  if (TREE_CODE (exp) == REAL_CST
      && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
    {
      tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
      *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
      if (*type_in)
	{
	  gimple stmt = gimple_build_call (newfn, 1, base);
	  if (vectorizable_function (stmt, *type_in, *type_in)
	      != NULL_TREE)
	    {
	      var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
	      gimple_call_set_lhs (stmt, var);
	      return stmt;
	    }
	}
    }

  return NULL;
}
/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

     type x_t;
     TYPE x_T, sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e - we're
   summing elements of type 'type' into an accumulator of type 'TYPE'.  This is
   a special case of a reduction computation.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.  In the example,
   when this function is called with S3, the pattern {S2,S3} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
	 vectorized without preserving all the intermediate results.  It
	 produces only N/2 (widened) results (by summing up pairs of
	 intermediate results) rather than all N results.  Therefore, we
	 cannot allow this pattern when we want to get all the results and in
	 the correct order (as is the case when this computation is in an
	 inner-loop nested in an outer-loop that is being vectorized).  */
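/* Worked example (illustrative only, not from the original sources; the
   concrete element types are assumptions):

       short x[256];
       int sum = 0;
       for (int i = 0; i < 256; i++)
         sum += x[i];            // replaced by WIDEN_SUM_EXPR <x[i], sum>
*/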
static gimple
vect_recog_widen_sum_pattern (vec<gimple> *stmts, tree *type_in,
			      tree *type_out)
{
  gimple stmt, last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE) X;
          sum_1 = DX + sum_0;
     In which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a cast from type 'type' to type
     'TYPE'.  */

  if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
			  &promotion)
      || !promotion)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (stmt);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_SUM_EXPR, var,
					       oprnd0, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_widen_sum_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}
/* Return TRUE if the operation in STMT can be performed on a smaller type.

   Input:
   STMT - a statement to check.
   DEF - we support operations with two operands, one of which is constant.
	 The other operand can be defined by a demotion operation, or by a
	 previous statement in a sequence of over-promoted operations.  In the
	 latter case DEF is used to replace that operand.  (It is defined by a
	 pattern statement we created for the previous statement in the
	 sequence).

   Input/output:
   NEW_TYPE - Output: a smaller type that we are trying to use.  Input: if not
	 NULL, it's the type of DEF.
   STMTS - additional pattern statements.  If a pattern statement (type
	 conversion) is created in this function, its original statement is
	 added to STMTS.

   Output:
   OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
	 operands to use in the new pattern statement for STMT (will be created
	 in vect_recog_over_widening_pattern ()).
   NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
	 statements for STMT: the first one is a type promotion and the second
	 one is the operation itself.  We return the type promotion statement
	 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
	 the second pattern statement.  */
static bool
vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
				  tree *op0, tree *op1, gimple *new_def_stmt,
				  vec<gimple> *stmts)
{
  enum tree_code code;
  tree const_oprnd, oprnd;
  tree interm_type = NULL_TREE, half_type, new_oprnd, type;
  gimple def_stmt, new_stmt;
  bool first = false;
  bool promotion;

  *new_def_stmt = NULL;

  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
      && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
    return false;

  oprnd = gimple_assign_rhs1 (stmt);
  const_oprnd = gimple_assign_rhs2 (stmt);
  type = gimple_expr_type (stmt);

  if (TREE_CODE (oprnd) != SSA_NAME
      || TREE_CODE (const_oprnd) != INTEGER_CST)
    return false;

  /* If oprnd has other uses besides that in stmt we cannot mark it
     as being part of a pattern only.  */
  if (!has_single_use (oprnd))
    return false;

  /* If we are in the middle of a sequence, we use DEF from a previous
     statement.  Otherwise, OPRND has to be a result of type promotion.  */
  if (*new_type)
    {
      half_type = *new_type;
      oprnd = def;
    }
  else
    {
      first = true;
      if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
			      &promotion)
	  || !promotion
	  || !vect_same_loop_or_bb_p (stmt, def_stmt))
	return false;
    }

  /* Can we perform the operation on a smaller type?  */
  switch (code)
    {
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
    case BIT_AND_EXPR:
      if (!int_fits_type_p (const_oprnd, half_type))
	{
	  /* HALF_TYPE is not enough.  Try a bigger type if possible.  */
	  if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
	    return false;

	  interm_type = build_nonstandard_integer_type (
			TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
	  if (!int_fits_type_p (const_oprnd, interm_type))
	    return false;
	}

      break;

    case LSHIFT_EXPR:
      /* Try intermediate type - HALF_TYPE is not enough for sure.  */
      if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
	return false;

      /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
	 (e.g., if the original value was char, the shift amount is at most 8
	 if we want to use short).  */
      if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
	return false;

      interm_type = build_nonstandard_integer_type (
			TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

      if (!vect_supportable_shift (code, interm_type))
	return false;

      break;

    case RSHIFT_EXPR:
      if (vect_supportable_shift (code, half_type))
	break;

      /* Try intermediate type - HALF_TYPE is not supported.  */
      if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
	return false;

      interm_type = build_nonstandard_integer_type (
			TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

      if (!vect_supportable_shift (code, interm_type))
	return false;

      break;

    default:
      gcc_unreachable ();
    }

  /* There are four possible cases:
     1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
	the first statement in the sequence)
	a. The original, HALF_TYPE, is not enough - we replace the promotion
	   from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
	b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
	   promotion.
     2. OPRND is defined by a pattern statement we created.
	a. Its type is not sufficient for the operation, we create a new stmt:
	   a type conversion for OPRND from HALF_TYPE to INTERM_TYPE.  We store
	   this statement in NEW_DEF_STMT, and it is later put in
	   STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
	b. OPRND is good to use in the new statement.  */
  if (first)
    {
      if (interm_type)
	{
	  /* Replace the original type conversion HALF_TYPE->TYPE with
	     HALF_TYPE->INTERM_TYPE.  */
	  if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
	    {
	      new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
	      /* Check if the already created pattern stmt is what we need.  */
	      if (!is_gimple_assign (new_stmt)
		  || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
		  || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
		return false;

	      stmts->safe_push (def_stmt);
	      oprnd = gimple_assign_lhs (new_stmt);
	    }
	  else
	    {
	      /* Create NEW_OPRND = (INTERM_TYPE) OPRND.  */
	      oprnd = gimple_assign_rhs1 (def_stmt);
	      new_oprnd = make_ssa_name (interm_type, NULL);
	      new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
						       oprnd, NULL_TREE);
	      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
	      stmts->safe_push (def_stmt);
	      oprnd = new_oprnd;
	    }
	}
      else
	{
	  /* Retrieve the operand before the type promotion.  */
	  oprnd = gimple_assign_rhs1 (def_stmt);
	}
    }
  else
    {
      if (interm_type)
	{
	  /* Create a type conversion HALF_TYPE->INTERM_TYPE.  */
	  new_oprnd = make_ssa_name (interm_type, NULL);
	  new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
						   oprnd, NULL_TREE);
	  oprnd = new_oprnd;
	  *new_def_stmt = new_stmt;
	}

      /* Otherwise, OPRND is already set.  */
    }

  if (interm_type)
    *new_type = interm_type;
  else
    *new_type = half_type;

  *op0 = oprnd;
  *op1 = fold_convert (*new_type, const_oprnd);

  return true;
}
/* Try to find a statement or a sequence of statements that can be performed
   on a smaller type:

     type x_t;
     TYPE x_T, res0_T, res1_T;
   loop:
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  res0_T = op (x_T, C0);
     S4  res1_T = op (res0_T, C1);
     S5  ... = () res1_T;  - type demotion

   where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
   constants.
   Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
   be 'type' or some intermediate type.  For now, we expect S5 to be a type
   demotion operation.  We also check that S3 and S4 have only one use.  */
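/* Worked example (illustrative only, not from the original sources): C's
   integer promotion rules over-widen byte operations to 'int', even though
   the computation fits the narrow type:

       unsigned char in[256], out[256];
       for (int i = 0; i < 256; i++)
         out[i] = (in[i] << 1) | 3;   // promoted to int by C rules, but
                                      // all the work fits unsigned char
*/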
static gimple
vect_recog_over_widening_pattern (vec<gimple> *stmts,
				  tree *type_in, tree *type_out)
{
  gimple stmt = stmts->pop ();
  gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL,
	 use_stmt = NULL;
  tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
  tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
  bool first;
  tree type = NULL;

  first = true;
  while (1)
    {
      if (!vinfo_for_stmt (stmt)
	  || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
	return NULL;

      new_def_stmt = NULL;
      if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
					     &op0, &op1, &new_def_stmt,
					     stmts))
	{
	  if (first)
	    return NULL;
	  else
	    break;
	}

      /* STMT can be performed on a smaller type.  Check its uses.  */
      use_stmt = vect_single_imm_use (stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt))
	return NULL;

      /* Create pattern statement for STMT.  */
      vectype = get_vectype_for_scalar_type (new_type);
      if (!vectype)
	return NULL;

      /* We want to collect all the statements for which we create pattern
	 statements, except for the case when the last statement in the
	 sequence doesn't have a corresponding pattern statement.  In such
	 case we associate the last pattern statement with the last statement
	 in the sequence.  Therefore, we only add the original statement to
	 the list if we know that it is not the last.  */
      if (prev_stmt)
	stmts->safe_push (prev_stmt);

      var = vect_recog_temp_ssa_var (new_type, NULL);
      pattern_stmt
	= gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt), var,
					op0, op1);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
      new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "created pattern stmt: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
	  dump_printf (MSG_NOTE, "\n");
	}

      type = gimple_expr_type (stmt);
      prev_stmt = stmt;
      stmt = use_stmt;

      first = false;
    }

  /* We got a sequence.  We expect it to end with a type demotion operation.
     Otherwise, we quit (for now).  There are three possible cases: the
     conversion is to NEW_TYPE (we don't do anything), the conversion is to
     a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
     NEW_TYPE differs (we create a new conversion statement).  */
  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
    {
      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      /* Support only type demotion or signedness change.  */
      if (!INTEGRAL_TYPE_P (use_type)
	  || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
	return NULL;

      /* Check that NEW_TYPE is not bigger than the conversion result.  */
      if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
	return NULL;

      if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
	  || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
	{
	  /* Create NEW_TYPE->USE_TYPE conversion.  */
	  new_oprnd = make_ssa_name (use_type, NULL);
	  pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
						       var, NULL_TREE);
	  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;

	  *type_in = get_vectype_for_scalar_type (new_type);
	  *type_out = get_vectype_for_scalar_type (use_type);

	  /* We created a pattern statement for the last statement in the
	     sequence, so we don't need to associate it with the pattern
	     statement created for PREV_STMT.  Therefore, we add PREV_STMT
	     to the list in order to mark it later in vect_pattern_recog_1.  */
	  if (prev_stmt)
	    stmts->safe_push (prev_stmt);
	}
      else
	{
	  if (prev_stmt)
	    STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
	      = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));

	  *type_in = vectype;
	  *type_out = NULL_TREE;
	}

      stmts->safe_push (use_stmt);
    }
  else
    /* TODO: support general case, create a conversion to the correct type.  */
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_recog_over_widening_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  return pattern_stmt;
}
/* Detect widening shift pattern:

   type a_t;
   TYPE a_T, res_T;

   S1 a_t = ;
   S2 a_T = (TYPE) a_t;
   S3 res_T = a_T << CONST;

  where type 'TYPE' is at least double the size of type 'type'.

  Also detect cases where the shift result is immediately converted
  to another type 'result_type' that is no larger in size than 'TYPE'.
  In those cases we perform a widen-shift that directly results in
  'result_type', to avoid a possible over-widening situation:

  type a_t;
  TYPE a_T, res_T;
  result_type res_result;

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
  S3 res_T = a_T << CONST;
  S4 res_result = (result_type) res_T;
      '--> res_result' = a_t w<< CONST;

  And a case when 'TYPE' is 4 times bigger than 'type'.  In that case we
  create an additional pattern stmt for S2 to create a variable of an
  intermediate type, and perform widen-shift on the intermediate type:

  type a_t;
  interm_type a_it;
  TYPE a_T, res_T, res_T';

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
      '--> a_it = (interm_type) a_t;
  S3 res_T = a_T << CONST;
      '--> res_T' = a_it <<* CONST;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins.
    In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
    in STMTS.  When an intermediate type is used and a pattern statement is
    created for S2, we also put S2 here (before S3).

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the sequence of
    stmts that constitute the pattern.  In this case it will be:
    WIDEN_LSHIFT_EXPR <a_t, CONST>.  */
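/* Worked example (illustrative only, not from the original sources; the
   concrete element types are assumptions):

       short a[256];
       int res[256];
       for (int i = 0; i < 256; i++)
         res[i] = a[i] << 4;     // replaced by WIDEN_LSHIFT_EXPR <a[i], 4>
*/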
static gimple
vect_recog_widen_shift_pattern (vec<gimple> *stmts,
				tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  gimple def_stmt0;
  tree oprnd0, oprnd1;
  tree type, half_type0;
  gimple pattern_stmt;
  tree vectype, vectype_out = NULL_TREE;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  gimple use_stmt;
  bool promotion;

  if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
    return NULL;

  if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
    return NULL;

  /* Check operand 0: it has to be defined by a type promotion.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
			  &promotion)
      || !promotion)
    return NULL;

  /* Check operand 1: has to be positive.  We check that it fits the type
     in vect_handle_widen_op_by_const ().  */
  if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (def_stmt0);
  type = gimple_expr_type (last_stmt);

  /* Check for subsequent conversion to another type.  */
  use_stmt = vect_single_imm_use (last_stmt);
  if (use_stmt && is_gimple_assign (use_stmt)
      && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
      && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
    {
      tree use_lhs = gimple_assign_lhs (use_stmt);
      tree use_type = TREE_TYPE (use_lhs);

      if (INTEGRAL_TYPE_P (use_type)
	  && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
	{
	  last_stmt = use_stmt;
	  type = use_type;
	}
    }

  /* Check if this a widening operation.  */
  if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
				      &oprnd0, stmts,
				      type, &half_type0, def_stmt0))
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_recog_widen_shift_pattern: detected:\n");

  /* Check target support.  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);

  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
					  vectype_out, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt =
    gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR, var, oprnd0, oprnd1);

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}
/* Detect a rotate pattern that wouldn't be otherwise vectorized:

   type a_t, b_t, c_t;

   S0 a_t = b_t r<< c_t;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins,
    i.e. the shift/rotate stmt.  The original stmt (S0) is replaced
    with a sequence:

    S1 d_t = -c_t;
    S2 e_t = d_t & (B - 1);
    S3 f_t = b_t << c_t;
    S4 g_t = b_t >> e_t;
    S0 a_t = f_t | g_t;

    where B is element bitsize of type.

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the rotate
    S0 stmt.  */
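/* Worked example (illustrative only, not from the original sources): the
   shift/or expansion above, written in C for a 32-bit unsigned rotate-left:

       unsigned int rotl (unsigned int b, unsigned int c)
       {
         return (b << c) | (b >> ((-c) & 31));   // S3, S4 and S0
       }
*/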
static gimple
vect_recog_rotate_pattern (vec<gimple> *stmts, tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
  gimple pattern_stmt, def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  enum vect_def_type dt;
  optab optab1, optab2;
  edge ext_def = NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  lhs = gimple_assign_lhs (last_stmt);
  oprnd0 = gimple_assign_rhs1 (last_stmt);
  type = TREE_TYPE (oprnd0);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
      || !INTEGRAL_TYPE_P (type)
      || !TYPE_UNSIGNED (type))
    return NULL;

  if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
			   &def, &dt))
    return NULL;

  if (dt != vect_internal_def
      && dt != vect_constant_def
      && dt != vect_external_def)
    return NULL;

  vectype = get_vectype_for_scalar_type (type);
  if (vectype == NULL_TREE)
    return NULL;

  /* If vector/vector or vector/scalar rotate is supported by the target,
     don't do anything here.  */
  optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
  if (optab1
      && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
    return NULL;

  if (bb_vinfo != NULL || dt != vect_internal_def)
    {
      optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
      if (optab2
	  && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
	return NULL;
    }

  /* If vector/vector or vector/scalar shifts aren't supported by the target,
     don't do anything here either.  */
  optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
  optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
  if (!optab1
      || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
      || !optab2
      || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      if (bb_vinfo == NULL && dt == vect_internal_def)
	return NULL;
      optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
      optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
      if (!optab1
	  || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
	  || !optab2
	  || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
	return NULL;
    }

  *type_in = vectype;
  *type_out = vectype;
  if (*type_in == NULL_TREE)
    return NULL;

  if (dt == vect_external_def
      && TREE_CODE (oprnd1) == SSA_NAME
      && loop_vinfo)
    {
      struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      ext_def = loop_preheader_edge (loop);
      if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
	{
	  basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
	  if (bb == NULL
	      || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
	    ext_def = NULL;
	}
    }

  def = NULL_TREE;
  if (TREE_CODE (oprnd1) == INTEGER_CST
      || TYPE_MODE (TREE_TYPE (oprnd1)) == TYPE_MODE (type))
    def = oprnd1;
  else if (def_stmt && gimple_assign_cast_p (def_stmt))
    {
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (type)
	  && TYPE_PRECISION (TREE_TYPE (rhs1))
	     == TYPE_PRECISION (type))
	def = rhs1;
    }

  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
  if (def == NULL_TREE)
    {
      def = vect_recog_temp_ssa_var (type, NULL);
      def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
					       NULL_TREE);
      if (ext_def)
	{
	  basic_block new_bb
	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
	  gcc_assert (!new_bb);
	}
      else
	append_pattern_def_seq (stmt_vinfo, def_stmt);
    }
  stype = TREE_TYPE (def);

  if (TREE_CODE (def) == INTEGER_CST)
    {
      if (!tree_fits_uhwi_p (def)
	  || tree_to_uhwi (def) >= GET_MODE_PRECISION (TYPE_MODE (type))
	  || integer_zerop (def))
	return NULL;
      def2 = build_int_cst (stype,
			    GET_MODE_PRECISION (TYPE_MODE (type))
			    - tree_to_uhwi (def));
    }
  else
    {
      tree vecstype = get_vectype_for_scalar_type (stype);
      stmt_vec_info def_stmt_vinfo;

      if (vecstype == NULL_TREE)
	return NULL;
      def2 = vect_recog_temp_ssa_var (stype, NULL);
      def_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, def2, def,
					       NULL_TREE);
      if (ext_def)
	{
	  basic_block new_bb
	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
	  gcc_assert (!new_bb);
	}
      else
	{
	  def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
	  set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
	  STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
	  append_pattern_def_seq (stmt_vinfo, def_stmt);
	}

      def2 = vect_recog_temp_ssa_var (stype, NULL);
      tree mask
	= build_int_cst (stype, GET_MODE_PRECISION (TYPE_MODE (stype)) - 1);
      def_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, def2,
					       gimple_assign_lhs (def_stmt),
					       mask);
      if (ext_def)
	{
	  basic_block new_bb
	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
	  gcc_assert (!new_bb);
	}
      else
	{
	  def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
	  set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
	  STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
	  append_pattern_def_seq (stmt_vinfo, def_stmt);
	}
    }

  var1 = vect_recog_temp_ssa_var (type, NULL);
  def_stmt = gimple_build_assign_with_ops (rhs_code == LROTATE_EXPR
					   ? LSHIFT_EXPR : RSHIFT_EXPR,
					   var1, oprnd0, def);
  append_pattern_def_seq (stmt_vinfo, def_stmt);

  var2 = vect_recog_temp_ssa_var (type, NULL);
  def_stmt = gimple_build_assign_with_ops (rhs_code == LROTATE_EXPR
					   ? RSHIFT_EXPR : LSHIFT_EXPR,
					   var2, oprnd0, def2);
  append_pattern_def_seq (stmt_vinfo, def_stmt);

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_recog_rotate_pattern: detected:\n");

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (BIT_IOR_EXPR, var, var1, var2);

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}
/* Detect a vector by vector shift pattern that wouldn't be otherwise
   vectorized:

   type a_t;
   TYPE b_T, res_T;

   S1 a_t = ;
   S2 b_T = ;
   S3 res_T = b_T op a_t;

  where type 'TYPE' is a type with different size than 'type',
  and op is <<, >> or rotate.

  Also detect cases:

   type a_t;
   TYPE b_T, c_T, res_T;

   S0 c_T = ;
   S1 a_t = (type) c_T;
   S2 b_T = ;
   S3 res_T = b_T op a_t;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins,
    i.e. the shift/rotate stmt.  The original stmt (S3) is replaced
    with a shift/rotate which has same type on both operands, in the
    second case just b_T op c_T, in the first case with added cast
    from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the shift/rotate
    S3 stmt.  */
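/* Worked example (illustrative only, not from the original sources): a
   shift whose amount has a narrower type than the value being shifted:

       unsigned char n[256];
       unsigned int x[256];
       for (int i = 0; i < 256; i++)
         x[i] = x[i] << n[i];    // n[i] is cast so that both operands
                                 // get the same vector type
*/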
static gimple
vect_recog_vector_vector_shift_pattern (vec<gimple> *stmts,
                                        tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  tree oprnd0, oprnd1, lhs, var;
  gimple pattern_stmt, def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  enum vect_def_type dt;
  tree def;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  lhs = gimple_assign_lhs (last_stmt);
  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TREE_CODE (oprnd1) != SSA_NAME
      || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
      || TYPE_PRECISION (TREE_TYPE (oprnd1))
         != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
      || TYPE_PRECISION (TREE_TYPE (lhs))
         != TYPE_PRECISION (TREE_TYPE (oprnd0)))
    return NULL;

  if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
                           &def, &dt))
    return NULL;

  if (dt != vect_internal_def)
    return NULL;

  *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
  *type_out = *type_in;
  if (*type_in == NULL_TREE)
    return NULL;

  def = NULL_TREE;
  if (gimple_assign_cast_p (def_stmt))
    {
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
          && TYPE_PRECISION (TREE_TYPE (rhs1))
             == TYPE_PRECISION (TREE_TYPE (oprnd0)))
        def = rhs1;
    }

  if (def == NULL_TREE)
    {
      def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
      def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
                                               NULL_TREE);
      new_pattern_def_seq (stmt_vinfo, def_stmt);
    }

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_recog_vector_vector_shift_pattern: detected:\n");

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
  pattern_stmt = gimple_build_assign_with_ops (rhs_code, var, oprnd0, def);

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}
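/* As an illustration (a hand-written loop, not taken from the testsuite),
   this recognizer targets shifts whose two operands have different modes,
   such as:

     long long x[N];
     int s[N];
     for (int i = 0; i < N; i++)
       x[i] = x[i] << s[i];

   The pattern stmt rebuilds the shift with a same-typed copy of the amount
   (or with the original wider value, when the amount was itself a cast of
   one) so that a vector-by-vector shift instruction can be used.  */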
/* Detect a signed division by a constant that wouldn't be
   otherwise vectorized:

   type a_t, b_t;

   S1 a_t = b_t / N;

  where type 'type' is an integral type and N is a constant.

  Similarly handle modulo by a constant:

   S4 a_t = b_t % N;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins,
    i.e. the division stmt.  S1 is replaced by if N is a power
    of two constant and type is signed:
    S3  y_t = b_t < 0 ? N - 1 : 0;
    S2  x_t = b_t + y_t;
    S1' a_t = x_t >> log2 (N);

    S4 is replaced if N is a power of two constant and
    type is signed by (where *_T temporaries have unsigned type):
    S9  y_T = b_t < 0 ? -1U : 0U;
    S8  z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
    S7  z_t = (type) z_T;
    S6  w_t = b_t + z_t;
    S5  x_t = w_t & (N - 1);
    S4' a_t = x_t - z_t;

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the division
    S1 or modulo S4 stmt.  */
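/* As a scalar sketch (assuming 32-bit int and N == 8; illustrative only),
   the S3/S2/S1' sequence above computes a truncating division:

     int
     sdiv_by_8 (int x)
     {
       int bias = x < 0 ? 7 : 0;      S3: N - 1 when x is negative
       int t = x + bias;              S2
       return t >> 3;                 S1': log2 (8) == 3
     }

   The bias makes the arithmetic shift round toward zero, as
   TRUNC_DIV_EXPR requires, rather than toward negative infinity.  */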
static gimple
vect_recog_divmod_pattern (vec<gimple> *stmts,
                           tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  tree oprnd0, oprnd1, vectype, itype, cond;
  gimple pattern_stmt, def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  optab optab;
  tree q;
  int dummy_int, prec;
  stmt_vec_info def_stmt_vinfo;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case TRUNC_DIV_EXPR:
    case TRUNC_MOD_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  itype = TREE_TYPE (oprnd0);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TREE_CODE (oprnd1) != INTEGER_CST
      || TREE_CODE (itype) != INTEGER_TYPE
      || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
    return NULL;

  vectype = get_vectype_for_scalar_type (itype);
  if (vectype == NULL_TREE)
    return NULL;

  /* If the target can handle vectorized division or modulo natively,
     don't attempt to optimize this.  */
  optab = optab_for_tree_code (rhs_code, vectype, optab_default);
  if (optab != unknown_optab)
    {
      enum machine_mode vec_mode = TYPE_MODE (vectype);
      int icode = (int) optab_handler (optab, vec_mode);
      if (icode != CODE_FOR_nothing)
        return NULL;
    }

  prec = TYPE_PRECISION (itype);
  if (integer_pow2p (oprnd1))
    {
      if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
        return NULL;

      /* Pattern detected.  */
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_recog_divmod_pattern: detected:\n");

      cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
                     build_int_cst (itype, 0));
      if (rhs_code == TRUNC_DIV_EXPR)
        {
          tree var = vect_recog_temp_ssa_var (itype, NULL);
          tree shift;
          def_stmt
            = gimple_build_assign_with_ops (COND_EXPR, var, cond,
                                            fold_build2 (MINUS_EXPR, itype,
                                                         oprnd1,
                                                         build_int_cst (itype,
                                                                        1)),
                                            build_int_cst (itype, 0));
          new_pattern_def_seq (stmt_vinfo, def_stmt);
          var = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt
            = gimple_build_assign_with_ops (PLUS_EXPR, var, oprnd0,
                                            gimple_assign_lhs (def_stmt));
          append_pattern_def_seq (stmt_vinfo, def_stmt);

          shift = build_int_cst (itype, tree_log2 (oprnd1));
          pattern_stmt
            = gimple_build_assign_with_ops (RSHIFT_EXPR,
                                            vect_recog_temp_ssa_var (itype,
                                                                     NULL),
                                            var, shift);
        }
      else
        {
          tree signmask;
          STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
          if (compare_tree_int (oprnd1, 2) == 0)
            {
              signmask = vect_recog_temp_ssa_var (itype, NULL);
              def_stmt
                = gimple_build_assign_with_ops (COND_EXPR, signmask, cond,
                                                build_int_cst (itype, 1),
                                                build_int_cst (itype, 0));
              append_pattern_def_seq (stmt_vinfo, def_stmt);
            }
          else
            {
              tree utype
                = build_nonstandard_integer_type (prec, 1);
              tree vecutype = get_vectype_for_scalar_type (utype);
              tree shift
                = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
                                        - tree_log2 (oprnd1));
              tree var = vect_recog_temp_ssa_var (utype, NULL);

              def_stmt
                = gimple_build_assign_with_ops (COND_EXPR, var, cond,
                                                build_int_cst (utype, -1),
                                                build_int_cst (utype, 0));
              def_stmt_vinfo
                = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
              set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
              STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
              append_pattern_def_seq (stmt_vinfo, def_stmt);
              var = vect_recog_temp_ssa_var (utype, NULL);
              def_stmt
                = gimple_build_assign_with_ops (RSHIFT_EXPR, var,
                                                gimple_assign_lhs (def_stmt),
                                                shift);
              def_stmt_vinfo
                = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
              set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
              STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
              append_pattern_def_seq (stmt_vinfo, def_stmt);
              signmask = vect_recog_temp_ssa_var (itype, NULL);
              def_stmt
                = gimple_build_assign_with_ops (NOP_EXPR, signmask, var,
                                                NULL_TREE);
              append_pattern_def_seq (stmt_vinfo, def_stmt);
            }
          def_stmt
            = gimple_build_assign_with_ops (PLUS_EXPR,
                                            vect_recog_temp_ssa_var (itype,
                                                                     NULL),
                                            oprnd0, signmask);
          append_pattern_def_seq (stmt_vinfo, def_stmt);
          def_stmt
            = gimple_build_assign_with_ops (BIT_AND_EXPR,
                                            vect_recog_temp_ssa_var (itype,
                                                                     NULL),
                                            gimple_assign_lhs (def_stmt),
                                            fold_build2 (MINUS_EXPR, itype,
                                                         oprnd1,
                                                         build_int_cst (itype,
                                                                        1)));
          append_pattern_def_seq (stmt_vinfo, def_stmt);

          pattern_stmt
            = gimple_build_assign_with_ops (MINUS_EXPR,
                                            vect_recog_temp_ssa_var (itype,
                                                                     NULL),
                                            gimple_assign_lhs (def_stmt),
                                            signmask);
        }

      if (dump_enabled_p ())
        dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
                              0);

      stmts->safe_push (last_stmt);

      *type_in = vectype;
      *type_out = vectype;
      return pattern_stmt;
    }

  if (prec > HOST_BITS_PER_WIDE_INT
      || integer_zerop (oprnd1))
    return NULL;

  if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
    return NULL;

  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;

  if (TYPE_UNSIGNED (itype))
    {
      unsigned HOST_WIDE_INT mh, ml;
      int pre_shift, post_shift;
      unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
                                  & GET_MODE_MASK (TYPE_MODE (itype)));
      tree t1, t2, t3, t4;

      if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
        /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0.  */
        return NULL;

      /* Find a suitable multiplier and right shift count
         instead of multiplying with D.  */
      mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);

      /* If the suggested multiplier is more than SIZE bits, we can do better
         for even divisors, using an initial right shift.  */
      if (mh != 0 && (d & 1) == 0)
        {
          pre_shift = floor_log2 (d & -d);
          mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
                                  &ml, &post_shift, &dummy_int);
          gcc_assert (!mh);
        }
      else
        pre_shift = 0;

      if (mh != 0)
        {
          if (post_shift - 1 >= prec)
            return NULL;

          /* t1 = oprnd0 h* ml;
             t2 = oprnd0 - t1;
             t3 = t2 >> 1;
             t4 = t1 + t3;
             q = t4 >> (post_shift - 1);  */
          t1 = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt
            = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
                                            build_int_cst (itype, ml));
          append_pattern_def_seq (stmt_vinfo, def_stmt);

          t2 = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt
            = gimple_build_assign_with_ops (MINUS_EXPR, t2, oprnd0, t1);
          append_pattern_def_seq (stmt_vinfo, def_stmt);

          t3 = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt
            = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
                                            integer_one_node);
          append_pattern_def_seq (stmt_vinfo, def_stmt);

          t4 = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt
            = gimple_build_assign_with_ops (PLUS_EXPR, t4, t1, t3);

          if (post_shift != 1)
            {
              append_pattern_def_seq (stmt_vinfo, def_stmt);

              q = vect_recog_temp_ssa_var (itype, NULL);
              pattern_stmt
                = gimple_build_assign_with_ops (RSHIFT_EXPR, q, t4,
                                                build_int_cst (itype,
                                                               post_shift
                                                               - 1));
            }
          else
            {
              q = t4;
              pattern_stmt = def_stmt;
            }
        }
      else
        {
          if (pre_shift >= prec || post_shift >= prec)
            return NULL;

          /* t1 = oprnd0 >> pre_shift;
             t2 = t1 h* ml;
             q = t2 >> post_shift;  */
          if (pre_shift)
            {
              t1 = vect_recog_temp_ssa_var (itype, NULL);
              def_stmt
                = gimple_build_assign_with_ops (RSHIFT_EXPR, t1, oprnd0,
                                                build_int_cst (NULL,
                                                               pre_shift));
              append_pattern_def_seq (stmt_vinfo, def_stmt);
            }
          else
            t1 = oprnd0;

          t2 = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt
            = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t2, t1,
                                            build_int_cst (itype, ml));

          if (post_shift)
            {
              append_pattern_def_seq (stmt_vinfo, def_stmt);

              q = vect_recog_temp_ssa_var (itype, NULL);
              pattern_stmt
                = gimple_build_assign_with_ops (RSHIFT_EXPR, q, t2,
                                                build_int_cst (itype,
                                                               post_shift));
            }
          else
            {
              q = t2;
              pattern_stmt = def_stmt;
            }
        }
    }
  else
    {
      unsigned HOST_WIDE_INT ml;
      int post_shift;
      HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
      unsigned HOST_WIDE_INT abs_d;
      bool add = false;
      tree t1, t2, t3, t4;

      /* Give up for -1.  */
      if (d == -1)
        return NULL;

      /* Since d might be INT_MIN, we have to cast to
         unsigned HOST_WIDE_INT before negating to avoid
         undefined signed overflow.  */
      abs_d = (d >= 0
               ? (unsigned HOST_WIDE_INT) d
               : - (unsigned HOST_WIDE_INT) d);

      /* n rem d = n rem -d */
      if (rhs_code == TRUNC_MOD_EXPR && d < 0)
        {
          d = abs_d;
          oprnd1 = build_int_cst (itype, abs_d);
        }
      else if (HOST_BITS_PER_WIDE_INT >= prec
               && abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
        /* This case is not handled correctly below.  */
        return NULL;

      choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
      if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
        {
          add = true;
          ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
        }
      if (post_shift >= prec)
        return NULL;

      /* t1 = oprnd0 h* ml;  */
      t1 = vect_recog_temp_ssa_var (itype, NULL);
      def_stmt
        = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
                                        build_int_cst (itype, ml));

      if (add)
        {
          /* t2 = t1 + oprnd0;  */
          append_pattern_def_seq (stmt_vinfo, def_stmt);
          t2 = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt
            = gimple_build_assign_with_ops (PLUS_EXPR, t2, t1, oprnd0);
        }
      else
        t2 = t1;

      if (post_shift)
        {
          /* t3 = t2 >> post_shift;  */
          append_pattern_def_seq (stmt_vinfo, def_stmt);
          t3 = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt
            = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
                                            build_int_cst (itype, post_shift));
        }
      else
        t3 = t2;

      wide_int oprnd0_min, oprnd0_max;
      int msb = 1;
      if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
        {
          if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
            msb = 0;
          else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
            msb = -1;
        }

      if (msb == 0 && d >= 0)
        {
          /* q = t3;  */
          q = t3;
          pattern_stmt = def_stmt;
        }
      else
        {
          /* t4 = oprnd0 >> (prec - 1);
             or if we know from VRP that oprnd0 >= 0
             t4 = 0;
             or if we know from VRP that oprnd0 < 0
             t4 = -1;  */
          append_pattern_def_seq (stmt_vinfo, def_stmt);
          t4 = vect_recog_temp_ssa_var (itype, NULL);
          if (msb != 1)
            def_stmt
              = gimple_build_assign_with_ops (INTEGER_CST,
                                              t4, build_int_cst (itype, msb),
                                              NULL_TREE);
          else
            def_stmt
              = gimple_build_assign_with_ops (RSHIFT_EXPR, t4, oprnd0,
                                              build_int_cst (itype, prec - 1));
          append_pattern_def_seq (stmt_vinfo, def_stmt);

          /* q = t3 - t4;  or q = t4 - t3;  */
          q = vect_recog_temp_ssa_var (itype, NULL);
          pattern_stmt
            = gimple_build_assign_with_ops (MINUS_EXPR, q, d < 0 ? t4 : t3,
                                            d < 0 ? t3 : t4);
        }
    }

  if (rhs_code == TRUNC_MOD_EXPR)
    {
      tree r, t1;

      /* We divided.  Now finish by:
         t1 = q * oprnd1;
         r = oprnd0 - t1;  */
      append_pattern_def_seq (stmt_vinfo, pattern_stmt);

      t1 = vect_recog_temp_ssa_var (itype, NULL);
      def_stmt
        = gimple_build_assign_with_ops (MULT_EXPR, t1, q, oprnd1);
      append_pattern_def_seq (stmt_vinfo, def_stmt);

      r = vect_recog_temp_ssa_var (itype, NULL);
      pattern_stmt
        = gimple_build_assign_with_ops (MINUS_EXPR, r, oprnd0, t1);
    }

  /* Pattern detected.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_divmod_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  stmts->safe_push (last_stmt);

  *type_in = vectype;
  *type_out = vectype;
  return pattern_stmt;
}
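/* A scalar sketch of the mh != 0 path above, for unsigned 32-bit division
   by 7 (multiplier and shift taken from the standard Granlund-Montgomery
   derivation that choose_multiplier implements; shown for illustration
   only, not used by this file):

     unsigned int
     udiv_by_7 (unsigned int n)
     {
       unsigned int t1 = ((unsigned long long) n * 0x24924925u) >> 32;
       unsigned int t2 = n - t1;
       unsigned int t3 = t2 >> 1;
       unsigned int t4 = t1 + t3;
       return t4 >> 2;                       post_shift - 1 == 2
     }

   The subtract/halve/add step recovers the extra multiplier bit that does
   not fit in 32 bits, which is exactly why this path is taken only when
   choose_multiplier reports mh != 0.  */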
/* Function vect_recog_mixed_size_cond_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE a_T, b_T, c_T;
   loop:
     S1  a_T = x_t CMP y_t ? b_T : c_T;

   where type 'TYPE' is an integral type which has different size
   from 'type'.  b_T and c_T are either constants (and if 'TYPE' is wider
   than 'type', the constants need to fit into an integer type
   with the same width as 'type') or results of conversion from 'type'.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the pattern.
        Additionally a def_stmt is added.

        a_it = x_t CMP y_t ? b_it : c_it;
        a_T = (TYPE) a_it;  */
static gimple
vect_recog_mixed_size_cond_pattern (vec<gimple> *stmts, tree *type_in,
                                    tree *type_out)
{
  gimple last_stmt = (*stmts)[0];
  tree cond_expr, then_clause, else_clause;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
  tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
  enum machine_mode cmpmode;
  gimple pattern_stmt, def_stmt;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
  gimple def_stmt0 = NULL, def_stmt1 = NULL;
  bool promotion;
  tree comp_scalar_type;

  if (!is_gimple_assign (last_stmt)
      || gimple_assign_rhs_code (last_stmt) != COND_EXPR
      || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;

  cond_expr = gimple_assign_rhs1 (last_stmt);
  then_clause = gimple_assign_rhs2 (last_stmt);
  else_clause = gimple_assign_rhs3 (last_stmt);

  if (!COMPARISON_CLASS_P (cond_expr))
    return NULL;

  comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
  comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
  if (comp_vectype == NULL_TREE)
    return NULL;

  type = gimple_expr_type (last_stmt);
  if (types_compatible_p (type, comp_scalar_type)
      || ((TREE_CODE (then_clause) != INTEGER_CST
           || TREE_CODE (else_clause) != INTEGER_CST)
          && !INTEGRAL_TYPE_P (comp_scalar_type))
      || !INTEGRAL_TYPE_P (type))
    return NULL;

  if ((TREE_CODE (then_clause) != INTEGER_CST
       && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
                              &def_stmt0, &promotion))
      || (TREE_CODE (else_clause) != INTEGER_CST
          && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
                                 &def_stmt1, &promotion)))
    return NULL;

  if (orig_type0 && orig_type1
      && !types_compatible_p (orig_type0, orig_type1))
    return NULL;

  if (orig_type0)
    {
      if (!types_compatible_p (orig_type0, comp_scalar_type))
        return NULL;
      then_clause = gimple_assign_rhs1 (def_stmt0);
      itype = orig_type0;
    }

  if (orig_type1)
    {
      if (!types_compatible_p (orig_type1, comp_scalar_type))
        return NULL;
      else_clause = gimple_assign_rhs1 (def_stmt1);
      itype = orig_type1;
    }

  cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));

  if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
    return NULL;

  vectype = get_vectype_for_scalar_type (type);
  if (vectype == NULL_TREE)
    return NULL;

  if (expand_vec_cond_expr_p (vectype, comp_vectype))
    return NULL;

  if (itype == NULL_TREE)
    itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
                                            TYPE_UNSIGNED (type));

  if (itype == NULL_TREE
      || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
    return NULL;

  vecitype = get_vectype_for_scalar_type (itype);
  if (vecitype == NULL_TREE)
    return NULL;

  if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
    return NULL;

  if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
    {
      if ((TREE_CODE (then_clause) == INTEGER_CST
           && !int_fits_type_p (then_clause, itype))
          || (TREE_CODE (else_clause) == INTEGER_CST
              && !int_fits_type_p (else_clause, itype)))
        return NULL;
    }

  def_stmt
    = gimple_build_assign_with_ops (COND_EXPR,
                                    vect_recog_temp_ssa_var (itype, NULL),
                                    unshare_expr (cond_expr),
                                    fold_convert (itype, then_clause),
                                    fold_convert (itype, else_clause));
  pattern_stmt
    = gimple_build_assign_with_ops (NOP_EXPR,
                                    vect_recog_temp_ssa_var (type, NULL),
                                    gimple_assign_lhs (def_stmt), NULL_TREE);

  new_pattern_def_seq (stmt_vinfo, def_stmt);
  def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
  set_vinfo_for_stmt (def_stmt, def_stmt_info);
  STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
  *type_in = vecitype;
  *type_out = vectype;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_recog_mixed_size_cond_pattern: detected:\n");

  return pattern_stmt;
}
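/* E.g. (an illustrative loop, not from the testsuite), with a 16-bit
   comparison feeding a 32-bit result:

     short x[N];
     int a[N];
     for (int i = 0; i < N; i++)
       a[i] = x[i] < 0 ? -1 : 1;

   the COND_EXPR is first evaluated in an integer type matching the width
   of the comparison (a_it = x_t < 0 ? -1 : 1) and then widened
   (a_T = (int) a_it), so the compare and the select share one vector
   shape.  */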
/* Helper function of vect_recog_bool_pattern.  Called recursively, return
   true if bool VAR can be optimized that way.  */

static bool
check_bool_pattern (tree var, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  gimple def_stmt;
  enum vect_def_type dt;
  tree def, rhs1;
  enum tree_code rhs_code;

  if (!vect_is_simple_use (var, NULL, loop_vinfo, bb_vinfo, &def_stmt, &def,
                           &dt))
    return false;

  if (dt != vect_internal_def)
    return false;

  if (!is_gimple_assign (def_stmt))
    return false;

  if (!has_single_use (def))
    return false;

  rhs1 = gimple_assign_rhs1 (def_stmt);
  rhs_code = gimple_assign_rhs_code (def_stmt);
  switch (rhs_code)
    {
    case SSA_NAME:
      return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);

    CASE_CONVERT:
      if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
           || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
          && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
        return false;
      return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);

    case BIT_NOT_EXPR:
      return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);

    case BIT_AND_EXPR:
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
      if (!check_bool_pattern (rhs1, loop_vinfo, bb_vinfo))
        return false;
      return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo,
                                 bb_vinfo);

    default:
      if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
        {
          tree vecitype, comp_vectype;

          /* If the comparison can throw, then is_gimple_condexpr will be
             false and we can't make a COND_EXPR/VEC_COND_EXPR out of it.  */
          if (stmt_could_throw_p (def_stmt))
            return false;

          comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
          if (comp_vectype == NULL_TREE)
            return false;

          if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
            {
              enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
              tree itype
                = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
              vecitype = get_vectype_for_scalar_type (itype);
              if (vecitype == NULL_TREE)
                return false;
            }
          else
            vecitype = comp_vectype;
          return expand_vec_cond_expr_p (vecitype, comp_vectype);
        }
      return false;
    }
}
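/* For instance (a sketch of the GIMPLE it walks), given

     bool a = x < y;
     bool b = p != q;
     bool c = a & b;

   check_bool_pattern (c, ...) succeeds as long as both comparisons can be
   expanded as VEC_COND_EXPRs, whereas a bool defined outside the loop or
   basic block (dt != vect_internal_def) makes it fail.  */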
/* Helper function of adjust_bool_pattern.  Add a cast to TYPE to a previous
   stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
   to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT.  */

static tree
adjust_bool_pattern_cast (tree type, tree var)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
  gimple cast_stmt, pattern_stmt;

  gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
  pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
  new_pattern_def_seq (stmt_vinfo, pattern_stmt);
  cast_stmt
    = gimple_build_assign_with_ops (NOP_EXPR,
                                    vect_recog_temp_ssa_var (type, NULL),
                                    gimple_assign_lhs (pattern_stmt),
                                    NULL_TREE);
  STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
  return gimple_assign_lhs (cast_stmt);
}
/* Helper function of vect_recog_bool_pattern.  Do the actual transformations,
   recursively.  VAR is an SSA_NAME that should be transformed from bool
   to a wider integer type, OUT_TYPE is the desired final integer type of
   the whole pattern, TRUEVAL should be NULL unless optimizing
   BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
   in the then_clause, STMTS is where statements with added pattern stmts
   should be pushed to.  */

static tree
adjust_bool_pattern (tree var, tree out_type, tree trueval,
                     vec<gimple> *stmts)
{
  gimple stmt = SSA_NAME_DEF_STMT (var);
  enum tree_code rhs_code, def_rhs_code;
  tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
  location_t loc;
  gimple pattern_stmt, def_stmt;

  rhs1 = gimple_assign_rhs1 (stmt);
  rhs2 = gimple_assign_rhs2 (stmt);
  rhs_code = gimple_assign_rhs_code (stmt);
  loc = gimple_location (stmt);
  switch (rhs_code)
    {
    case SSA_NAME:
    CASE_CONVERT:
      irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
      itype = TREE_TYPE (irhs1);
      pattern_stmt
        = gimple_build_assign_with_ops (SSA_NAME,
                                        vect_recog_temp_ssa_var (itype, NULL),
                                        irhs1, NULL_TREE);
      break;

    case BIT_NOT_EXPR:
      irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
      itype = TREE_TYPE (irhs1);
      pattern_stmt
        = gimple_build_assign_with_ops (BIT_XOR_EXPR,
                                        vect_recog_temp_ssa_var (itype, NULL),
                                        irhs1, build_int_cst (itype, 1));
      break;

    case BIT_AND_EXPR:
      /* Try to optimize x = y & (a < b ? 1 : 0); into
         x = (a < b ? y : 0);

         E.g. for:
           bool a_b, b_b, c_b;
           TYPE d_T;

           S1  a_b = x1 CMP1 y1;
           S2  b_b = x2 CMP2 y2;
           S3  c_b = a_b & b_b;
           S4  d_T = (TYPE) c_b;

         we would normally emit:

           S1'  a_T = x1 CMP1 y1 ? 1 : 0;
           S2'  b_T = x2 CMP2 y2 ? 1 : 0;
           S3'  c_T = a_T & b_T;
           S4'  d_T = c_T;

         but we can save one stmt by using the
         result of one of the COND_EXPRs in the other COND_EXPR and leave
         BIT_AND_EXPR stmt out:

           S1'  a_T = x1 CMP1 y1 ? 1 : 0;
           S3'  c_T = x2 CMP2 y2 ? a_T : 0;
           S4'  d_T = c_T;

         At least when VEC_COND_EXPR is implemented using masks
         cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
         computes the comparison masks and ands it, in one case with
         all ones vector, in the other case with a vector register.
         Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
         often more expensive.  */
      def_stmt = SSA_NAME_DEF_STMT (rhs2);
      def_rhs_code = gimple_assign_rhs_code (def_stmt);
      if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
        {
          tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
          irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
          if (TYPE_PRECISION (TREE_TYPE (irhs1))
              == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
            {
              gimple tstmt;
              stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
              irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
              tstmt = stmts->pop ();
              gcc_assert (tstmt == def_stmt);
              stmts->quick_push (stmt);
              STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
                = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
              gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
              STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
              return irhs2;
            }
          else
            irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
          goto and_ior_xor;
        }
      def_stmt = SSA_NAME_DEF_STMT (rhs1);
      def_rhs_code = gimple_assign_rhs_code (def_stmt);
      if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
        {
          tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
          irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
          if (TYPE_PRECISION (TREE_TYPE (irhs2))
              == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
            {
              gimple tstmt;
              stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
              irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
              tstmt = stmts->pop ();
              gcc_assert (tstmt == def_stmt);
              stmts->quick_push (stmt);
              STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
                = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
              gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
              STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
              return irhs1;
            }
          else
            irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
          goto and_ior_xor;
        }
      /* FALLTHRU */
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
      irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
      irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
    and_ior_xor:
      if (TYPE_PRECISION (TREE_TYPE (irhs1))
          != TYPE_PRECISION (TREE_TYPE (irhs2)))
        {
          int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
          int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
          int out_prec = TYPE_PRECISION (out_type);
          if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
            irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
          else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
            irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
          else
            {
              irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
              irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
            }
        }
      itype = TREE_TYPE (irhs1);
      pattern_stmt
        = gimple_build_assign_with_ops (rhs_code,
                                        vect_recog_temp_ssa_var (itype, NULL),
                                        irhs1, irhs2);
      break;

    default:
      gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
      if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
          || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
          || (TYPE_PRECISION (TREE_TYPE (rhs1))
              != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
        {
          enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
          itype
            = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
        }
      else
        itype = TREE_TYPE (rhs1);
      cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
      if (trueval == NULL_TREE)
        trueval = build_int_cst (itype, 1);
      else
        gcc_checking_assert (useless_type_conversion_p (itype,
                                                        TREE_TYPE (trueval)));
      pattern_stmt
        = gimple_build_assign_with_ops (COND_EXPR,
                                        vect_recog_temp_ssa_var (itype, NULL),
                                        cond_expr, trueval,
                                        build_int_cst (itype, 0));
      break;
    }

  stmts->safe_push (stmt);
  gimple_set_location (pattern_stmt, loc);
  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
  return gimple_assign_lhs (pattern_stmt);
}
/* Function vect_recog_bool_pattern

   Try to find pattern like following:

     bool a_b, b_b, c_b, d_b, e_b;
     TYPE f_T;
   loop:
     S1  a_b = x1 CMP1 y1;
     S2  b_b = x2 CMP2 y2;
     S3  c_b = a_b & b_b;
     S4  d_b = x3 CMP3 y3;
     S5  e_b = c_b | d_b;
     S6  f_T = (TYPE) e_b;

   where type 'TYPE' is an integral type.  Or a similar pattern
   ending in

     S6  f_Y = e_b ? r_Y : s_Y;

   as results from if-conversion of a complex condition.

   Input:

   * LAST_STMT: A stmt at the end from which the pattern
                search begins, i.e. cast of a bool to
                an integer type.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the pattern.

        Assuming size of TYPE is the same as size of all comparisons
        (otherwise some casts would be added where needed), the above
        sequence we create related pattern stmts:
        S1'  a_T = x1 CMP1 y1 ? 1 : 0;
        S3'  c_T = x2 CMP2 y2 ? a_T : 0;
        S4'  d_T = x3 CMP3 y3 ? 1 : 0;
        S5'  e_T = c_T | d_T;
        S6'  f_T = e_T;

        Instead of the above S3' we could emit:
        S2'  b_T = x2 CMP2 y2 ? 1 : 0;
        S3'  c_T = a_T | b_T;
        but the above is more efficient.  */
static gimple
vect_recog_bool_pattern (vec<gimple> *stmts, tree *type_in,
                         tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  enum tree_code rhs_code;
  tree var, lhs, rhs, vectype;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  gimple pattern_stmt;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  var = gimple_assign_rhs1 (last_stmt);
  lhs = gimple_assign_lhs (last_stmt);

  if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
       || !TYPE_UNSIGNED (TREE_TYPE (var)))
      && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  if (CONVERT_EXPR_CODE_P (rhs_code))
    {
      if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
          || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
        return NULL;
      vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
      if (vectype == NULL_TREE)
        return NULL;

      if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
        return NULL;

      rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
      if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
        pattern_stmt
          = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
      else
        pattern_stmt
          = gimple_build_assign_with_ops (NOP_EXPR, lhs, rhs, NULL_TREE);
      *type_out = vectype;
      *type_in = vectype;
      stmts->safe_push (last_stmt);
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_recog_bool_pattern: detected:\n");

      return pattern_stmt;
    }
  else if (rhs_code == COND_EXPR
           && TREE_CODE (var) == SSA_NAME)
    {
      vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
      if (vectype == NULL_TREE)
        return NULL;

      /* Build a scalar type for the boolean result that when
         vectorized matches the vector type of the result in
         size and number of elements.  */
      unsigned prec
        = wi::udiv_trunc (TYPE_SIZE (vectype),
                          TYPE_VECTOR_SUBPARTS (vectype)).to_uhwi ();
      tree type
        = build_nonstandard_integer_type (prec,
                                          TYPE_UNSIGNED (TREE_TYPE (var)));
      if (get_vectype_for_scalar_type (type) == NULL_TREE)
        return NULL;

      if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
        return NULL;

      rhs = adjust_bool_pattern (var, type, NULL_TREE, stmts);
      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
      pattern_stmt
        = gimple_build_assign_with_ops (COND_EXPR, lhs,
                                        build2 (NE_EXPR, boolean_type_node,
                                                rhs, build_int_cst (type, 0)),
                                        gimple_assign_rhs2 (last_stmt),
                                        gimple_assign_rhs3 (last_stmt));
      *type_out = vectype;
      *type_in = vectype;
      stmts->safe_push (last_stmt);
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_recog_bool_pattern: detected:\n");

      return pattern_stmt;
    }
  else if (rhs_code == SSA_NAME
           && STMT_VINFO_DATA_REF (stmt_vinfo))
    {
      stmt_vec_info pattern_stmt_info;
      vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      gcc_assert (vectype != NULL_TREE);
      if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
        return NULL;
      if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
        return NULL;

      rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
      lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
      if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
        {
          tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
          gimple cast_stmt
            = gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs, NULL_TREE);
          new_pattern_def_seq (stmt_vinfo, cast_stmt);
          rhs = rhs2;
        }
      pattern_stmt
        = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
      pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
                                             bb_vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
      STMT_VINFO_DATA_REF (pattern_stmt_info)
        = STMT_VINFO_DATA_REF (stmt_vinfo);
      STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
        = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
      STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
      STMT_VINFO_DR_OFFSET (pattern_stmt_info)
        = STMT_VINFO_DR_OFFSET (stmt_vinfo);
      STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
      STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
        = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
      DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
      *type_out = vectype;
      *type_in = vectype;
      stmts->safe_push (last_stmt);
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_recog_bool_pattern: detected:\n");
      return pattern_stmt;
    }
  else
    return NULL;
}
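/* A typical source for this pattern (illustrative only) is an
   if-converted loop such as:

     for (int i = 0; i < N; i++)
       out[i] = (a[i] < b[i] && c[i] != d[i]) ? 3 : 5;

   where the && has been lowered to a bool BIT_AND_EXPR of two
   comparisons; the recognizer rewrites the bool chain into integer
   COND_EXPRs that the vectorizer can expand as vector masks.  */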
/* Mark statements that are involved in a pattern.  */

static inline void
vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
                         tree pattern_vectype)
{
  stmt_vec_info pattern_stmt_info, def_stmt_info;
  stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (orig_stmt_info);
  gimple def_stmt;

  pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
  if (pattern_stmt_info == NULL)
    {
      pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
                                             bb_vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
    }
  gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));

  STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
  STMT_VINFO_DEF_TYPE (pattern_stmt_info)
    = STMT_VINFO_DEF_TYPE (orig_stmt_info);
  STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
  STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
  STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
  STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
    = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
  if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
    {
      gimple_stmt_iterator si;
      for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
           !gsi_end_p (si); gsi_next (&si))
        {
          def_stmt = gsi_stmt (si);
          def_stmt_info = vinfo_for_stmt (def_stmt);
          if (def_stmt_info == NULL)
            {
              def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo,
                                                 bb_vinfo);
              set_vinfo_for_stmt (def_stmt, def_stmt_info);
            }
          gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
          STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
          STMT_VINFO_DEF_TYPE (def_stmt_info) = vect_internal_def;
          if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
            STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
        }
    }
}
/* Function vect_pattern_recog_1

   Input:
   PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
        computation pattern.
   STMT: A stmt from which the pattern search should start.

   If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
   expression that computes the same functionality and can be used to
   replace the sequence of stmts that are involved in the pattern.

   Output:
   This function checks if the expression returned by PATTERN_RECOG_FUNC is
   supported in vector form by the target.  We use 'TYPE_IN' to obtain the
   relevant vector type.  If 'TYPE_IN' is already a vector type, then this
   indicates that target support had already been checked by
   PATTERN_RECOG_FUNC.  If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC,
   we check that it fits to the available target pattern.

   This function also does some bookkeeping, as explained in the documentation
   for vect_recog_pattern.  */

static void
vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
                      gimple_stmt_iterator si,
                      vec<gimple> *stmts_to_replace)
{
  gimple stmt = gsi_stmt (si), pattern_stmt;
  stmt_vec_info stmt_info;
  loop_vec_info loop_vinfo;
  tree pattern_vectype;
  tree type_in, type_out;
  enum tree_code code;
  int i;
  gimple next;

  stmts_to_replace->truncate (0);
  stmts_to_replace->quick_push (stmt);
  pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
  if (!pattern_stmt)
    return;

  stmt = stmts_to_replace->last ();
  stmt_info = vinfo_for_stmt (stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);

  if (VECTOR_MODE_P (TYPE_MODE (type_in)))
    {
      /* No need to check target support (already checked by the pattern
         recognition function).  */
      pattern_vectype = type_out ? type_out : type_in;
    }
  else
    {
      enum machine_mode vec_mode;
      enum insn_code icode;
      optab optab;

      /* Check target support  */
      type_in = get_vectype_for_scalar_type (type_in);
      if (!type_in)
        return;
      if (type_out)
        type_out = get_vectype_for_scalar_type (type_out);
      else
        type_out = type_in;
      if (!type_out)
        return;
      pattern_vectype = type_out;

      if (is_gimple_assign (pattern_stmt))
        code = gimple_assign_rhs_code (pattern_stmt);
      else
        {
          gcc_assert (is_gimple_call (pattern_stmt));
          code = CALL_EXPR;
        }

      optab = optab_for_tree_code (code, type_in, optab_default);
      vec_mode = TYPE_MODE (type_in);
      if (!optab
          || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
          || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
        return;
    }

  /* Found a vectorizable pattern.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "pattern recognized: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  /* Mark the stmts that are involved in the pattern.  */
  vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);

  /* Patterns cannot be vectorized using SLP, because they change the order of
     computation.  */
  if (loop_vinfo)
    FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
      if (next == stmt)
        LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);

  /* It is possible that additional pattern stmts are created and inserted in
     STMTS_TO_REPLACE.  We create a stmt_info for each of them, and mark the
     relevant statements.  */
  for (i = 0; stmts_to_replace->iterate (i, &stmt)
              && (unsigned) i < (stmts_to_replace->length () - 1);
       i++)
    {
      stmt_info = vinfo_for_stmt (stmt);
      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "additional pattern stmt: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
          dump_printf (MSG_NOTE, "\n");
        }

      vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
    }
}
/* Function vect_pattern_recog

   Input:
   LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
        computation idioms.

   Output - for each computation idiom that is detected we create a new stmt
        that provides the same functionality and that can be vectorized.  We
        also record some information in the struct_stmt_info of the relevant
        stmts, as explained below:

   At the entry to this function we have the following stmts, with the
   following initial value in the STMT_VINFO fields:

         stmt                     in_pattern_p  related_stmt    vec_stmt
         S1: a_i = ....                 -       -               -
         S2: a_2 = ..use(a_i)..         -       -               -
         S3: a_1 = ..use(a_2)..         -       -               -
         S4: a_0 = ..use(a_1)..         -       -               -
         S5: ... = ..use(a_0)..         -       -               -

   Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
   represented by a single stmt.  We then:
   - create a new stmt S6 equivalent to the pattern (the stmt is not
     inserted into the code)
   - fill in the STMT_VINFO fields as follows:

                                  in_pattern_p  related_stmt    vec_stmt
         S1: a_i = ....                 -       -               -
         S2: a_2 = ..use(a_i)..         -       -               -
         S3: a_1 = ..use(a_2)..         -       -               -
         S4: a_0 = ..use(a_1)..         true    S6              -
          '---> S6: a_new = ....        -       S4              -
         S5: ... = ..use(a_0)..         -       -               -

   (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
   to each other through the RELATED_STMT field).

   S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
   of S4 because it will replace all its uses.  Stmts {S1,S2,S3} will
   remain irrelevant unless used by stmts other than S4.

   If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
   (because they are marked as irrelevant).  It will vectorize S6, and record
   a pointer to the new vector stmt VS6 from S6 (as usual).
   S4 will be skipped, and S5 will be vectorized as usual:

                                  in_pattern_p  related_stmt    vec_stmt
         S1: a_i = ....                 -       -               -
         S2: a_2 = ..use(a_i)..         -       -               -
         S3: a_1 = ..use(a_2)..         -       -               -
       > VS6: va_new = ....             -       -               -
         S4: a_0 = ..use(a_1)..         true    S6              VS6
          '---> S6: a_new = ....        -       S4              VS6
       > VS5: ... = ..vuse(va_new)..    -       -               -
         S5: ... = ..use(a_0)..         -       -               -

   DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
   elsewhere), and we'll end up with:

        VS6: va_new = ....
        VS5: ... = ..vuse(va_new)..

   In case of more than one pattern statements, e.g., widen-mult with
   intermediate type:

     S1  a_t = ;
     S2  a_T = (TYPE) a_t;
           '--> S3: a_it = (interm_type) a_t;
     S4  prod_T = a_T * CONST;
           '--> S5: prod_T' = a_it w* CONST;

   there may be other users of a_T outside the pattern.  In that case S2 will
   be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
   and vectorized.  The vector stmt VS2 will be recorded in S2, and VS3 will
   be recorded in S3.  */

void
vect_pattern_recog (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  struct loop *loop;
  basic_block *bbs;
  unsigned int nbbs;
  gimple_stmt_iterator si;
  unsigned int i, j;
  vect_recog_func_ptr vect_recog_func;
  auto_vec<gimple, 1> stmts_to_replace;
  gimple stmt;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_pattern_recog ===\n");

  if (loop_vinfo)
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      bbs = LOOP_VINFO_BBS (loop_vinfo);
      nbbs = loop->num_nodes;
    }
  else
    {
      bbs = &BB_VINFO_BB (bb_vinfo);
      nbbs = 1;
    }

  /* Scan through the loop stmts, applying the pattern recognition
     functions starting at each stmt visited:  */
  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          if (bb_vinfo && (stmt = gsi_stmt (si))
              && vinfo_for_stmt (stmt)
              && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
            continue;

          /* Scan over all generic vect_recog_xxx_pattern functions.  */
          for (j = 0; j < NUM_PATTERNS; j++)
            {
              vect_recog_func = vect_vect_recog_func_ptrs[j];
              vect_pattern_recog_1 (vect_recog_func, si,
                                    &stmts_to_replace);
            }
        }
    }
}