/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006-2015 Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "coretypes.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "dominance.h"
#include "gimple-pretty-print.h"
#include "internal-fn.h"
#include "gimple-iterator.h"
#include "insn-config.h"
#include "insn-codes.h"
#include "tree-data-ref.h"
#include "tree-vectorizer.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "diagnostic-core.h"

/* Pattern recognition functions  */
static gimple vect_recog_widen_sum_pattern (vec<gimple> *, tree *,
                                            tree *);
static gimple vect_recog_widen_mult_pattern (vec<gimple> *, tree *,
                                             tree *);
static gimple vect_recog_dot_prod_pattern (vec<gimple> *, tree *,
                                           tree *);
static gimple vect_recog_sad_pattern (vec<gimple> *, tree *,
                                      tree *);
static gimple vect_recog_pow_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_over_widening_pattern (vec<gimple> *, tree *,
                                                tree *);
static gimple vect_recog_widen_shift_pattern (vec<gimple> *,
                                              tree *, tree *);
static gimple vect_recog_rotate_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_vector_vector_shift_pattern (vec<gimple> *,
                                                      tree *, tree *);
static gimple vect_recog_divmod_pattern (vec<gimple> *,
                                         tree *, tree *);
static gimple vect_recog_mixed_size_cond_pattern (vec<gimple> *,
                                                  tree *, tree *);
static gimple vect_recog_bool_pattern (vec<gimple> *, tree *, tree *);

static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
	vect_recog_widen_mult_pattern,
	vect_recog_widen_sum_pattern,
	vect_recog_dot_prod_pattern,
	vect_recog_sad_pattern,
	vect_recog_pow_pattern,
	vect_recog_widen_shift_pattern,
	vect_recog_over_widening_pattern,
	vect_recog_rotate_pattern,
	vect_recog_vector_vector_shift_pattern,
	vect_recog_divmod_pattern,
	vect_recog_mixed_size_cond_pattern,
	vect_recog_bool_pattern};

static inline void
append_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
				      stmt);
}

static inline void
new_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
  append_pattern_def_seq (stmt_info, stmt);
}

/* Check whether STMT2 is in the same loop or basic block as STMT1.
   Which of the two applies depends on whether we're currently doing
   loop-based or basic-block-based vectorization, as determined by
   the vinfo_for_stmt for STMT1 (which must be defined).

   If this returns true, vinfo_for_stmt for STMT2 is guaranteed
   to be defined as well.  */

static bool
vect_same_loop_or_bb_p (gimple stmt1, gimple stmt2)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);

  if (!gimple_bb (stmt2))
    return false;

  if (loop_vinfo)
    {
      struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt2)))
	return false;
    }
  else
    {
      if (gimple_bb (stmt2) != BB_VINFO_BB (bb_vinfo)
	  || gimple_code (stmt2) == GIMPLE_PHI)
	return false;
    }

  gcc_assert (vinfo_for_stmt (stmt2));
  return true;
}

/* If the LHS of DEF_STMT has a single use, and that statement is
   in the same loop or basic block, return it.  */

static gimple
vect_single_imm_use (gimple def_stmt)
{
  tree lhs = gimple_assign_lhs (def_stmt);
  use_operand_p use_p;
  gimple use_stmt;

  if (!single_imm_use (lhs, &use_p, &use_stmt))
    return NULL;

  if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
    return NULL;

  return use_stmt;
}

/* Check whether NAME, an ssa-name used in USE_STMT,
   is a result of a type promotion, such that:
     DEF_STMT: NAME = NOP (name0)
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.  */

static bool
type_conversion_p (tree name, gimple use_stmt, bool check_sign,
		   tree *orig_type, gimple *def_stmt, bool *promotion)
{
  tree dummy;
  gimple dummy_gimple;
  loop_vec_info loop_vinfo;
  stmt_vec_info stmt_vinfo;
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;
  tree def;
  bb_vec_info bb_vinfo;

  stmt_vinfo = vinfo_for_stmt (use_stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  if (!vect_is_simple_use (name, use_stmt, loop_vinfo, bb_vinfo, def_stmt,
			   &def, &dt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (!*def_stmt)
    return false;

  if (!is_gimple_assign (*def_stmt))
    return false;

  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *orig_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
    return false;

  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
    *promotion = true;
  else
    *promotion = false;

  if (!vect_is_simple_use (oprnd0, *def_stmt, loop_vinfo,
			   bb_vinfo, &dummy_gimple, &dummy, &dt))
    return false;

  return true;
}

/* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
   is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */

static tree
vect_recog_temp_ssa_var (tree type, gimple stmt)
{
  return make_temp_ssa_name (type, stmt, "patt");
}

/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE1 prod;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
   same size of 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */
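
/* For illustration, a C source loop of the shape this recognizer matches
   (a hypothetical example; names and bounds are illustrative), with
   'type' being short and 'TYPE1'/'TYPE2' being int:

     short x[N], y[N];
     int i, sum = 0;
     for (i = 0; i < N; i++)
       sum += x[i] * y[i];

   The implicit promotions of x[i] and y[i] are S3/S4, the multiply is S5
   and the accumulation is S7; the loop body becomes a single
   DOT_PROD_EXPR per vector iteration.  */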

static gimple
vect_recog_dot_prod_pattern (vec<gimple> *stmts, tree *type_in,
			     tree *type_out)
{
  gimple stmt, last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  tree oprnd00, oprnd01;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  tree prod_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  if (loop && nested_in_vect_loop_p (loop, last_stmt))
    return NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DPROD = DX * DY;
          DDPROD = (TYPE2) DPROD;
          sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type
     - sum is the same size of DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (oprnd0);
    }
  else
    {
      gimple def_stmt;

      oprnd0 = gimple_assign_rhs1 (last_stmt);
      oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), type)
	  || !types_compatible_p (TREE_TYPE (oprnd1), type))
        return NULL;
      stmt = last_stmt;

      if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
			     &promotion)
	  && promotion)
        {
          stmt = def_stmt;
          oprnd0 = gimple_assign_rhs1 (stmt);
        }
      else
        half_type = type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr  */
  if (TREE_CODE (oprnd0) != SSA_NAME)
    return NULL;

  prod_type = half_type;
  stmt = SSA_NAME_DEF_STMT (oprnd0);

  /* It could not be the dot_prod pattern if the stmt is outside the loop.  */
  if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
     inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (stmt))
    return NULL;
  stmt_vinfo = vinfo_for_stmt (stmt);
  gcc_assert (stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
    return NULL;
  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as a widening multiplication?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
        return NULL;
      stmt_vinfo = vinfo_for_stmt (stmt);
      gcc_assert (stmt_vinfo);
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
      oprnd00 = gimple_assign_rhs1 (stmt);
      oprnd01 = gimple_assign_rhs2 (stmt);
      STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (last_stmt))
	  = STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
    }
  else
    {
      tree half_type0, half_type1;
      gimple def_stmt;
      tree oprnd0, oprnd1;

      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
          || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
        return NULL;
      if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
			      &promotion)
	  || !promotion)
        return NULL;
      oprnd00 = gimple_assign_rhs1 (def_stmt);
      if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
			      &promotion)
	  || !promotion)
        return NULL;
      oprnd01 = gimple_assign_rhs1 (def_stmt);
      if (!types_compatible_p (half_type0, half_type1))
        return NULL;
      if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
	return NULL;
    }

  half_type = TREE_TYPE (oprnd00);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
				      oprnd00, oprnd01, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_dot_prod_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  return pattern_stmt;
}

/* Function vect_recog_sad_pattern

   Try to find the following Sum of Absolute Difference (SAD) pattern:

     unsigned type x_t, y_t;
     signed TYPE1 diff, abs_diff;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  diff = x_T - y_T;
     S6  abs_diff = ABS_EXPR <diff>;
     [S7  abs_diff = (TYPE2) abs_diff;  #optional]
     S8  sum_1 = abs_diff + sum_0;

   where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
   same size of 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S8, the pattern
   {S3,S4,S5,S6,S7,S8} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        SAD_EXPR <x_t, y_t, sum_0>
  */
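
/* For illustration, the classic SAD kernel has this shape (a hypothetical
   example; names are illustrative), with 'type' being unsigned char:

     unsigned char x[N], y[N];
     int i, sum = 0;
     for (i = 0; i < N; i++)
       sum += abs (x[i] - y[i]);

   The operands are promoted to a signed type (S3/S4) before the
   subtraction, so the difference and its absolute value are signed as
   required; S5-S8 then map to a single SAD_EXPR.  */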

static gimple
vect_recog_sad_pattern (vec<gimple> *stmts, tree *type_in,
			tree *type_out)
{
  gimple last_stmt = (*stmts)[0];
  tree sad_oprnd0, sad_oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree half_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  if (loop && nested_in_vect_loop_p (loop, last_stmt))
    return NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  tree sum_type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DDIFF = DX - DY;
          DAD = ABS_EXPR <DDIFF>;
          [DAD = (TYPE2) DAD;  #optional]
          sum_1 = DAD + sum_0;
     In which
     - DX is at least double the size of X
     - DY is at least double the size of Y
     - DX, DY, DDIFF, DAD all have the same type
     - sum is the same size of DAD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD w+ sum_0;    #widen summation
     or
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD + sum_0;     #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  tree plus_oprnd0, plus_oprnd1;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      gimple stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      sum_type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      plus_oprnd0 = gimple_assign_rhs1 (stmt);
      plus_oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (plus_oprnd0);
    }
  else
    {
      gimple def_stmt;

      plus_oprnd0 = gimple_assign_rhs1 (last_stmt);
      plus_oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (plus_oprnd0), sum_type)
	  || !types_compatible_p (TREE_TYPE (plus_oprnd1), sum_type))
        return NULL;

      /* The type conversion could be promotion, demotion,
         or just signed -> unsigned.  */
      if (type_conversion_p (plus_oprnd0, last_stmt, false,
                             &half_type, &def_stmt, &promotion))
        plus_oprnd0 = gimple_assign_rhs1 (def_stmt);
      else
        half_type = sum_type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that plus_oprnd1 is the reduction variable (defined by a
     loop-header phi), and plus_oprnd0 is an ssa-name defined by a stmt in the
     loop body.  Then check that plus_oprnd0 is defined by an abs_expr.  */

  if (TREE_CODE (plus_oprnd0) != SSA_NAME)
    return NULL;

  tree abs_type = half_type;
  gimple abs_stmt = SSA_NAME_DEF_STMT (plus_oprnd0);

  /* It could not be the sad pattern if the abs_stmt is outside the loop.  */
  if (!gimple_bb (abs_stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (abs_stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
     inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (abs_stmt))
    return NULL;

  stmt_vec_info abs_stmt_vinfo = vinfo_for_stmt (abs_stmt);
  gcc_assert (abs_stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (abs_stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR)
    return NULL;

  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
  if (!types_compatible_p (TREE_TYPE (abs_oprnd), abs_type))
    return NULL;
  if (TYPE_UNSIGNED (abs_type))
    return NULL;

  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */

  if (TREE_CODE (abs_oprnd) != SSA_NAME)
    return NULL;

  gimple diff_stmt = SSA_NAME_DEF_STMT (abs_oprnd);

  /* It could not be the sad pattern if the diff_stmt is outside the loop.  */
  if (!gimple_bb (diff_stmt)
      || !flow_bb_inside_loop_p (loop, gimple_bb (diff_stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
     inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (diff_stmt))
    return NULL;

  stmt_vec_info diff_stmt_vinfo = vinfo_for_stmt (diff_stmt);
  gcc_assert (diff_stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (diff_stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (diff_stmt) != MINUS_EXPR)
    return NULL;

  tree half_type0, half_type1;
  gimple def_stmt;

  tree minus_oprnd0 = gimple_assign_rhs1 (diff_stmt);
  tree minus_oprnd1 = gimple_assign_rhs2 (diff_stmt);

  if (!types_compatible_p (TREE_TYPE (minus_oprnd0), abs_type)
      || !types_compatible_p (TREE_TYPE (minus_oprnd1), abs_type))
    return NULL;
  if (!type_conversion_p (minus_oprnd0, diff_stmt, false,
                          &half_type0, &def_stmt, &promotion)
      || !promotion)
    return NULL;
  sad_oprnd0 = gimple_assign_rhs1 (def_stmt);

  if (!type_conversion_p (minus_oprnd1, diff_stmt, false,
                          &half_type1, &def_stmt, &promotion)
      || !promotion)
    return NULL;
  sad_oprnd1 = gimple_assign_rhs1 (def_stmt);

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;
  if (TYPE_PRECISION (abs_type) < TYPE_PRECISION (half_type0) * 2
      || TYPE_PRECISION (sum_type) < TYPE_PRECISION (half_type0) * 2)
    return NULL;

  *type_in = TREE_TYPE (sad_oprnd0);
  *type_out = sum_type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  tree var = vect_recog_temp_ssa_var (sum_type, NULL);
  gimple pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd0,
					     sad_oprnd1, plus_oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_sad_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  return pattern_stmt;
}

/* Handle widening operation by a constant.  At the moment we support MULT_EXPR
   and LSHIFT_EXPR.

   For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
   we check that CONST_OPRND is less or equal to the size of HALF_TYPE.

   Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
   HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
   that satisfies the above restrictions, we can perform a widening operation
   from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
   with a_it = (interm_type) a_t;  Store such operation in *WSTMT.  */

static bool
vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
		               tree const_oprnd, tree *oprnd,
			       gimple *wstmt, tree type,
			       tree *half_type, gimple def_stmt)
{
  tree new_type, new_oprnd;

  if (code != MULT_EXPR && code != LSHIFT_EXPR)
    return false;

  if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
        || (code == LSHIFT_EXPR
            && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
	       != 1))
      && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
    {
      /* CONST_OPRND is a constant of HALF_TYPE.  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      return true;
    }

  if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
    return false;

  if (!vect_same_loop_or_bb_p (stmt, def_stmt))
    return false;

  /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
     a type 2 times bigger than HALF_TYPE.  */
  new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
                                             TYPE_UNSIGNED (type));
  if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
      || (code == LSHIFT_EXPR
          && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
    return false;

  /* Use NEW_TYPE for widening operation and create a_T = (NEW_TYPE) a_t;  */
  *oprnd = gimple_assign_rhs1 (def_stmt);
  new_oprnd = make_ssa_name (new_type);
  *wstmt = gimple_build_assign (new_oprnd, NOP_EXPR, *oprnd);
  *oprnd = new_oprnd;

  *half_type = new_type;
  return true;
}

/* Function vect_recog_widen_mult_pattern

   Try to find the following pattern:

     type1 a_t;
     type2 b_t;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;

   where type 'TYPE' is at least double the size of type 'type1' and 'type2'.

   Also detect unsigned cases:

     unsigned type1 a_t;
     unsigned type2 b_t;
     unsigned TYPE u_prod_T;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;
     S6  u_prod_T = (unsigned TYPE) prod_T;

   and multiplication by constants:

     type a_t;
     TYPE a_T, prod_T;

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
     S5  prod_T = a_T * CONST;

   A special case of multiplication by constants is when 'TYPE' is 4 times
   bigger than 'type', but CONST fits an intermediate type 2 times smaller
   than 'TYPE'.  In that case we create an additional pattern stmt for S3
   to create a variable of the intermediate type, and perform widen-mult
   on the intermediate type as well:

     type a_t;
     interm_type a_it;
     TYPE a_T, prod_T, prod_T';

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
           '--> a_it = (interm_type) a_t;
     S5  prod_T = a_T * CONST;
           '--> prod_T' = a_it w* CONST;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
   is detected.  In case of unsigned widen-mult, the original stmt (S5) is
   replaced with S6 in STMTS.  In case of multiplication by a constant
   of an intermediate type (the last case above), STMTS also contains S3
   (inserted before S5).

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_MULT <a_t, b_t>
   If the result of WIDEN_MULT needs to be converted to a larger type, the
   returned stmt will be this type conversion stmt.  */
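
/* For illustration (a hypothetical example), with 'type1'/'type2' being
   short and 'TYPE' being int:

     short a[N], b[N];
     int c[N], i;
     for (i = 0; i < N; i++)
       c[i] = a[i] * b[i];

   The implicit promotions of a[i] and b[i] are S3/S4 and the multiply is
   S5; the product is computed directly as WIDEN_MULT <a_t, b_t>.  */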

static gimple
vect_recog_widen_mult_pattern (vec<gimple> *stmts,
                               tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  gimple def_stmt0, def_stmt1;
  tree oprnd0, oprnd1;
  tree type, half_type0, half_type1;
  gimple new_stmt = NULL, pattern_stmt = NULL;
  tree vectype, vecitype;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  bool op1_ok;
  bool promotion;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* Check argument 0.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
                          &promotion)
      || !promotion)
    return NULL;
  /* Check argument 1.  */
  op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
                              &def_stmt1, &promotion);

  if (op1_ok && promotion)
    {
      oprnd0 = gimple_assign_rhs1 (def_stmt0);
      oprnd1 = gimple_assign_rhs1 (def_stmt1);
    }
  else
    {
      if (TREE_CODE (oprnd1) == INTEGER_CST
          && TREE_CODE (half_type0) == INTEGER_TYPE
          && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
		                            &oprnd0, &new_stmt, type,
					    &half_type0, def_stmt0))
	{
	  half_type1 = half_type0;
	  oprnd1 = fold_convert (half_type1, oprnd1);
	}
      else
        return NULL;
    }

  /* If the two arguments have different sizes, convert the one with
     the smaller type into the larger type.  */
  if (TYPE_PRECISION (half_type0) != TYPE_PRECISION (half_type1))
    {
      /* If we already used up the single-stmt slot give up.  */
      if (new_stmt)
	return NULL;

      tree *oprnd = NULL;
      gimple def_stmt = NULL;

      if (TYPE_PRECISION (half_type0) < TYPE_PRECISION (half_type1))
	{
	  def_stmt = def_stmt0;
	  half_type0 = half_type1;
	  oprnd = &oprnd0;
	}
      else
	{
	  def_stmt = def_stmt1;
	  half_type1 = half_type0;
	  oprnd = &oprnd1;
	}

      tree old_oprnd = gimple_assign_rhs1 (def_stmt);
      tree new_oprnd = make_ssa_name (half_type0);
      new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, old_oprnd);
      *oprnd = new_oprnd;
    }

  /* Handle unsigned case.  Look for
     S6  u_prod_T = (unsigned TYPE) prod_T;
     Use unsigned TYPE as the type for WIDEN_MULT_EXPR.  */
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
    {
      gimple use_stmt;
      tree use_lhs;
      tree use_type;

      if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
	return NULL;

      use_stmt = vect_single_imm_use (last_stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt)
	  || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
	return NULL;

      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      if (!INTEGRAL_TYPE_P (use_type)
	  || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
	  || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
	return NULL;

      type = use_type;
      last_stmt = use_stmt;
    }

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;

  /* If TYPE is more than twice larger than HALF_TYPE, we use WIDEN_MULT
     to get an intermediate result of type ITYPE.  In this case we need
     to build a statement to convert this intermediate result to type TYPE.  */
  tree itype = type;
  if (TYPE_PRECISION (type) > TYPE_PRECISION (half_type0) * 2)
    itype = build_nonstandard_integer_type
              (GET_MODE_BITSIZE (TYPE_MODE (half_type0)) * 2,
               TYPE_UNSIGNED (type));

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_recog_widen_mult_pattern: detected:\n");

  /* Check target support  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vecitype = get_vectype_for_scalar_type (itype);
  if (!vectype
      || !vecitype
      || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
					  vecitype, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = get_vectype_for_scalar_type (type);

  /* Pattern supported.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (itype, NULL);
  pattern_stmt = gimple_build_assign (var, WIDEN_MULT_EXPR, oprnd0, oprnd1);

  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;

  /* If the original two operands have different sizes, we may need to convert
     the smaller one into the larger type.  If this is the case, at this point
     the new stmt is already built.  */
  if (new_stmt)
    {
      append_pattern_def_seq (stmt_vinfo, new_stmt);
      stmt_vec_info new_stmt_info
        = new_stmt_vec_info (new_stmt, loop_vinfo, bb_vinfo);
      set_vinfo_for_stmt (new_stmt, new_stmt_info);
      STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
    }

  /* If ITYPE is not TYPE, we need to build a type conversion stmt to convert
     the result of the widen-mult operation into type TYPE.  */
  if (itype != type)
    {
      append_pattern_def_seq (stmt_vinfo, pattern_stmt);
      stmt_vec_info pattern_stmt_info
        = new_stmt_vec_info (pattern_stmt, loop_vinfo, bb_vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
      STMT_VINFO_VECTYPE (pattern_stmt_info) = vecitype;
      pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
					  NOP_EXPR,
					  gimple_assign_lhs (pattern_stmt));
    }

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}

/* Function vect_recog_pow_pattern

   Try to find the following pattern:

     x = POW (y, N);

   with POW being one of pow, powf, powi, powif and N being
   either 2 or 0.5.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        x = x * x
   or
	x = sqrt (x)
*/
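
/* For illustration (a hypothetical example), both calls below have the
   required constant exponent:

     y = pow (x, 2.0);     becomes  y = x * x
     z = pow (x, 0.5);     becomes  z = sqrt (x), when sqrt itself can be
                           vectorized for the target.  */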

static gimple
vect_recog_pow_pattern (vec<gimple> *stmts, tree *type_in,
			tree *type_out)
{
  gimple last_stmt = (*stmts)[0];
  tree fn, base, exp = NULL;
  gimple stmt;
  tree var;

  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  fn = gimple_call_fndecl (last_stmt);
  if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
    return NULL;

  switch (DECL_FUNCTION_CODE (fn))
    {
    case BUILT_IN_POWIF:
    case BUILT_IN_POWI:
    case BUILT_IN_POWF:
    case BUILT_IN_POW:
      base = gimple_call_arg (last_stmt, 0);
      exp = gimple_call_arg (last_stmt, 1);
      if (TREE_CODE (exp) != REAL_CST
	  && TREE_CODE (exp) != INTEGER_CST)
        return NULL;
      break;

    default:
      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  *type_out = NULL_TREE;

  /* Catch squaring.  */
  if ((tree_fits_shwi_p (exp)
       && tree_to_shwi (exp) == 2)
      || (TREE_CODE (exp) == REAL_CST
          && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
    {
      *type_in = TREE_TYPE (base);

      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign (var, MULT_EXPR, base, base);
      return stmt;
    }

  /* Catch square root.  */
  if (TREE_CODE (exp) == REAL_CST
      && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
    {
      tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
      *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
      if (*type_in)
	{
	  gcall *stmt = gimple_build_call (newfn, 1, base);
	  if (vectorizable_function (stmt, *type_in, *type_in)
	      != NULL_TREE)
	    {
	      var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
	      gimple_call_set_lhs (stmt, var);
	      return stmt;
	    }
	}
    }

  return NULL;
}

/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

     type x_t;
     TYPE x_T, sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e - we're
   summing elements of type 'type' into an accumulator of type 'TYPE'.  This
   is a special case of a reduction computation.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.  In the example,
   when this function is called with S3, the pattern {S2,S3} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
	 vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
	 intermediate results) rather than all N results.  Therefore, we
	 cannot allow this pattern when we want to get all the results and in
	 the correct order (as is the case when this computation is in an
	 inner-loop nested in an outer-loop that is being vectorized).  */
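
/* For illustration (a hypothetical example), with 'type' being unsigned
   short and 'TYPE' being int:

     unsigned short x[N];
     int i, sum = 0;
     for (i = 0; i < N; i++)
       sum += x[i];

   The promotion of x[i] is S2 and the accumulation S3, giving one
   WIDEN_SUM <x_t, sum_0> per vector iteration.  */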

static gimple
vect_recog_widen_sum_pattern (vec<gimple> *stmts, tree *type_in,
			      tree *type_out)
{
  gimple stmt, last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  if (loop && nested_in_vect_loop_p (loop, last_stmt))
    return NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE) X;
          sum_1 = DX + sum_0;
     In which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a cast from type 'type' to type
     'TYPE'.  */

  if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
                          &promotion)
      || !promotion)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (stmt);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, oprnd0, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_widen_sum_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  return pattern_stmt;
}

/* Return TRUE if the operation in STMT can be performed on a smaller type.

   Input:
   STMT - a statement to check.
   DEF - we support operations with two operands, one of which is constant.
         The other operand can be defined by a demotion operation, or by a
         previous statement in a sequence of over-promoted operations.  In the
         latter case DEF is used to replace that operand.  (It is defined by a
         pattern statement we created for the previous statement in the
         sequence).

   Input/output:
   NEW_TYPE - Output: a smaller type that we are trying to use.  Input: if not
         NULL, it's the type of DEF.
   STMTS - additional pattern statements.  If a pattern statement (type
         conversion) is created in this function, its original statement is
         added to STMTS.

   Output:
   OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
         operands to use in the new pattern statement for STMT (will be
         created in vect_recog_over_widening_pattern ()).
   NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
         statements for STMT: the first one is a type promotion and the second
         one is the operation itself.  We return the type promotion statement
	 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
         the second pattern statement.  */

static bool
vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
                                  tree *op0, tree *op1, gimple *new_def_stmt,
                                  vec<gimple> *stmts)
{
  enum tree_code code;
  tree const_oprnd, oprnd;
  tree interm_type = NULL_TREE, half_type, new_oprnd, type;
  gimple def_stmt, new_stmt;
  bool first = false;
  bool promotion;

  *op0 = NULL_TREE;
  *op1 = NULL_TREE;
  *new_def_stmt = NULL;

  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
      && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
    return false;

  oprnd = gimple_assign_rhs1 (stmt);
  const_oprnd = gimple_assign_rhs2 (stmt);
  type = gimple_expr_type (stmt);

  if (TREE_CODE (oprnd) != SSA_NAME
      || TREE_CODE (const_oprnd) != INTEGER_CST)
    return false;

  /* If oprnd has other uses besides that in stmt we cannot mark it
     as being part of a pattern only.  */
  if (!has_single_use (oprnd))
    return false;

  /* If we are in the middle of a sequence, we use DEF from a previous
     statement.  Otherwise, OPRND has to be a result of type promotion.  */
  if (*new_type)
    {
      half_type = *new_type;
      oprnd = def;
    }
  else
    {
      first = true;
      if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
			      &promotion)
	  || !promotion
	  || !vect_same_loop_or_bb_p (stmt, def_stmt))
        return false;
    }

  /* Can we perform the operation on a smaller type?  */
  switch (code)
    {
      case BIT_IOR_EXPR:
      case BIT_XOR_EXPR:
      case BIT_AND_EXPR:
        if (!int_fits_type_p (const_oprnd, half_type))
          {
            /* HALF_TYPE is not enough.  Try a bigger type if possible.  */
            if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
              return false;

            interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
            if (!int_fits_type_p (const_oprnd, interm_type))
              return false;
          }

        break;

      case LSHIFT_EXPR:
        /* Try intermediate type - HALF_TYPE is not enough for sure.  */
        if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
          return false;

        /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
          (e.g., if the original value was char, the shift amount is at most 8
           if we want to use short).  */
        if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
          return false;

        interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

        if (!vect_supportable_shift (code, interm_type))
          return false;

        break;

      case RSHIFT_EXPR:
        if (vect_supportable_shift (code, half_type))
          break;

        /* Try intermediate type - HALF_TYPE is not supported.  */
        if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
          return false;

        interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

        if (!vect_supportable_shift (code, interm_type))
          return false;

        break;

      default:
        gcc_unreachable ();
    }

  /* There are four possible cases:
     1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
        the first statement in the sequence)
        a. The original, HALF_TYPE, is not enough - we replace the promotion
           from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
        b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
           promotion.
     2. OPRND is defined by a pattern statement we created.
        a. Its type is not sufficient for the operation, we create a new stmt:
           a type conversion for OPRND from HALF_TYPE to INTERM_TYPE.  We store
           this statement in NEW_DEF_STMT, and it is later put in
	   STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
        b. OPRND is good to use in the new statement.  */
  if (first)
    {
      if (interm_type)
        {
          /* Replace the original type conversion HALF_TYPE->TYPE with
             HALF_TYPE->INTERM_TYPE.  */
          if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
            {
              new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
              /* Check if the already created pattern stmt is what we need.  */
              if (!is_gimple_assign (new_stmt)
                  || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt))
                  || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
                return false;

	      stmts->safe_push (def_stmt);
              oprnd = gimple_assign_lhs (new_stmt);
            }
          else
            {
              /* Create NEW_OPRND = (INTERM_TYPE) OPRND.  */
              oprnd = gimple_assign_rhs1 (def_stmt);
	      new_oprnd = make_ssa_name (interm_type);
	      new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
              STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
              stmts->safe_push (def_stmt);
              oprnd = new_oprnd;
            }
        }
      else
        {
          /* Retrieve the operand before the type promotion.  */
          oprnd = gimple_assign_rhs1 (def_stmt);
        }
    }
  else
    {
      if (interm_type)
        {
          /* Create a type conversion HALF_TYPE->INTERM_TYPE.  */
	  new_oprnd = make_ssa_name (interm_type);
	  new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
          oprnd = new_oprnd;
          *new_def_stmt = new_stmt;
        }

      /* Otherwise, OPRND is already set.  */
    }

  if (interm_type)
    *new_type = interm_type;
  else
    *new_type = half_type;

  *op0 = oprnd;
  *op1 = fold_convert (*new_type, const_oprnd);

  return true;
}

/* Try to find a statement or a sequence of statements that can be performed
   on a smaller type:

     type x_t;
     TYPE x_T, res0_T, res1_T;
   loop:
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  res0_T = op (x_T, C0);
     S4  res1_T = op (res0_T, C1);
     S5  ... = () res1_T;  - type demotion

   where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
   constants.
   Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
   be 'type' or some intermediate type.  For now, we expect S5 to be a type
   demotion operation.  We also check that S3 and S4 have only one use.  */
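
/* For illustration (a hypothetical example): because of the C integer
   promotions, the loop below shifts and ors in int although only the low
   char bits survive the final store:

     unsigned char in[N], out[N];
     for (i = 0; i < N; i++)
       out[i] = (in[i] << 2) | 1;

   The shift and the bitwise-or correspond to S3/S4 and the store's
   implicit conversion to S5; both operations can be performed on a
   narrower type than int.  */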

static gimple
vect_recog_over_widening_pattern (vec<gimple> *stmts,
                                  tree *type_in, tree *type_out)
{
  gimple stmt = stmts->pop ();
  gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
  tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
  tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
  bool first;
  tree type = NULL;

  first = true;
  while (1)
    {
      if (!vinfo_for_stmt (stmt)
          || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
        return NULL;

      new_def_stmt = NULL;
      if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
                                             &op0, &op1, &new_def_stmt,
                                             stmts))
        {
          if (first)
            return NULL;
          else
            break;
        }

      /* STMT can be performed on a smaller type.  Check its uses.  */
      use_stmt = vect_single_imm_use (stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt))
        return NULL;

      /* Create pattern statement for STMT.  */
      vectype = get_vectype_for_scalar_type (new_type);
      if (!vectype)
        return NULL;

      /* We want to collect all the statements for which we create pattern
         statements, except for the case when the last statement in the
         sequence doesn't have a corresponding pattern statement.  In such
         case we associate the last pattern statement with the last statement
         in the sequence.  Therefore, we only add the original statement to
         the list if we know that it is not the last.  */
      if (prev_stmt)
        stmts->safe_push (prev_stmt);

      var = vect_recog_temp_ssa_var (new_type, NULL);
      pattern_stmt
	= gimple_build_assign (var, gimple_assign_rhs_code (stmt), op0, op1);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
      new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "created pattern stmt: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
          dump_printf (MSG_NOTE, "\n");
        }

      type = gimple_expr_type (stmt);
      prev_stmt = stmt;
      stmt = use_stmt;

      first = false;
    }

  /* We got a sequence.  We expect it to end with a type demotion operation.
     Otherwise, we quit (for now).  There are three possible cases: the
     conversion is to NEW_TYPE (we don't do anything), the conversion is to
     a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
     NEW_TYPE differs (we create a new conversion statement).  */
  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
    {
      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      /* Support only type demotion or signedness change.  */
      if (!INTEGRAL_TYPE_P (use_type)
	  || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
        return NULL;

      /* Check that NEW_TYPE is not bigger than the conversion result.  */
      if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
	return NULL;

      if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
          || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
        {
          /* Create NEW_TYPE->USE_TYPE conversion.  */
	  new_oprnd = make_ssa_name (use_type);
	  pattern_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, var);
          STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;

          *type_in = get_vectype_for_scalar_type (new_type);
          *type_out = get_vectype_for_scalar_type (use_type);

          /* We created a pattern statement for the last statement in the
             sequence, so we don't need to associate it with the pattern
             statement created for PREV_STMT.  Therefore, we add PREV_STMT
             to the list in order to mark it later in vect_pattern_recog_1.  */
          if (prev_stmt)
            stmts->safe_push (prev_stmt);
        }
      else
        {
          if (prev_stmt)
	    STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
	       = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));

          *type_in = vectype;
          *type_out = NULL_TREE;
        }

      stmts->safe_push (use_stmt);
    }
  else
    /* TODO: support general case, create a conversion to the correct type.  */
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_over_widening_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  return pattern_stmt;
}

/* Detect widening shift pattern:

   type a_t;
   TYPE a_T, res_T;

   S1 a_t = ;
   S2 a_T = (TYPE) a_t;
   S3 res_T = a_T << CONST;

  where type 'TYPE' is at least double the size of type 'type'.

  Also detect cases where the shift result is immediately converted
  to another type 'result_type' that is no larger in size than 'TYPE'.
  In those cases we perform a widen-shift that directly results in
  'result_type', to avoid a possible over-widening situation:

  type a_t;
  TYPE a_T, res_T;
  result_type res_result;

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
  S3 res_T = a_T << CONST;
  S4 res_result = (result_type) res_T;
      '--> res_result' = a_t w<< CONST;

  And a case when 'TYPE' is 4 times bigger than 'type'.  In that case we
  create an additional pattern stmt for S2 to create a variable of an
  intermediate type, and perform widen-shift on the intermediate type:

  type a_t;
  interm_type a_it;
  TYPE a_T, res_T, res_T';

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
      '--> a_it = (interm_type) a_t;
  S3 res_T = a_T << CONST;
      '--> res_T' = a_it <<* CONST;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins.
    In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
    in STMTS.  When an intermediate type is used and a pattern statement is
    created for S2, we also put S2 here (before S3).

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the sequence of
    stmts that constitute the pattern.  In this case it will be:
    WIDEN_LSHIFT_EXPR <a_t, CONST>.  */
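
/* For illustration (a hypothetical example), with 'type' being short and
   'TYPE' being int:

     short a[N];
     int res[N], i;
     for (i = 0; i < N; i++)
       res[i] = a[i] << 10;

   The promotion of a[i] is S2 and the shift S3, which combine into
   WIDEN_LSHIFT_EXPR <a_t, 10>.  */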

static gimple
vect_recog_widen_shift_pattern (vec<gimple> *stmts,
				tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  gimple def_stmt0;
  tree oprnd0, oprnd1;
  tree type, half_type0;
  gimple pattern_stmt;
  tree vectype, vectype_out = NULL_TREE;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  gimple use_stmt;
  bool promotion;

  if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
    return NULL;

  if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
    return NULL;

  /* Check operand 0: it has to be defined by a type promotion.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
			  &promotion)
      || !promotion)
    return NULL;

  /* Check operand 1: has to be positive.  We check that it fits the type
     in vect_handle_widen_op_by_const ().  */
  if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (def_stmt0);
  type = gimple_expr_type (last_stmt);

  /* Check for subsequent conversion to another type.  */
  use_stmt = vect_single_imm_use (last_stmt);
  if (use_stmt && is_gimple_assign (use_stmt)
      && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
      && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
    {
      tree use_lhs = gimple_assign_lhs (use_stmt);
      tree use_type = TREE_TYPE (use_lhs);

      if (INTEGRAL_TYPE_P (use_type)
	  && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
	{
	  last_stmt = use_stmt;
	  type = use_type;
	}
    }

  /* Check if this a widening operation.  */
  gimple wstmt = NULL;
  if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
				      &oprnd0, &wstmt,
				      type, &half_type0, def_stmt0))
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_recog_widen_shift_pattern: detected:\n");

  /* Check target support.  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);

  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
					  vectype_out, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt =
    gimple_build_assign (var, WIDEN_LSHIFT_EXPR, oprnd0, oprnd1);
  if (wstmt)
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
      bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
      new_pattern_def_seq (stmt_vinfo, wstmt);
      stmt_vec_info new_stmt_info
	= new_stmt_vec_info (wstmt, loop_vinfo, bb_vinfo);
      set_vinfo_for_stmt (wstmt, new_stmt_info);
      STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
    }

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}

/* Detect a rotate pattern that wouldn't be otherwise vectorized:

   type a_t, b_t, c_t;

   S0 a_t = b_t r<< c_t;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins,
    i.e. the shift/rotate stmt.  The original stmt (S0) is replaced
    with a sequence:

    S1 d_t = -c_t;
    S2 e_t = d_t & (B - 1);
    S3 f_t = b_t << c_t;
    S4 g_t = b_t >> e_t;
    S0 a_t = f_t | g_t;

    where B is element bitsize of type.

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the rotate
    S0 stmt.  */
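
/* For illustration (a hypothetical example): earlier passes fold the
   portable rotate idiom

     unsigned int a[N];
     for (i = 0; i < N; i++)
       a[i] = (a[i] << 3) | (a[i] >> 29);

   into a single rotate a_t r<< 3.  On targets without a vector rotate
   this pattern re-expands it into the S1-S4/S0 shift and ior sequence,
   which can still be vectorized.  */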

static gimple
vect_recog_rotate_pattern (vec<gimple> *stmts, tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
  gimple pattern_stmt, def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  enum vect_def_type dt;
  optab optab1, optab2;
  edge ext_def = NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  lhs = gimple_assign_lhs (last_stmt);
  oprnd0 = gimple_assign_rhs1 (last_stmt);
  type = TREE_TYPE (oprnd0);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
      || !INTEGRAL_TYPE_P (type)
      || !TYPE_UNSIGNED (type))
    return NULL;

  if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
			   &def, &dt))
    return NULL;

  if (dt != vect_internal_def
      && dt != vect_constant_def
      && dt != vect_external_def)
    return NULL;

  vectype = get_vectype_for_scalar_type (type);
  if (vectype == NULL_TREE)
    return NULL;

  /* If vector/vector or vector/scalar rotate is supported by the target,
     don't do anything here.  */
  optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
  if (optab1
      && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
    return NULL;

  if (bb_vinfo != NULL || dt != vect_internal_def)
    {
      optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
      if (optab2
	  && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
	return NULL;
    }

  /* If vector/vector or vector/scalar shifts aren't supported by the target,
     don't do anything here either.  */
  optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
  optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
  if (!optab1
      || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
      || !optab2
      || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      if (bb_vinfo == NULL && dt == vect_internal_def)
	return NULL;
      optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
      optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
      if (!optab1
	  || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
	  || !optab2
	  || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
	return NULL;
    }

  *type_in = vectype;
  *type_out = vectype;
  if (*type_in == NULL_TREE)
    return NULL;

  if (dt == vect_external_def
      && TREE_CODE (oprnd1) == SSA_NAME
      && loop_vinfo)
    {
      struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      ext_def = loop_preheader_edge (loop);
      if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
	{
	  basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
	  if (bb == NULL
	      || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
	    ext_def = NULL;
	}
    }

  def = NULL_TREE;
  if (TREE_CODE (oprnd1) == INTEGER_CST
      || TYPE_MODE (TREE_TYPE (oprnd1)) == TYPE_MODE (type))
    def = oprnd1;
  else if (def_stmt && gimple_assign_cast_p (def_stmt))
    {
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (type)
	  && TYPE_PRECISION (TREE_TYPE (rhs1))
	     == TYPE_PRECISION (type))
	def = rhs1;
    }

  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
  if (def == NULL_TREE)
    {
      def = vect_recog_temp_ssa_var (type, NULL);
      def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
      if (ext_def)
	{
	  basic_block new_bb
	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
	  gcc_assert (!new_bb);
	}
      else
	append_pattern_def_seq (stmt_vinfo, def_stmt);
    }
  stype = TREE_TYPE (def);

  if (TREE_CODE (def) == INTEGER_CST)
    {
      if (!tree_fits_uhwi_p (def)
	  || tree_to_uhwi (def) >= GET_MODE_PRECISION (TYPE_MODE (type))
	  || integer_zerop (def))
	return NULL;
      def2 = build_int_cst (stype,
			    GET_MODE_PRECISION (TYPE_MODE (type))
			    - tree_to_uhwi (def));
    }
  else
    {
      tree vecstype = get_vectype_for_scalar_type (stype);
      stmt_vec_info def_stmt_vinfo;

      if (vecstype == NULL_TREE)
	return NULL;
      def2 = vect_recog_temp_ssa_var (stype, NULL);
      def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
      if (ext_def)
	{
	  basic_block new_bb
	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
	  gcc_assert (!new_bb);
	}
      else
	{
	  def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
	  set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
	  STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
	  append_pattern_def_seq (stmt_vinfo, def_stmt);
	}

      def2 = vect_recog_temp_ssa_var (stype, NULL);
      tree mask
	= build_int_cst (stype, GET_MODE_PRECISION (TYPE_MODE (stype)) - 1);
      def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
				      gimple_assign_lhs (def_stmt), mask);
      if (ext_def)
	{
	  basic_block new_bb
	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
	  gcc_assert (!new_bb);
	}
      else
	{
	  def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
	  set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
	  STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
	  append_pattern_def_seq (stmt_vinfo, def_stmt);
	}
    }

  var1 = vect_recog_temp_ssa_var (type, NULL);
  def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
					? LSHIFT_EXPR : RSHIFT_EXPR,
				  oprnd0, def);
  append_pattern_def_seq (stmt_vinfo, def_stmt);

  var2 = vect_recog_temp_ssa_var (type, NULL);
  def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
					? RSHIFT_EXPR : LSHIFT_EXPR,
				  oprnd0, def2);
  append_pattern_def_seq (stmt_vinfo, def_stmt);

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_recog_rotate_pattern: detected:\n");

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}

/* Detect a vector by vector shift pattern that wouldn't be otherwise
   vectorized:

   type a_t;
   TYPE b_T, res_T;

   S1 a_t = ;
   S2 b_T = ;
   S3 res_T = b_T op a_t;

  where type 'TYPE' is a type with different size than 'type',
  and op is <<, >> or rotate.

  Also detect cases:

   type a_t;
   TYPE b_T, c_T, res_T;

   S0 c_T = ;
   S1 a_t = (type) c_T;
   S2 b_T = ;
   S3 res_T = b_T op a_t;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins,
    i.e. the shift/rotate stmt.  The original stmt (S3) is replaced
    with a shift/rotate which has same type on both operands, in the
    second case just b_T op c_T, in the first case with added cast
    from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the shift/rotate
    S3 stmt.  */
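
/* For illustration (a hypothetical example): a shift whose amount has a
   narrower type than the shifted value, which GIMPLE permits:

     long long b[N];
     int amt[N], i;
     for (i = 0; i < N; i++)
       b[i] = b[i] << amt[i];

   Here 'TYPE' is long long and 'type' is int; the pattern inserts a cast
   so that both operands of the vector shift get the same vector type.  */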
2062 vect_recog_vector_vector_shift_pattern (vec
<gimple
> *stmts
,
2063 tree
*type_in
, tree
*type_out
)
  gimple last_stmt = stmts->pop ();
  tree oprnd0, oprnd1, lhs, var;
  gimple pattern_stmt, def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  enum vect_def_type dt;
  tree def;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  lhs = gimple_assign_lhs (last_stmt);
  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TREE_CODE (oprnd1) != SSA_NAME
      || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
      || TYPE_PRECISION (TREE_TYPE (oprnd1))
	 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
      || TYPE_PRECISION (TREE_TYPE (lhs))
	 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
    return NULL;

  if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
			   &def, &dt))
    return NULL;

  if (dt != vect_internal_def)
    return NULL;

  *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
  *type_out = *type_in;
  if (*type_in == NULL_TREE)
    return NULL;

  def = NULL_TREE;
  if (gimple_assign_cast_p (def_stmt))
    {
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
	  && TYPE_PRECISION (TREE_TYPE (rhs1))
	     == TYPE_PRECISION (TREE_TYPE (oprnd0)))
	def = rhs1;
    }

  if (def == NULL_TREE)
    {
      def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
      def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
      new_pattern_def_seq (stmt_vinfo, def_stmt);
    }

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_recog_vector_vector_shift_pattern: detected:\n");

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
  pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}
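
/* Illustrative source-level shape of what this recognizer matches (a
   hypothetical editor's example; the exact GIMPLE depends on earlier
   narrowing passes, and C itself always promotes shift counts):

     long long
     shift (long long b, int a)      // S3: shift amount narrower than value
     {
       return b << a;
     }

   The mode mismatch between the shifted value and the shift amount would
   otherwise block a vector-by-vector shift; the pattern inserts
   a' = (long long) a into the def-seq and shifts by a' instead.  */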
/* Detect a signed division by a constant that wouldn't be
   otherwise vectorized:

   type a_t, b_t;

   S1 a_t = b_t / N;

  where type 'type' is an integral type and N is a constant.

  Similarly handle modulo by a constant:

   S4 a_t = b_t % N;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins,
    i.e. the division stmt.  S1 is replaced, if N is a power
    of two constant and type is signed, by:
    S3  y_t = b_t < 0 ? N - 1 : 0;
    S2  x_t = b_t + y_t;
    S1' a_t = x_t >> log2 (N);

    S4 is replaced, if N is a power of two constant and
    type is signed, by (where *_T temporaries have unsigned type):
    S9  y_T = b_t < 0 ? -1U : 0U;
    S8  z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
    S7  z_t = (type) z_T;
    S6  w_t = b_t + z_t;
    S5  x_t = w_t & (N - 1);
    S4' a_t = x_t - z_t;

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the division
    S1 or modulo S4 stmt.  */
static gimple
vect_recog_divmod_pattern (vec<gimple> *stmts,
			   tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  tree oprnd0, oprnd1, vectype, itype, cond;
  gimple pattern_stmt, def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  optab optab;
  tree q;
  int dummy_int, prec;
  stmt_vec_info def_stmt_vinfo;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case TRUNC_DIV_EXPR:
    case TRUNC_MOD_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  itype = TREE_TYPE (oprnd0);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TREE_CODE (oprnd1) != INTEGER_CST
      || TREE_CODE (itype) != INTEGER_TYPE
      || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
    return NULL;

  vectype = get_vectype_for_scalar_type (itype);
  if (vectype == NULL_TREE)
    return NULL;

  /* If the target can handle vectorized division or modulo natively,
     don't attempt to optimize this.  */
  optab = optab_for_tree_code (rhs_code, vectype, optab_default);
  if (optab != unknown_optab)
    {
      machine_mode vec_mode = TYPE_MODE (vectype);
      int icode = (int) optab_handler (optab, vec_mode);
      if (icode != CODE_FOR_nothing)
	return NULL;
    }

  prec = TYPE_PRECISION (itype);
  if (integer_pow2p (oprnd1))
    {
      if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
	return NULL;

      /* Pattern detected.  */
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_recog_divmod_pattern: detected:\n");

      cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
		     build_int_cst (itype, 0));
      if (rhs_code == TRUNC_DIV_EXPR)
	{
	  tree var = vect_recog_temp_ssa_var (itype, NULL);
	  tree shift;
	  def_stmt
	    = gimple_build_assign (var, COND_EXPR, cond,
				   fold_build2 (MINUS_EXPR, itype, oprnd1,
						build_int_cst (itype, 1)),
				   build_int_cst (itype, 0));
	  new_pattern_def_seq (stmt_vinfo, def_stmt);
	  var = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt
	    = gimple_build_assign (var, PLUS_EXPR, oprnd0,
				   gimple_assign_lhs (def_stmt));
	  append_pattern_def_seq (stmt_vinfo, def_stmt);

	  shift = build_int_cst (itype, tree_log2 (oprnd1));
	  pattern_stmt
	    = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
				   RSHIFT_EXPR, var, shift);
	}
      else
	{
	  tree signmask;
	  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
	  if (compare_tree_int (oprnd1, 2) == 0)
	    {
	      signmask = vect_recog_temp_ssa_var (itype, NULL);
	      def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
					      build_int_cst (itype, 1),
					      build_int_cst (itype, 0));
	      append_pattern_def_seq (stmt_vinfo, def_stmt);
	    }
	  else
	    {
	      tree utype
		= build_nonstandard_integer_type (prec, 1);
	      tree vecutype = get_vectype_for_scalar_type (utype);
	      tree shift
		= build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
					- tree_log2 (oprnd1));
	      tree var = vect_recog_temp_ssa_var (utype, NULL);

	      def_stmt = gimple_build_assign (var, COND_EXPR, cond,
					      build_int_cst (utype, -1),
					      build_int_cst (utype, 0));
	      def_stmt_vinfo
		= new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
	      set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
	      STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
	      append_pattern_def_seq (stmt_vinfo, def_stmt);
	      var = vect_recog_temp_ssa_var (utype, NULL);
	      def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
					      gimple_assign_lhs (def_stmt),
					      shift);
	      def_stmt_vinfo
		= new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
	      set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
	      STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
	      append_pattern_def_seq (stmt_vinfo, def_stmt);
	      signmask = vect_recog_temp_ssa_var (itype, NULL);
	      def_stmt
		= gimple_build_assign (signmask, NOP_EXPR, var);
	      append_pattern_def_seq (stmt_vinfo, def_stmt);
	    }
	  def_stmt
	    = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
				   PLUS_EXPR, oprnd0, signmask);
	  append_pattern_def_seq (stmt_vinfo, def_stmt);
	  def_stmt
	    = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
				   BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
				   fold_build2 (MINUS_EXPR, itype, oprnd1,
						build_int_cst (itype, 1)));
	  append_pattern_def_seq (stmt_vinfo, def_stmt);

	  pattern_stmt
	    = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
				   MINUS_EXPR, gimple_assign_lhs (def_stmt),
				   signmask);
	}

      if (dump_enabled_p ())
	dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
			      0);

      stmts->safe_push (last_stmt);

      *type_in = vectype;
      *type_out = vectype;
      return pattern_stmt;
    }
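
  /* Worked scalar example of the power-of-two branch above (editor's
     illustration only, assuming 32-bit int and N == 8, so log2 (N) == 3):

       int div8 (int b)                    // a_t = b_t / 8
       {
         int y = b < 0 ? 7 : 0;            // S3: N - 1 for negative b
         int x = b + y;                    // S2
         return x >> 3;                    // S1': arithmetic shift
       }

       int mod8 (int b)                    // a_t = b_t % 8, N != 2 branch
       {
         unsigned z_T = (b < 0 ? -1U : 0U) >> (32 - 3);  // S9, S8
         int z = (int) z_T;                // S7: 7 when b < 0, else 0
         int x = (b + z) & 7;              // S6, S5
         return x - z;                     // S4'
       }

     The unsigned temporaries in mod8 match the *_T temporaries described
     before this function and avoid right-shifting a negative value.  */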
  if (prec > HOST_BITS_PER_WIDE_INT
      || integer_zerop (oprnd1))
    return NULL;

  if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
    return NULL;

  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;

  if (TYPE_UNSIGNED (itype))
    {
      unsigned HOST_WIDE_INT mh, ml;
      int pre_shift, post_shift;
      unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
				  & GET_MODE_MASK (TYPE_MODE (itype)));
      tree t1, t2, t3, t4;

      if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
	/* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0.  */
	return NULL;

      /* Find a suitable multiplier and right shift count
	 instead of multiplying with D.  */
      mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);

      /* If the suggested multiplier is more than SIZE bits, we can do better
	 for even divisors, using an initial right shift.  */
      if (mh != 0 && (d & 1) == 0)
	{
	  pre_shift = floor_log2 (d & -d);
	  mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
				  &ml, &post_shift, &dummy_int);
	}
      else
	pre_shift = 0;

      if (mh != 0)
	{
	  if (post_shift - 1 >= prec)
	    return NULL;

	  /* t1 = oprnd0 h* ml;
	     t2 = oprnd0 - t1;
	     t3 = t2 >> 1;
	     t4 = t1 + t3;
	     q = t4 >> (post_shift - 1);  */
	  t1 = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
					  build_int_cst (itype, ml));
	  append_pattern_def_seq (stmt_vinfo, def_stmt);

	  t2 = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt
	    = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
	  append_pattern_def_seq (stmt_vinfo, def_stmt);

	  t3 = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt
	    = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
	  append_pattern_def_seq (stmt_vinfo, def_stmt);

	  t4 = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt
	    = gimple_build_assign (t4, PLUS_EXPR, t1, t3);

	  if (post_shift != 1)
	    {
	      append_pattern_def_seq (stmt_vinfo, def_stmt);

	      q = vect_recog_temp_ssa_var (itype, NULL);
	      pattern_stmt
		= gimple_build_assign (q, RSHIFT_EXPR, t4,
				       build_int_cst (itype, post_shift - 1));
	    }
	  else
	    {
	      q = t4;
	      pattern_stmt = def_stmt;
	    }
	}
      else
	{
	  if (pre_shift >= prec || post_shift >= prec)
	    return NULL;

	  /* t1 = oprnd0 >> pre_shift;
	     t2 = t1 h* ml;
	     q = t2 >> post_shift;  */
	  if (pre_shift)
	    {
	      t1 = vect_recog_temp_ssa_var (itype, NULL);
	      def_stmt
		= gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
				       build_int_cst (NULL, pre_shift));
	      append_pattern_def_seq (stmt_vinfo, def_stmt);
	    }
	  else
	    t1 = oprnd0;

	  t2 = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
					  build_int_cst (itype, ml));

	  if (post_shift)
	    {
	      append_pattern_def_seq (stmt_vinfo, def_stmt);

	      q = vect_recog_temp_ssa_var (itype, NULL);
	      def_stmt
		= gimple_build_assign (q, RSHIFT_EXPR, t2,
				       build_int_cst (itype, post_shift));
	    }
	  else
	    q = t2;

	  pattern_stmt = def_stmt;
	}
    }
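
  /* Illustration of the unsigned multiply-highpart path (editor's sketch,
     not authoritative; the constants assume 32-bit unsigned int and d == 7,
     for which the usual magic-number computation gives ml == 0x24924925
     with post_shift == 3, i.e. the mh != 0 case above):

       unsigned int div7 (unsigned int x)
       {
         unsigned int t1 = ((unsigned long long) x * 0x24924925u) >> 32;
         unsigned int t2 = x - t1;
         unsigned int t3 = t2 >> 1;
         unsigned int t4 = t1 + t3;
         return t4 >> 2;                   // post_shift - 1
       }
  */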
  else
    {
      unsigned HOST_WIDE_INT ml;
      int post_shift;
      HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
      unsigned HOST_WIDE_INT abs_d;
      bool add = false;
      tree t1, t2, t3, t4;

      /* Give up for -1.  */
      if (d == -1)
	return NULL;

      /* Since d might be INT_MIN, we have to cast to
	 unsigned HOST_WIDE_INT before negating to avoid
	 undefined signed overflow.  */
      abs_d = (d >= 0
	       ? (unsigned HOST_WIDE_INT) d
	       : - (unsigned HOST_WIDE_INT) d);

      /* n rem d = n rem -d */
      if (rhs_code == TRUNC_MOD_EXPR && d < 0)
	{
	  d = abs_d;
	  oprnd1 = build_int_cst (itype, abs_d);
	}
      else if (HOST_BITS_PER_WIDE_INT >= prec
	       && abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
	/* This case is not handled correctly below.  */
	return NULL;

      choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
      if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
	{
	  add = true;
	  ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
	}
      if (post_shift >= prec)
	return NULL;

      /* t1 = oprnd0 h* ml;  */
      t1 = vect_recog_temp_ssa_var (itype, NULL);
      def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
				      build_int_cst (itype, ml));

      if (add)
	{
	  /* t2 = t1 + oprnd0;  */
	  append_pattern_def_seq (stmt_vinfo, def_stmt);
	  t2 = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
	}
      else
	t2 = t1;

      if (post_shift)
	{
	  /* t3 = t2 >> post_shift;  */
	  append_pattern_def_seq (stmt_vinfo, def_stmt);
	  t3 = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
					  build_int_cst (itype, post_shift));
	}
      else
	t3 = t2;

      wide_int oprnd0_min, oprnd0_max;
      int msb = 1;
      if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
	{
	  if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
	    msb = 0;
	  else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
	    msb = -1;
	}

      if (msb == 0 && d >= 0)
	{
	  /* q = t3;  */
	  q = t3;
	  pattern_stmt = def_stmt;
	}
      else
	{
	  /* t4 = oprnd0 >> (prec - 1);
	     or if we know from VRP that oprnd0 >= 0
	     t4 = 0;
	     or if we know from VRP that oprnd0 < 0
	     t4 = -1;  */
	  append_pattern_def_seq (stmt_vinfo, def_stmt);
	  t4 = vect_recog_temp_ssa_var (itype, NULL);
	  if (msb != 1)
	    def_stmt = gimple_build_assign (t4, INTEGER_CST,
					    build_int_cst (itype, msb));
	  else
	    def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
					    build_int_cst (itype, prec - 1));
	  append_pattern_def_seq (stmt_vinfo, def_stmt);

	  /* q = t3 - t4;  or q = t4 - t3;  */
	  q = vect_recog_temp_ssa_var (itype, NULL);
	  pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
					      d < 0 ? t3 : t4);
	}
    }

  if (rhs_code == TRUNC_MOD_EXPR)
    {
      tree r, t1;

      /* We divided.  Now finish by:
	 t1 = q * oprnd1;
	 r = oprnd0 - t1;  */
      append_pattern_def_seq (stmt_vinfo, pattern_stmt);

      t1 = vect_recog_temp_ssa_var (itype, NULL);
      def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
      append_pattern_def_seq (stmt_vinfo, def_stmt);

      r = vect_recog_temp_ssa_var (itype, NULL);
      pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
    }

  /* Pattern detected.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_recog_divmod_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  stmts->safe_push (last_stmt);

  *type_in = vectype;
  *type_out = vectype;
  return pattern_stmt;
}
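
/* Illustration of the signed multiply-highpart path (editor's sketch, not
   authoritative; constants assume 32-bit two's complement int and d == 7,
   where the magic-number computation gives ml == 0x92492493 with
   post_shift == 2; ml has its MSB set, so the "add" correction fires):

     int div7 (int x)
     {
       int t1 = (int) (((long long) x * (int) 0x92492493) >> 32); // x h* ml
       int t2 = t1 + x;                  // add correction
       int t3 = t2 >> 2;                 // post_shift, arithmetic shift
       int t4 = x >> 31;                 // 0 or -1
       return t3 - t4;                   // q; for d < 0 the operands swap
     }

   For TRUNC_MOD_EXPR the tail above then computes r = x - q * 7.  */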
/* Function vect_recog_mixed_size_cond_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE a_T, b_T, c_T;
   loop:
     S1  a_T = x_t CMP y_t ? b_T : c_T;

   where type 'TYPE' is an integral type which has a different size
   from 'type'.  b_T and c_T are either constants (and if 'TYPE' is wider
   than 'type', the constants need to fit into an integer type
   with the same width as 'type') or results of conversion from 'type'.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the pattern.
     Additionally a def_stmt is added.

	a_it = x_t CMP y_t ? b_it : c_it;
	a_T = (TYPE) a_it;  */
static gimple
vect_recog_mixed_size_cond_pattern (vec<gimple> *stmts, tree *type_in,
				    tree *type_out)
{
  gimple last_stmt = (*stmts)[0];
  tree cond_expr, then_clause, else_clause;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
  tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
  machine_mode cmpmode;
  gimple pattern_stmt, def_stmt;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
  gimple def_stmt0 = NULL, def_stmt1 = NULL;
  bool promotion;
  tree comp_scalar_type;

  if (!is_gimple_assign (last_stmt)
      || gimple_assign_rhs_code (last_stmt) != COND_EXPR
      || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;

  cond_expr = gimple_assign_rhs1 (last_stmt);
  then_clause = gimple_assign_rhs2 (last_stmt);
  else_clause = gimple_assign_rhs3 (last_stmt);

  if (!COMPARISON_CLASS_P (cond_expr))
    return NULL;

  comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
  comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
  if (comp_vectype == NULL_TREE)
    return NULL;

  type = gimple_expr_type (last_stmt);
  if (types_compatible_p (type, comp_scalar_type)
      || ((TREE_CODE (then_clause) != INTEGER_CST
	   || TREE_CODE (else_clause) != INTEGER_CST)
	  && !INTEGRAL_TYPE_P (comp_scalar_type))
      || !INTEGRAL_TYPE_P (type))
    return NULL;

  if ((TREE_CODE (then_clause) != INTEGER_CST
       && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
			      &def_stmt0, &promotion))
      || (TREE_CODE (else_clause) != INTEGER_CST
	  && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
				 &def_stmt1, &promotion)))
    return NULL;

  if (orig_type0 && orig_type1
      && !types_compatible_p (orig_type0, orig_type1))
    return NULL;

  if (orig_type0)
    {
      if (!types_compatible_p (orig_type0, comp_scalar_type))
	return NULL;
      then_clause = gimple_assign_rhs1 (def_stmt0);
      itype = orig_type0;
    }

  if (orig_type1)
    {
      if (!types_compatible_p (orig_type1, comp_scalar_type))
	return NULL;
      else_clause = gimple_assign_rhs1 (def_stmt1);
      itype = orig_type1;
    }

  cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));

  if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
    return NULL;

  vectype = get_vectype_for_scalar_type (type);
  if (vectype == NULL_TREE)
    return NULL;

  if (expand_vec_cond_expr_p (vectype, comp_vectype))
    return NULL;

  if (itype == NULL_TREE)
    itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
					    TYPE_UNSIGNED (type));

  if (itype == NULL_TREE
      || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
    return NULL;

  vecitype = get_vectype_for_scalar_type (itype);
  if (vecitype == NULL_TREE)
    return NULL;

  if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
    return NULL;

  if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
    {
      if ((TREE_CODE (then_clause) == INTEGER_CST
	   && !int_fits_type_p (then_clause, itype))
	  || (TREE_CODE (else_clause) == INTEGER_CST
	      && !int_fits_type_p (else_clause, itype)))
	return NULL;
    }

  def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
				  COND_EXPR, unshare_expr (cond_expr),
				  fold_convert (itype, then_clause),
				  fold_convert (itype, else_clause));
  pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
				      NOP_EXPR, gimple_assign_lhs (def_stmt));

  new_pattern_def_seq (stmt_vinfo, def_stmt);
  def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
  set_vinfo_for_stmt (def_stmt, def_stmt_info);
  STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
  *type_in = vecitype;
  *type_out = vectype;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_recog_mixed_size_cond_pattern: detected:\n");

  return pattern_stmt;
}
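
/* Illustrative scalar shape of this rewrite (hypothetical editor's example,
   assuming 8-bit comparisons feeding a 32-bit result):

     int f (signed char x, signed char y)
     {
       signed char a_it = x < y ? 3 : 5;  // def_stmt, in itype
       return (int) a_it;                 // pattern_stmt, NOP_EXPR
     }

   The COND_EXPR is evaluated in the comparison-sized type itype, so the
   vector condition and the selected values share one vector shape; the
   widening to 'type' becomes a separate, vectorizable conversion.  */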
/* Helper function of vect_recog_bool_pattern.  Called recursively, return
   true if bool VAR can be optimized that way.  */

static bool
check_bool_pattern (tree var, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  gimple def_stmt;
  enum vect_def_type dt;
  tree def, rhs1;
  enum tree_code rhs_code;

  if (!vect_is_simple_use (var, NULL, loop_vinfo, bb_vinfo, &def_stmt, &def,
			   &dt))
    return false;

  if (dt != vect_internal_def)
    return false;

  if (!is_gimple_assign (def_stmt))
    return false;

  if (!has_single_use (def))
    return false;

  rhs1 = gimple_assign_rhs1 (def_stmt);
  rhs_code = gimple_assign_rhs_code (def_stmt);
  switch (rhs_code)
    {
    case SSA_NAME:
      return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);

    CASE_CONVERT:
      if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
	   || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
	  && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
	return false;
      return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);

    case BIT_NOT_EXPR:
      return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);

    case BIT_AND_EXPR:
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
      if (!check_bool_pattern (rhs1, loop_vinfo, bb_vinfo))
	return false;
      return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo,
				 bb_vinfo);

    default:
      if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
	{
	  tree vecitype, comp_vectype;

	  /* If the comparison can throw, then is_gimple_condexpr will be
	     false and we can't make a COND_EXPR/VEC_COND_EXPR out of it.  */
	  if (stmt_could_throw_p (def_stmt))
	    return false;

	  comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
	  if (comp_vectype == NULL_TREE)
	    return false;

	  if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
	    {
	      machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
	      tree itype
		= build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
	      vecitype = get_vectype_for_scalar_type (itype);
	      if (vecitype == NULL_TREE)
		return false;
	    }
	  else
	    vecitype = comp_vectype;
	  return expand_vec_cond_expr_p (vecitype, comp_vectype);
	}
      return false;
    }
}
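
/* For illustration (hypothetical editor's example): a bool chain this
   predicate accepts, assuming every comparison has a vectorizable
   VEC_COND_EXPR form and every intermediate bool has a single use:

     bool a = x1 < y1;          // tcc_comparison leaf
     bool b = x2 != y2;         // tcc_comparison leaf
     bool c = a & b;            // BIT_AND_EXPR, both operands checked
     d = (int) c;               // consumer handled by the caller

   A bool with multiple uses, or one defined by something other than an
   assignment, makes check_bool_pattern return false and the chain is
   left alone.  */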
/* Helper function of adjust_bool_pattern.  Add a cast to TYPE to a previous
   stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
   to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT.  */

static tree
adjust_bool_pattern_cast (tree type, tree var)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
  gimple cast_stmt, pattern_stmt;

  gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
  pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
  new_pattern_def_seq (stmt_vinfo, pattern_stmt);
  cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
				   NOP_EXPR, gimple_assign_lhs (pattern_stmt));
  STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
  return gimple_assign_lhs (cast_stmt);
}
/* Helper function of vect_recog_bool_pattern.  Do the actual transformations,
   recursively.  VAR is an SSA_NAME that should be transformed from bool
   to a wider integer type, OUT_TYPE is the desired final integer type of
   the whole pattern, TRUEVAL should be NULL unless optimizing
   BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
   in the then_clause, STMTS is where statements with added pattern stmts
   should be pushed to.  */

static tree
adjust_bool_pattern (tree var, tree out_type, tree trueval,
		     vec<gimple> *stmts)
{
  gimple stmt = SSA_NAME_DEF_STMT (var);
  enum tree_code rhs_code, def_rhs_code;
  tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
  location_t loc;
  gimple pattern_stmt, def_stmt;

  rhs1 = gimple_assign_rhs1 (stmt);
  rhs2 = gimple_assign_rhs2 (stmt);
  rhs_code = gimple_assign_rhs_code (stmt);
  loc = gimple_location (stmt);
  switch (rhs_code)
    {
    case SSA_NAME:
    CASE_CONVERT:
      irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
      itype = TREE_TYPE (irhs1);
      pattern_stmt
	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
			       SSA_NAME, irhs1);
      break;

    case BIT_NOT_EXPR:
      irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
      itype = TREE_TYPE (irhs1);
      pattern_stmt
	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
			       BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
      break;

    case BIT_AND_EXPR:
      /* Try to optimize x = y & (a < b ? 1 : 0); into
	 x = (a < b ? y : 0);

	 E.g. for:
	   bool a_b, b_b, c_b;
	   TYPE d_T;

	   S1  a_b = x1 CMP1 y1;
	   S2  b_b = x2 CMP2 y2;
	   S3  c_b = a_b & b_b;
	   S4  d_T = (TYPE) c_b;

	 we would normally emit:

	   S1'  a_T = x1 CMP1 y1 ? 1 : 0;
	   S2'  b_T = x2 CMP2 y2 ? 1 : 0;
	   S3'  c_T = a_T & b_T;
	   S4'  d_T = c_T;

	 but we can save one stmt by using the
	 result of one of the COND_EXPRs in the other COND_EXPR and leave
	 BIT_AND_EXPR stmt out:

	   S1'  a_T = x1 CMP1 y1 ? 1 : 0;
	   S3'  c_T = x2 CMP2 y2 ? a_T : 0;
	   S4'  d_T = c_T;

	 At least when VEC_COND_EXPR is implemented using masks
	 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
	 computes the comparison masks and ands it, in one case with
	 all ones vector, in the other case with a vector register.
	 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
	 often more expensive.  */
      def_stmt = SSA_NAME_DEF_STMT (rhs2);
      def_rhs_code = gimple_assign_rhs_code (def_stmt);
      if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
	{
	  tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
	  irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
	  if (TYPE_PRECISION (TREE_TYPE (irhs1))
	      == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
	    {
	      gimple tstmt;
	      stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
	      irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
	      tstmt = stmts->pop ();
	      gcc_assert (tstmt == def_stmt);
	      stmts->quick_push (stmt);
	      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
		= STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
	      gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
	      STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
	      return irhs2;
	    }
	  else
	    irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
	  goto and_ior_xor;
	}
      def_stmt = SSA_NAME_DEF_STMT (rhs1);
      def_rhs_code = gimple_assign_rhs_code (def_stmt);
      if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
	{
	  tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
	  irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
	  if (TYPE_PRECISION (TREE_TYPE (irhs2))
	      == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
	    {
	      gimple tstmt;
	      stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
	      irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
	      tstmt = stmts->pop ();
	      gcc_assert (tstmt == def_stmt);
	      stmts->quick_push (stmt);
	      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
		= STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
	      gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
	      STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
	      return irhs1;
	    }
	  else
	    irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
	  goto and_ior_xor;
	}
      /* FALLTHRU */
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
      irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
      irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
    and_ior_xor:
      if (TYPE_PRECISION (TREE_TYPE (irhs1))
	  != TYPE_PRECISION (TREE_TYPE (irhs2)))
	{
	  int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
	  int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
	  int out_prec = TYPE_PRECISION (out_type);
	  if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
	    irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
	  else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
	    irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
	  else
	    {
	      irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
	      irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
	    }
	}
      itype = TREE_TYPE (irhs1);
      pattern_stmt
	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
			       rhs_code, irhs1, irhs2);
      break;

    default:
      gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
      if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
	  || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
	  || (TYPE_PRECISION (TREE_TYPE (rhs1))
	      != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
	{
	  machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
	  itype
	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
	}
      else
	itype = TREE_TYPE (rhs1);
      cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
      if (trueval == NULL_TREE)
	trueval = build_int_cst (itype, 1);
      else
	gcc_checking_assert (useless_type_conversion_p (itype,
							TREE_TYPE (trueval)));
      pattern_stmt
	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
			       COND_EXPR, cond_expr, trueval,
			       build_int_cst (itype, 0));
      break;
    }

  stmts->safe_push (stmt);
  gimple_set_location (pattern_stmt, loc);
  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
  return gimple_assign_lhs (pattern_stmt);
}
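
/* Worked example of the rewrite performed above (editor's illustration
   only, assuming all comparisons are on same-width 32-bit operands so no
   adjust_bool_pattern_cast calls are needed):

     bool a = x1 < y1;                    // S1
     bool c = a & (x2 < y2);              // S3, rhs2 defined by a comparison
     int  d = (int) c;                    // S4

   becomes

     int a_T = x1 < y1 ? 1 : 0;           // S1'
     int c_T = x2 < y2 ? a_T : 0;         // S3': trueval == a_T saves the
     int d_T = c_T;                       //      separate BIT_AND_EXPR
*/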
/* Function vect_recog_bool_pattern

   Try to find a pattern like the following:

     bool a_b, b_b, c_b, d_b, e_b;
     TYPE f_T;
   loop:
     S1  a_b = x1 CMP1 y1;
     S2  b_b = x2 CMP2 y2;
     S3  c_b = a_b & b_b;
     S4  d_b = x3 CMP3 y3;
     S5  e_b = c_b | d_b;
     S6  f_T = (TYPE) e_b;

   where type 'TYPE' is an integral type.  Or a similar pattern
   ending in

     S6  f_Y = e_b ? r_Y : s_Y;

   as results from if-conversion of a complex condition.

   Input:

   * LAST_STMT: A stmt at the end from which the pattern
		search begins, i.e. cast of a bool to
		an integer type.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the pattern.

	Assuming size of TYPE is the same as size of all comparisons
	(otherwise some casts would be added where needed), for the above
	sequence we create related pattern stmts:
	S1'  a_T = x1 CMP1 y1 ? 1 : 0;
	S3'  c_T = x2 CMP2 y2 ? a_T : 0;
	S4'  d_T = x3 CMP3 y3 ? 1 : 0;
	S5'  e_T = c_T | d_T;
	S6'  f_T = e_T;

	Instead of the above S3' we could emit:
	S2'  b_T = x2 CMP2 y2 ? 1 : 0;
	S3'  c_T = a_T | b_T;
	but the above is more efficient.  */
static gimple
vect_recog_bool_pattern (vec<gimple> *stmts, tree *type_in,
			 tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  enum tree_code rhs_code;
  tree var, lhs, rhs, vectype;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  gimple pattern_stmt;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  var = gimple_assign_rhs1 (last_stmt);
  lhs = gimple_assign_lhs (last_stmt);

  if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
       || !TYPE_UNSIGNED (TREE_TYPE (var)))
      && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  if (CONVERT_EXPR_CODE_P (rhs_code))
    {
      if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
	  || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
	return NULL;
      vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
      if (vectype == NULL_TREE)
	return NULL;

      if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
	return NULL;

      rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
      if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
	pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
      else
	pattern_stmt
	  = gimple_build_assign (lhs, NOP_EXPR, rhs);
      *type_out = vectype;
      *type_in = vectype;
      stmts->safe_push (last_stmt);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_recog_bool_pattern: detected:\n");

      return pattern_stmt;
    }
  else if (rhs_code == COND_EXPR
	   && TREE_CODE (var) == SSA_NAME)
    {
      vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
      if (vectype == NULL_TREE)
	return NULL;

      /* Build a scalar type for the boolean result that when
	 vectorized matches the vector type of the result in
	 size and number of elements.  */
      unsigned prec
	= wi::udiv_trunc (TYPE_SIZE (vectype),
			  TYPE_VECTOR_SUBPARTS (vectype)).to_uhwi ();
      tree type
	= build_nonstandard_integer_type (prec,
					  TYPE_UNSIGNED (TREE_TYPE (var)));
      if (get_vectype_for_scalar_type (type) == NULL_TREE)
	return NULL;

      if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
	return NULL;

      rhs = adjust_bool_pattern (var, type, NULL_TREE, stmts);
      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
      pattern_stmt
	= gimple_build_assign (lhs, COND_EXPR,
			       build2 (NE_EXPR, boolean_type_node,
				       rhs, build_int_cst (type, 0)),
			       gimple_assign_rhs2 (last_stmt),
			       gimple_assign_rhs3 (last_stmt));
      *type_out = vectype;
      *type_in = vectype;
      stmts->safe_push (last_stmt);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_recog_bool_pattern: detected:\n");

      return pattern_stmt;
    }
  else if (rhs_code == SSA_NAME
	   && STMT_VINFO_DATA_REF (stmt_vinfo))
    {
      stmt_vec_info pattern_stmt_info;
      vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      gcc_assert (vectype != NULL_TREE);
      if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
	return NULL;
      if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
	return NULL;

      rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
      lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
      if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
	{
	  tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
	  gimple cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
	  new_pattern_def_seq (stmt_vinfo, cast_stmt);
	  rhs = rhs2;
	}
      pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
      pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
					     bb_vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
      STMT_VINFO_DATA_REF (pattern_stmt_info)
	= STMT_VINFO_DATA_REF (stmt_vinfo);
      STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
	= STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
      STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
      STMT_VINFO_DR_OFFSET (pattern_stmt_info)
	= STMT_VINFO_DR_OFFSET (stmt_vinfo);
      STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
      STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
	= STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
      DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
      *type_out = vectype;
      *type_in = vectype;
      stmts->safe_push (last_stmt);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_recog_bool_pattern: detected:\n");
      return pattern_stmt;
    }
  else
    return NULL;
}
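
/* Source-level shapes that reach the three arms above (hypothetical
   editor's examples): a widening cast of a bool chain,

     int f (int x, int y) { bool b = x < y; return (int) b; }

   which becomes f_T = x < y ? 1 : 0; a COND_EXPR selecting on a bool,

     int g (int x, int y, int r, int s) { bool b = x < y; return b ? r : s; }

   rewritten to select on an integer mask of matching width; and a store
   of a bool chain to memory, which gets a VIEW_CONVERT_EXPR so the data
   reference keeps its original vector type.  */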
/* Mark statements that are involved in a pattern.  */

static inline void
vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
			 tree pattern_vectype)
{
  stmt_vec_info pattern_stmt_info, def_stmt_info;
  stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (orig_stmt_info);
  gimple def_stmt;

  pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
  if (pattern_stmt_info == NULL)
    {
      pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
					     bb_vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
    }
  gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));

  STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
  STMT_VINFO_DEF_TYPE (pattern_stmt_info)
    = STMT_VINFO_DEF_TYPE (orig_stmt_info);
  STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
  STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
  STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
  STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
    = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
  if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
    {
      gimple_stmt_iterator si;
      for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
	   !gsi_end_p (si); gsi_next (&si))
	{
	  def_stmt = gsi_stmt (si);
	  def_stmt_info = vinfo_for_stmt (def_stmt);
	  if (def_stmt_info == NULL)
	    {
	      def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo,
						 bb_vinfo);
	      set_vinfo_for_stmt (def_stmt, def_stmt_info);
	    }
	  gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
	  STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
	  STMT_VINFO_DEF_TYPE (def_stmt_info) = vect_internal_def;
	  if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
	    STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
	}
    }
}
/* Function vect_pattern_recog_1

   Input:
   PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
	computation pattern.
   STMT: A stmt from which the pattern search should start.

   If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
   expression that computes the same functionality and can be used to
   replace the sequence of stmts that are involved in the pattern.

   Output:
   This function checks if the expression returned by PATTERN_RECOG_FUNC is
   supported in vector form by the target.  We use 'TYPE_IN' to obtain the
   relevant vector type.  If 'TYPE_IN' is already a vector type, then this
   indicates that target support had already been checked by PATTERN_RECOG_FUNC.
   If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
   to the available target pattern.

   This function also does some bookkeeping, as explained in the documentation
   for vect_recog_pattern.  */
static void
vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
		      gimple_stmt_iterator si,
		      vec<gimple> *stmts_to_replace)
{
  gimple stmt = gsi_stmt (si), pattern_stmt;
  stmt_vec_info stmt_info;
  loop_vec_info loop_vinfo;
  tree pattern_vectype;
  tree type_in, type_out;
  enum tree_code code;
  int i;
  gimple next;

  stmts_to_replace->truncate (0);
  stmts_to_replace->quick_push (stmt);
  pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
  if (!pattern_stmt)
    return;

  stmt = stmts_to_replace->last ();
  stmt_info = vinfo_for_stmt (stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);

  if (VECTOR_MODE_P (TYPE_MODE (type_in)))
    {
      /* No need to check target support (already checked by the pattern
	 recognition function).  */
      pattern_vectype = type_out ? type_out : type_in;
    }
  else
    {
      machine_mode vec_mode;
      enum insn_code icode;
      optab optab;

      /* Check target support  */
      type_in = get_vectype_for_scalar_type (type_in);
      if (!type_in)
	return;
      if (type_out)
	type_out = get_vectype_for_scalar_type (type_out);
      else
	type_out = type_in;
      if (!type_out)
	return;
      pattern_vectype = type_out;

      if (is_gimple_assign (pattern_stmt))
	code = gimple_assign_rhs_code (pattern_stmt);
      else
	{
	  gcc_assert (is_gimple_call (pattern_stmt));
	  code = CALL_EXPR;
	}

      optab = optab_for_tree_code (code, type_in, optab_default);
      vec_mode = TYPE_MODE (type_in);
      if (!optab
	  || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
	  || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
	return;
    }

  /* Found a vectorizable pattern.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "pattern recognized: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
    }

  /* Mark the stmts that are involved in the pattern. */
  vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);

  /* Patterns cannot be vectorized using SLP, because they change the order of
     computation.  */
  if (loop_vinfo)
    FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
      if (next == stmt)
	LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);

  /* It is possible that additional pattern stmts are created and inserted in
     STMTS_TO_REPLACE.  We create a stmt_info for each of them, and mark the
     relevant statements.  */
  for (i = 0; stmts_to_replace->iterate (i, &stmt)
	      && (unsigned) i < (stmts_to_replace->length () - 1);
       i++)
    {
      stmt_info = vinfo_for_stmt (stmt);
      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "additional pattern stmt: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
	}

      vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
    }
}
/* Function vect_pattern_recog

   Input:
   LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
	computation idioms.

   Output - for each computation idiom that is detected we create a new stmt
	that provides the same functionality and that can be vectorized.  We
	also record some information in the struct_stmt_info of the relevant
	stmts, as explained below:

   At the entry to this function we have the following stmts, with the
   following initial value in the STMT_VINFO fields:

	 stmt                     in_pattern_p  related_stmt    vec_stmt
	 S1: a_i = ....                 -       -               -
	 S2: a_2 = ..use(a_i)..         -       -               -
	 S3: a_1 = ..use(a_2)..         -       -               -
	 S4: a_0 = ..use(a_1)..         -       -               -
	 S5: ... = ..use(a_0)..         -       -               -

   Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
   represented by a single stmt.  We then:
   - create a new stmt S6 equivalent to the pattern (the stmt is not
     inserted into the code)
   - fill in the STMT_VINFO fields as follows:

				  in_pattern_p  related_stmt    vec_stmt
	 S1: a_i = ....                 -       -               -
	 S2: a_2 = ..use(a_i)..         -       -               -
	 S3: a_1 = ..use(a_2)..         -       -               -
	 S4: a_0 = ..use(a_1)..         true    S6              -
	   '---> S6: a_new = ....       -       S4              -
	 S5: ... = ..use(a_0)..         -       -               -

   (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
   to each other through the RELATED_STMT field).

   S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
   of S4 because it will replace all its uses.  Stmts {S1,S2,S3} will
   remain irrelevant unless used by stmts other than S4.

   If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
   (because they are marked as irrelevant).  It will vectorize S6, and record
   a pointer to the new vector stmt VS6 from S6 (as usual).
   S4 will be skipped, and S5 will be vectorized as usual:

				  in_pattern_p  related_stmt    vec_stmt
	 S1: a_i = ....                 -       -               -
	 S2: a_2 = ..use(a_i)..         -       -               -
	 S3: a_1 = ..use(a_2)..         -       -               -
	 > VS6: va_new = ....           -       -               -
	 S4: a_0 = ..use(a_1)..         true    S6              VS6
	   '---> S6: a_new = ....       -       S4              VS6
	 > VS5: ... = ..vuse(va_new)..  -       -               -
	 S5: ... = ..use(a_0)..         -       -               -

   DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
   elsewhere), and we'll end up with:

	 VS6: va_new = ....
	 VS5: ... = ..vuse(va_new)..

   In case of more than one pattern statements, e.g., widen-mult with
   intermediate type:

     S1  a_t = ;
     S2  a_T = (TYPE) a_t;
	   '--> S3: a_it = (interm_type) a_t;
     S4  prod_T = a_T * CONST;
	   '--> S5: prod_T' = a_it w* CONST;

   there may be other users of a_T outside the pattern.  In that case S2 will
   be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
   and vectorized.  The vector stmt VS2 will be recorded in S2, and VS3 will
   be recorded in S3.  */
void
vect_pattern_recog (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  struct loop *loop;
  basic_block *bbs;
  unsigned int nbbs;
  gimple_stmt_iterator si;
  unsigned int i, j;
  vect_recog_func_ptr vect_recog_func;
  auto_vec<gimple, 1> stmts_to_replace;
  gimple stmt;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "=== vect_pattern_recog ===\n");

  if (loop_vinfo)
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      bbs = LOOP_VINFO_BBS (loop_vinfo);
      nbbs = loop->num_nodes;
    }
  else
    {
      bbs = &BB_VINFO_BB (bb_vinfo);
      nbbs = 1;
    }

  /* Scan through the loop stmts, applying the pattern recognition
     functions starting at each stmt visited:  */
  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  if (bb_vinfo && (stmt = gsi_stmt (si))
	      && vinfo_for_stmt (stmt)
	      && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
	    continue;

	  /* Scan over all generic vect_recog_xxx_pattern functions.  */
	  for (j = 0; j < NUM_PATTERNS; j++)
	    {
	      vect_recog_func = vect_vect_recog_func_ptrs[j];
	      vect_pattern_recog_1 (vect_recog_func, si,