1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2015 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
28 #include "double-int.h"
35 #include "fold-const.h"
36 #include "stor-layout.h"
39 #include "hard-reg-set.h"
41 #include "dominance.h"
42 #include "basic-block.h"
43 #include "gimple-pretty-print.h"
44 #include "tree-ssa-alias.h"
45 #include "internal-fn.h"
47 #include "gimple-expr.h"
51 #include "gimple-iterator.h"
52 #include "gimple-ssa.h"
53 #include "tree-phinodes.h"
54 #include "ssa-iterators.h"
55 #include "stringpool.h"
56 #include "tree-ssanames.h"
61 #include "statistics.h"
63 #include "fixed-value.h"
64 #include "insn-config.h"
73 #include "insn-codes.h"
76 #include "tree-data-ref.h"
77 #include "tree-vectorizer.h"
78 #include "recog.h" /* FIXME: for insn_data */
79 #include "diagnostic-core.h"
83 /* Pattern recognition functions */
84 static gimple
vect_recog_widen_sum_pattern (vec
<gimple
> *, tree
*,
86 static gimple
vect_recog_widen_mult_pattern (vec
<gimple
> *, tree
*,
88 static gimple
vect_recog_dot_prod_pattern (vec
<gimple
> *, tree
*,
90 static gimple
vect_recog_sad_pattern (vec
<gimple
> *, tree
*,
92 static gimple
vect_recog_pow_pattern (vec
<gimple
> *, tree
*, tree
*);
93 static gimple
vect_recog_over_widening_pattern (vec
<gimple
> *, tree
*,
95 static gimple
vect_recog_widen_shift_pattern (vec
<gimple
> *,
97 static gimple
vect_recog_rotate_pattern (vec
<gimple
> *, tree
*, tree
*);
98 static gimple
vect_recog_vector_vector_shift_pattern (vec
<gimple
> *,
100 static gimple
vect_recog_divmod_pattern (vec
<gimple
> *,
102 static gimple
vect_recog_mixed_size_cond_pattern (vec
<gimple
> *,
104 static gimple
vect_recog_bool_pattern (vec
<gimple
> *, tree
*, tree
*);
105 static vect_recog_func_ptr vect_vect_recog_func_ptrs
[NUM_PATTERNS
] = {
106 vect_recog_widen_mult_pattern
,
107 vect_recog_widen_sum_pattern
,
108 vect_recog_dot_prod_pattern
,
109 vect_recog_sad_pattern
,
110 vect_recog_pow_pattern
,
111 vect_recog_widen_shift_pattern
,
112 vect_recog_over_widening_pattern
,
113 vect_recog_rotate_pattern
,
114 vect_recog_vector_vector_shift_pattern
,
115 vect_recog_divmod_pattern
,
116 vect_recog_mixed_size_cond_pattern
,
117 vect_recog_bool_pattern
};
120 append_pattern_def_seq (stmt_vec_info stmt_info
, gimple stmt
)
122 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
),
127 new_pattern_def_seq (stmt_vec_info stmt_info
, gimple stmt
)
129 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
) = NULL
;
130 append_pattern_def_seq (stmt_info
, stmt
);
133 /* Check whether STMT2 is in the same loop or basic block as STMT1.
134 Which of the two applies depends on whether we're currently doing
135 loop-based or basic-block-based vectorization, as determined by
136 the vinfo_for_stmt for STMT1 (which must be defined).
138 If this returns true, vinfo_for_stmt for STMT2 is guaranteed
139 to be defined as well. */
142 vect_same_loop_or_bb_p (gimple stmt1
, gimple stmt2
)
144 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt1
);
145 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
146 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
148 if (!gimple_bb (stmt2
))
153 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
154 if (!flow_bb_inside_loop_p (loop
, gimple_bb (stmt2
)))
159 if (gimple_bb (stmt2
) != BB_VINFO_BB (bb_vinfo
)
160 || gimple_code (stmt2
) == GIMPLE_PHI
)
164 gcc_assert (vinfo_for_stmt (stmt2
));
168 /* If the LHS of DEF_STMT has a single use, and that statement is
169 in the same loop or basic block, return it. */
172 vect_single_imm_use (gimple def_stmt
)
174 tree lhs
= gimple_assign_lhs (def_stmt
);
178 if (!single_imm_use (lhs
, &use_p
, &use_stmt
))
181 if (!vect_same_loop_or_bb_p (def_stmt
, use_stmt
))
187 /* Check whether NAME, an ssa-name used in USE_STMT,
188 is a result of a type promotion, such that:
189 DEF_STMT: NAME = NOP (name0)
190 If CHECK_SIGN is TRUE, check that either both types are signed or both are
194 type_conversion_p (tree name
, gimple use_stmt
, bool check_sign
,
195 tree
*orig_type
, gimple
*def_stmt
, bool *promotion
)
199 loop_vec_info loop_vinfo
;
200 stmt_vec_info stmt_vinfo
;
201 tree type
= TREE_TYPE (name
);
203 enum vect_def_type dt
;
205 bb_vec_info bb_vinfo
;
207 stmt_vinfo
= vinfo_for_stmt (use_stmt
);
208 loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
209 bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
210 if (!vect_is_simple_use (name
, use_stmt
, loop_vinfo
, bb_vinfo
, def_stmt
,
214 if (dt
!= vect_internal_def
215 && dt
!= vect_external_def
&& dt
!= vect_constant_def
)
221 if (!is_gimple_assign (*def_stmt
))
224 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt
)))
227 oprnd0
= gimple_assign_rhs1 (*def_stmt
);
229 *orig_type
= TREE_TYPE (oprnd0
);
230 if (!INTEGRAL_TYPE_P (type
) || !INTEGRAL_TYPE_P (*orig_type
)
231 || ((TYPE_UNSIGNED (type
) != TYPE_UNSIGNED (*orig_type
)) && check_sign
))
234 if (TYPE_PRECISION (type
) >= (TYPE_PRECISION (*orig_type
) * 2))
239 if (!vect_is_simple_use (oprnd0
, *def_stmt
, loop_vinfo
,
240 bb_vinfo
, &dummy_gimple
, &dummy
, &dt
))
246 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
247 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
250 vect_recog_temp_ssa_var (tree type
, gimple stmt
)
252 return make_temp_ssa_name (type
, stmt
, "patt");
255 /* Function vect_recog_dot_prod_pattern
257 Try to find the following pattern:
263 sum_0 = phi <init, sum_1>
266 S3 x_T = (TYPE1) x_t;
267 S4 y_T = (TYPE1) y_t;
269 [S6 prod = (TYPE2) prod; #optional]
270 S7 sum_1 = prod + sum_0;
272 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
273 same size of 'TYPE1' or bigger. This is a special case of a reduction
278 * STMTS: Contains a stmt from which the pattern search begins. In the
279 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
284 * TYPE_IN: The type of the input arguments to the pattern.
286 * TYPE_OUT: The type of the output of this pattern.
288 * Return value: A new stmt that will be used to replace the sequence of
289 stmts that constitute the pattern. In this case it will be:
290 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
292 Note: The dot-prod idiom is a widening reduction pattern that is
293 vectorized without preserving all the intermediate results. It
294 produces only N/2 (widened) results (by summing up pairs of
295 intermediate results) rather than all N results. Therefore, we
296 cannot allow this pattern when we want to get all the results and in
297 the correct order (as is the case when this computation is in an
298 inner-loop nested in an outer-loop that us being vectorized). */
301 vect_recog_dot_prod_pattern (vec
<gimple
> *stmts
, tree
*type_in
,
304 gimple stmt
, last_stmt
= (*stmts
)[0];
306 tree oprnd00
, oprnd01
;
307 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
308 tree type
, half_type
;
311 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
319 loop
= LOOP_VINFO_LOOP (loop_info
);
321 /* We don't allow changing the order of the computation in the inner-loop
322 when doing outer-loop vectorization. */
323 if (loop
&& nested_in_vect_loop_p (loop
, last_stmt
))
326 if (!is_gimple_assign (last_stmt
))
329 type
= gimple_expr_type (last_stmt
);
331 /* Look for the following pattern
335 DDPROD = (TYPE2) DPROD;
336 sum_1 = DDPROD + sum_0;
338 - DX is double the size of X
339 - DY is double the size of Y
340 - DX, DY, DPROD all have the same type
341 - sum is the same size of DPROD or bigger
342 - sum has been recognized as a reduction variable.
344 This is equivalent to:
345 DPROD = X w* Y; #widen mult
346 sum_1 = DPROD w+ sum_0; #widen summation
348 DPROD = X w* Y; #widen mult
349 sum_1 = DPROD + sum_0; #summation
352 /* Starting from LAST_STMT, follow the defs of its uses in search
353 of the above pattern. */
355 if (gimple_assign_rhs_code (last_stmt
) != PLUS_EXPR
)
358 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
360 /* Has been detected as widening-summation? */
362 stmt
= STMT_VINFO_RELATED_STMT (stmt_vinfo
);
363 type
= gimple_expr_type (stmt
);
364 if (gimple_assign_rhs_code (stmt
) != WIDEN_SUM_EXPR
)
366 oprnd0
= gimple_assign_rhs1 (stmt
);
367 oprnd1
= gimple_assign_rhs2 (stmt
);
368 half_type
= TREE_TYPE (oprnd0
);
374 oprnd0
= gimple_assign_rhs1 (last_stmt
);
375 oprnd1
= gimple_assign_rhs2 (last_stmt
);
376 if (!types_compatible_p (TREE_TYPE (oprnd0
), type
)
377 || !types_compatible_p (TREE_TYPE (oprnd1
), type
))
381 if (type_conversion_p (oprnd0
, stmt
, true, &half_type
, &def_stmt
,
386 oprnd0
= gimple_assign_rhs1 (stmt
);
392 /* So far so good. Since last_stmt was detected as a (summation) reduction,
393 we know that oprnd1 is the reduction variable (defined by a loop-header
394 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
395 Left to check that oprnd0 is defined by a (widen_)mult_expr */
396 if (TREE_CODE (oprnd0
) != SSA_NAME
)
399 prod_type
= half_type
;
400 stmt
= SSA_NAME_DEF_STMT (oprnd0
);
402 /* It could not be the dot_prod pattern if the stmt is outside the loop. */
403 if (!gimple_bb (stmt
) || !flow_bb_inside_loop_p (loop
, gimple_bb (stmt
)))
406 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
407 inside the loop (in case we are analyzing an outer-loop). */
408 if (!is_gimple_assign (stmt
))
410 stmt_vinfo
= vinfo_for_stmt (stmt
);
411 gcc_assert (stmt_vinfo
);
412 if (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_internal_def
)
414 if (gimple_assign_rhs_code (stmt
) != MULT_EXPR
)
416 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
418 /* Has been detected as a widening multiplication? */
420 stmt
= STMT_VINFO_RELATED_STMT (stmt_vinfo
);
421 if (gimple_assign_rhs_code (stmt
) != WIDEN_MULT_EXPR
)
423 stmt_vinfo
= vinfo_for_stmt (stmt
);
424 gcc_assert (stmt_vinfo
);
425 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_internal_def
);
426 oprnd00
= gimple_assign_rhs1 (stmt
);
427 oprnd01
= gimple_assign_rhs2 (stmt
);
428 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (last_stmt
))
429 = STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
);
433 tree half_type0
, half_type1
;
437 oprnd0
= gimple_assign_rhs1 (stmt
);
438 oprnd1
= gimple_assign_rhs2 (stmt
);
439 if (!types_compatible_p (TREE_TYPE (oprnd0
), prod_type
)
440 || !types_compatible_p (TREE_TYPE (oprnd1
), prod_type
))
442 if (!type_conversion_p (oprnd0
, stmt
, true, &half_type0
, &def_stmt
,
446 oprnd00
= gimple_assign_rhs1 (def_stmt
);
447 if (!type_conversion_p (oprnd1
, stmt
, true, &half_type1
, &def_stmt
,
451 oprnd01
= gimple_assign_rhs1 (def_stmt
);
452 if (!types_compatible_p (half_type0
, half_type1
))
454 if (TYPE_PRECISION (prod_type
) != TYPE_PRECISION (half_type0
) * 2)
458 half_type
= TREE_TYPE (oprnd00
);
459 *type_in
= half_type
;
462 /* Pattern detected. Create a stmt to be used to replace the pattern: */
463 var
= vect_recog_temp_ssa_var (type
, NULL
);
464 pattern_stmt
= gimple_build_assign (var
, DOT_PROD_EXPR
,
465 oprnd00
, oprnd01
, oprnd1
);
467 if (dump_enabled_p ())
469 dump_printf_loc (MSG_NOTE
, vect_location
,
470 "vect_recog_dot_prod_pattern: detected: ");
471 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
472 dump_printf (MSG_NOTE
, "\n");
479 /* Function vect_recog_sad_pattern
481 Try to find the following Sum of Absolute Difference (SAD) pattern:
484 signed TYPE1 diff, abs_diff;
487 sum_0 = phi <init, sum_1>
490 S3 x_T = (TYPE1) x_t;
491 S4 y_T = (TYPE1) y_t;
493 S6 abs_diff = ABS_EXPR <diff>;
494 [S7 abs_diff = (TYPE2) abs_diff; #optional]
495 S8 sum_1 = abs_diff + sum_0;
497 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
498 same size of 'TYPE1' or bigger. This is a special case of a reduction
503 * STMTS: Contains a stmt from which the pattern search begins. In the
504 example, when this function is called with S8, the pattern
505 {S3,S4,S5,S6,S7,S8} will be detected.
509 * TYPE_IN: The type of the input arguments to the pattern.
511 * TYPE_OUT: The type of the output of this pattern.
513 * Return value: A new stmt that will be used to replace the sequence of
514 stmts that constitute the pattern. In this case it will be:
515 SAD_EXPR <x_t, y_t, sum_0>
519 vect_recog_sad_pattern (vec
<gimple
> *stmts
, tree
*type_in
,
522 gimple last_stmt
= (*stmts
)[0];
523 tree sad_oprnd0
, sad_oprnd1
;
524 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
526 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
533 loop
= LOOP_VINFO_LOOP (loop_info
);
535 /* We don't allow changing the order of the computation in the inner-loop
536 when doing outer-loop vectorization. */
537 if (loop
&& nested_in_vect_loop_p (loop
, last_stmt
))
540 if (!is_gimple_assign (last_stmt
))
543 tree sum_type
= gimple_expr_type (last_stmt
);
545 /* Look for the following pattern
549 DAD = ABS_EXPR <DDIFF>;
550 DDPROD = (TYPE2) DPROD;
553 - DX is at least double the size of X
554 - DY is at least double the size of Y
555 - DX, DY, DDIFF, DAD all have the same type
556 - sum is the same size of DAD or bigger
557 - sum has been recognized as a reduction variable.
559 This is equivalent to:
560 DDIFF = X w- Y; #widen sub
561 DAD = ABS_EXPR <DDIFF>;
562 sum_1 = DAD w+ sum_0; #widen summation
564 DDIFF = X w- Y; #widen sub
565 DAD = ABS_EXPR <DDIFF>;
566 sum_1 = DAD + sum_0; #summation
569 /* Starting from LAST_STMT, follow the defs of its uses in search
570 of the above pattern. */
572 if (gimple_assign_rhs_code (last_stmt
) != PLUS_EXPR
)
575 tree plus_oprnd0
, plus_oprnd1
;
577 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
579 /* Has been detected as widening-summation? */
581 gimple stmt
= STMT_VINFO_RELATED_STMT (stmt_vinfo
);
582 sum_type
= gimple_expr_type (stmt
);
583 if (gimple_assign_rhs_code (stmt
) != WIDEN_SUM_EXPR
)
585 plus_oprnd0
= gimple_assign_rhs1 (stmt
);
586 plus_oprnd1
= gimple_assign_rhs2 (stmt
);
587 half_type
= TREE_TYPE (plus_oprnd0
);
593 plus_oprnd0
= gimple_assign_rhs1 (last_stmt
);
594 plus_oprnd1
= gimple_assign_rhs2 (last_stmt
);
595 if (!types_compatible_p (TREE_TYPE (plus_oprnd0
), sum_type
)
596 || !types_compatible_p (TREE_TYPE (plus_oprnd1
), sum_type
))
599 /* The type conversion could be promotion, demotion,
600 or just signed -> unsigned. */
601 if (type_conversion_p (plus_oprnd0
, last_stmt
, false,
602 &half_type
, &def_stmt
, &promotion
))
603 plus_oprnd0
= gimple_assign_rhs1 (def_stmt
);
605 half_type
= sum_type
;
608 /* So far so good. Since last_stmt was detected as a (summation) reduction,
609 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
610 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
611 Then check that plus_oprnd0 is defined by an abs_expr. */
613 if (TREE_CODE (plus_oprnd0
) != SSA_NAME
)
616 tree abs_type
= half_type
;
617 gimple abs_stmt
= SSA_NAME_DEF_STMT (plus_oprnd0
);
619 /* It could not be the sad pattern if the abs_stmt is outside the loop. */
620 if (!gimple_bb (abs_stmt
) || !flow_bb_inside_loop_p (loop
, gimple_bb (abs_stmt
)))
623 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
624 inside the loop (in case we are analyzing an outer-loop). */
625 if (!is_gimple_assign (abs_stmt
))
628 stmt_vec_info abs_stmt_vinfo
= vinfo_for_stmt (abs_stmt
);
629 gcc_assert (abs_stmt_vinfo
);
630 if (STMT_VINFO_DEF_TYPE (abs_stmt_vinfo
) != vect_internal_def
)
632 if (gimple_assign_rhs_code (abs_stmt
) != ABS_EXPR
)
635 tree abs_oprnd
= gimple_assign_rhs1 (abs_stmt
);
636 if (!types_compatible_p (TREE_TYPE (abs_oprnd
), abs_type
))
638 if (TYPE_UNSIGNED (abs_type
))
641 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
643 if (TREE_CODE (abs_oprnd
) != SSA_NAME
)
646 gimple diff_stmt
= SSA_NAME_DEF_STMT (abs_oprnd
);
648 /* It could not be the sad pattern if the diff_stmt is outside the loop. */
649 if (!gimple_bb (diff_stmt
)
650 || !flow_bb_inside_loop_p (loop
, gimple_bb (diff_stmt
)))
653 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
654 inside the loop (in case we are analyzing an outer-loop). */
655 if (!is_gimple_assign (diff_stmt
))
658 stmt_vec_info diff_stmt_vinfo
= vinfo_for_stmt (diff_stmt
);
659 gcc_assert (diff_stmt_vinfo
);
660 if (STMT_VINFO_DEF_TYPE (diff_stmt_vinfo
) != vect_internal_def
)
662 if (gimple_assign_rhs_code (diff_stmt
) != MINUS_EXPR
)
665 tree half_type0
, half_type1
;
668 tree minus_oprnd0
= gimple_assign_rhs1 (diff_stmt
);
669 tree minus_oprnd1
= gimple_assign_rhs2 (diff_stmt
);
671 if (!types_compatible_p (TREE_TYPE (minus_oprnd0
), abs_type
)
672 || !types_compatible_p (TREE_TYPE (minus_oprnd1
), abs_type
))
674 if (!type_conversion_p (minus_oprnd0
, diff_stmt
, false,
675 &half_type0
, &def_stmt
, &promotion
)
678 sad_oprnd0
= gimple_assign_rhs1 (def_stmt
);
680 if (!type_conversion_p (minus_oprnd1
, diff_stmt
, false,
681 &half_type1
, &def_stmt
, &promotion
)
684 sad_oprnd1
= gimple_assign_rhs1 (def_stmt
);
686 if (!types_compatible_p (half_type0
, half_type1
))
688 if (TYPE_PRECISION (abs_type
) < TYPE_PRECISION (half_type0
) * 2
689 || TYPE_PRECISION (sum_type
) < TYPE_PRECISION (half_type0
) * 2)
692 *type_in
= TREE_TYPE (sad_oprnd0
);
693 *type_out
= sum_type
;
695 /* Pattern detected. Create a stmt to be used to replace the pattern: */
696 tree var
= vect_recog_temp_ssa_var (sum_type
, NULL
);
697 gimple pattern_stmt
= gimple_build_assign (var
, SAD_EXPR
, sad_oprnd0
,
698 sad_oprnd1
, plus_oprnd1
);
700 if (dump_enabled_p ())
702 dump_printf_loc (MSG_NOTE
, vect_location
,
703 "vect_recog_sad_pattern: detected: ");
704 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
705 dump_printf (MSG_NOTE
, "\n");
712 /* Handle widening operation by a constant. At the moment we support MULT_EXPR
715 For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
716 we check that CONST_OPRND is less or equal to the size of HALF_TYPE.
718 Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
719 HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
720 that satisfies the above restrictions, we can perform a widening opeartion
721 from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
722 with a_it = (interm_type) a_t; Store such operation in *WSTMT. */
725 vect_handle_widen_op_by_const (gimple stmt
, enum tree_code code
,
726 tree const_oprnd
, tree
*oprnd
,
727 gimple
*wstmt
, tree type
,
728 tree
*half_type
, gimple def_stmt
)
730 tree new_type
, new_oprnd
;
732 if (code
!= MULT_EXPR
&& code
!= LSHIFT_EXPR
)
735 if (((code
== MULT_EXPR
&& int_fits_type_p (const_oprnd
, *half_type
))
736 || (code
== LSHIFT_EXPR
737 && compare_tree_int (const_oprnd
, TYPE_PRECISION (*half_type
))
739 && TYPE_PRECISION (type
) == (TYPE_PRECISION (*half_type
) * 2))
741 /* CONST_OPRND is a constant of HALF_TYPE. */
742 *oprnd
= gimple_assign_rhs1 (def_stmt
);
746 if (TYPE_PRECISION (type
) < (TYPE_PRECISION (*half_type
) * 4))
749 if (!vect_same_loop_or_bb_p (stmt
, def_stmt
))
752 /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
753 a type 2 times bigger than HALF_TYPE. */
754 new_type
= build_nonstandard_integer_type (TYPE_PRECISION (type
) / 2,
755 TYPE_UNSIGNED (type
));
756 if ((code
== MULT_EXPR
&& !int_fits_type_p (const_oprnd
, new_type
))
757 || (code
== LSHIFT_EXPR
758 && compare_tree_int (const_oprnd
, TYPE_PRECISION (new_type
)) == 1))
761 /* Use NEW_TYPE for widening operation and create a_T = (NEW_TYPE) a_t; */
762 *oprnd
= gimple_assign_rhs1 (def_stmt
);
763 new_oprnd
= make_ssa_name (new_type
);
764 *wstmt
= gimple_build_assign (new_oprnd
, NOP_EXPR
, *oprnd
);
767 *half_type
= new_type
;
772 /* Function vect_recog_widen_mult_pattern
774 Try to find the following pattern:
778 TYPE a_T, b_T, prod_T;
784 S5 prod_T = a_T * b_T;
786 where type 'TYPE' is at least double the size of type 'type1' and 'type2'.
788 Also detect unsigned cases:
792 unsigned TYPE u_prod_T;
793 TYPE a_T, b_T, prod_T;
799 S5 prod_T = a_T * b_T;
800 S6 u_prod_T = (unsigned TYPE) prod_T;
802 and multiplication by constants:
809 S5 prod_T = a_T * CONST;
811 A special case of multiplication by constants is when 'TYPE' is 4 times
812 bigger than 'type', but CONST fits an intermediate type 2 times smaller
813 than 'TYPE'. In that case we create an additional pattern stmt for S3
814 to create a variable of the intermediate type, and perform widen-mult
815 on the intermediate type as well:
819 TYPE a_T, prod_T, prod_T';
823 '--> a_it = (interm_type) a_t;
824 S5 prod_T = a_T * CONST;
825 '--> prod_T' = a_it w* CONST;
829 * STMTS: Contains a stmt from which the pattern search begins. In the
830 example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
831 is detected. In case of unsigned widen-mult, the original stmt (S5) is
832 replaced with S6 in STMTS. In case of multiplication by a constant
833 of an intermediate type (the last case above), STMTS also contains S3
834 (inserted before S5).
838 * TYPE_IN: The type of the input arguments to the pattern.
840 * TYPE_OUT: The type of the output of this pattern.
842 * Return value: A new stmt that will be used to replace the sequence of
843 stmts that constitute the pattern. In this case it will be:
844 WIDEN_MULT <a_t, b_t>
845 If the result of WIDEN_MULT needs to be converted to a larger type, the
846 returned stmt will be this type conversion stmt.
850 vect_recog_widen_mult_pattern (vec
<gimple
> *stmts
,
851 tree
*type_in
, tree
*type_out
)
853 gimple last_stmt
= stmts
->pop ();
854 gimple def_stmt0
, def_stmt1
;
856 tree type
, half_type0
, half_type1
;
857 gimple new_stmt
= NULL
, pattern_stmt
= NULL
;
858 tree vectype
, vecitype
;
860 enum tree_code dummy_code
;
866 if (!is_gimple_assign (last_stmt
))
869 type
= gimple_expr_type (last_stmt
);
871 /* Starting from LAST_STMT, follow the defs of its uses in search
872 of the above pattern. */
874 if (gimple_assign_rhs_code (last_stmt
) != MULT_EXPR
)
877 oprnd0
= gimple_assign_rhs1 (last_stmt
);
878 oprnd1
= gimple_assign_rhs2 (last_stmt
);
879 if (!types_compatible_p (TREE_TYPE (oprnd0
), type
)
880 || !types_compatible_p (TREE_TYPE (oprnd1
), type
))
883 /* Check argument 0. */
884 if (!type_conversion_p (oprnd0
, last_stmt
, false, &half_type0
, &def_stmt0
,
888 /* Check argument 1. */
889 op1_ok
= type_conversion_p (oprnd1
, last_stmt
, false, &half_type1
,
890 &def_stmt1
, &promotion
);
892 if (op1_ok
&& promotion
)
894 oprnd0
= gimple_assign_rhs1 (def_stmt0
);
895 oprnd1
= gimple_assign_rhs1 (def_stmt1
);
899 if (TREE_CODE (oprnd1
) == INTEGER_CST
900 && TREE_CODE (half_type0
) == INTEGER_TYPE
901 && vect_handle_widen_op_by_const (last_stmt
, MULT_EXPR
, oprnd1
,
902 &oprnd0
, &new_stmt
, type
,
903 &half_type0
, def_stmt0
))
905 half_type1
= half_type0
;
906 oprnd1
= fold_convert (half_type1
, oprnd1
);
912 /* If the two arguments have different sizes, convert the one with
913 the smaller type into the larger type. */
914 if (TYPE_PRECISION (half_type0
) != TYPE_PRECISION (half_type1
))
916 /* If we already used up the single-stmt slot give up. */
921 gimple def_stmt
= NULL
;
923 if (TYPE_PRECISION (half_type0
) < TYPE_PRECISION (half_type1
))
925 def_stmt
= def_stmt0
;
926 half_type0
= half_type1
;
931 def_stmt
= def_stmt1
;
932 half_type1
= half_type0
;
936 tree old_oprnd
= gimple_assign_rhs1 (def_stmt
);
937 tree new_oprnd
= make_ssa_name (half_type0
);
938 new_stmt
= gimple_build_assign (new_oprnd
, NOP_EXPR
, old_oprnd
);
942 /* Handle unsigned case. Look for
943 S6 u_prod_T = (unsigned TYPE) prod_T;
944 Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */
945 if (TYPE_UNSIGNED (type
) != TYPE_UNSIGNED (half_type0
))
951 if (TYPE_UNSIGNED (type
) == TYPE_UNSIGNED (half_type1
))
954 use_stmt
= vect_single_imm_use (last_stmt
);
955 if (!use_stmt
|| !is_gimple_assign (use_stmt
)
956 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt
)))
959 use_lhs
= gimple_assign_lhs (use_stmt
);
960 use_type
= TREE_TYPE (use_lhs
);
961 if (!INTEGRAL_TYPE_P (use_type
)
962 || (TYPE_UNSIGNED (type
) == TYPE_UNSIGNED (use_type
))
963 || (TYPE_PRECISION (type
) != TYPE_PRECISION (use_type
)))
967 last_stmt
= use_stmt
;
970 if (!types_compatible_p (half_type0
, half_type1
))
973 /* If TYPE is more than twice larger than HALF_TYPE, we use WIDEN_MULT
974 to get an intermediate result of type ITYPE. In this case we need
975 to build a statement to convert this intermediate result to type TYPE. */
977 if (TYPE_PRECISION (type
) > TYPE_PRECISION (half_type0
) * 2)
978 itype
= build_nonstandard_integer_type
979 (GET_MODE_BITSIZE (TYPE_MODE (half_type0
)) * 2,
980 TYPE_UNSIGNED (type
));
982 /* Pattern detected. */
983 if (dump_enabled_p ())
984 dump_printf_loc (MSG_NOTE
, vect_location
,
985 "vect_recog_widen_mult_pattern: detected:\n");
987 /* Check target support */
988 vectype
= get_vectype_for_scalar_type (half_type0
);
989 vecitype
= get_vectype_for_scalar_type (itype
);
992 || !supportable_widening_operation (WIDEN_MULT_EXPR
, last_stmt
,
994 &dummy_code
, &dummy_code
,
995 &dummy_int
, &dummy_vec
))
999 *type_out
= get_vectype_for_scalar_type (type
);
1001 /* Pattern supported. Create a stmt to be used to replace the pattern: */
1002 var
= vect_recog_temp_ssa_var (itype
, NULL
);
1003 pattern_stmt
= gimple_build_assign (var
, WIDEN_MULT_EXPR
, oprnd0
, oprnd1
);
1005 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
1006 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1007 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
1008 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
) = NULL
;
1010 /* If the original two operands have different sizes, we may need to convert
1011 the smaller one into the larget type. If this is the case, at this point
1012 the new stmt is already built. */
1015 append_pattern_def_seq (stmt_vinfo
, new_stmt
);
1016 stmt_vec_info new_stmt_info
1017 = new_stmt_vec_info (new_stmt
, loop_vinfo
, bb_vinfo
);
1018 set_vinfo_for_stmt (new_stmt
, new_stmt_info
);
1019 STMT_VINFO_VECTYPE (new_stmt_info
) = vectype
;
1022 /* If ITYPE is not TYPE, we need to build a type convertion stmt to convert
1023 the result of the widen-mult operation into type TYPE. */
1026 append_pattern_def_seq (stmt_vinfo
, pattern_stmt
);
1027 stmt_vec_info pattern_stmt_info
1028 = new_stmt_vec_info (pattern_stmt
, loop_vinfo
, bb_vinfo
);
1029 set_vinfo_for_stmt (pattern_stmt
, pattern_stmt_info
);
1030 STMT_VINFO_VECTYPE (pattern_stmt_info
) = vecitype
;
1031 pattern_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (type
, NULL
),
1033 gimple_assign_lhs (pattern_stmt
));
1036 if (dump_enabled_p ())
1037 dump_gimple_stmt_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, pattern_stmt
, 0);
1039 stmts
->safe_push (last_stmt
);
1040 return pattern_stmt
;
1044 /* Function vect_recog_pow_pattern
1046 Try to find the following pattern:
1050 with POW being one of pow, powf, powi, powif and N being
1055 * LAST_STMT: A stmt from which the pattern search begins.
1059 * TYPE_IN: The type of the input arguments to the pattern.
1061 * TYPE_OUT: The type of the output of this pattern.
1063 * Return value: A new stmt that will be used to replace the sequence of
1064 stmts that constitute the pattern. In this case it will be:
1071 vect_recog_pow_pattern (vec
<gimple
> *stmts
, tree
*type_in
,
1074 gimple last_stmt
= (*stmts
)[0];
1075 tree fn
, base
, exp
= NULL
;
1079 if (!is_gimple_call (last_stmt
) || gimple_call_lhs (last_stmt
) == NULL
)
1082 fn
= gimple_call_fndecl (last_stmt
);
1083 if (fn
== NULL_TREE
|| DECL_BUILT_IN_CLASS (fn
) != BUILT_IN_NORMAL
)
1086 switch (DECL_FUNCTION_CODE (fn
))
1088 case BUILT_IN_POWIF
:
1092 base
= gimple_call_arg (last_stmt
, 0);
1093 exp
= gimple_call_arg (last_stmt
, 1);
1094 if (TREE_CODE (exp
) != REAL_CST
1095 && TREE_CODE (exp
) != INTEGER_CST
)
1103 /* We now have a pow or powi builtin function call with a constant
1106 *type_out
= NULL_TREE
;
1108 /* Catch squaring. */
1109 if ((tree_fits_shwi_p (exp
)
1110 && tree_to_shwi (exp
) == 2)
1111 || (TREE_CODE (exp
) == REAL_CST
1112 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp
), dconst2
)))
1114 *type_in
= TREE_TYPE (base
);
1116 var
= vect_recog_temp_ssa_var (TREE_TYPE (base
), NULL
);
1117 stmt
= gimple_build_assign (var
, MULT_EXPR
, base
, base
);
1121 /* Catch square root. */
1122 if (TREE_CODE (exp
) == REAL_CST
1123 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp
), dconsthalf
))
1125 tree newfn
= mathfn_built_in (TREE_TYPE (base
), BUILT_IN_SQRT
);
1126 *type_in
= get_vectype_for_scalar_type (TREE_TYPE (base
));
1129 gcall
*stmt
= gimple_build_call (newfn
, 1, base
);
1130 if (vectorizable_function (stmt
, *type_in
, *type_in
)
1133 var
= vect_recog_temp_ssa_var (TREE_TYPE (base
), stmt
);
1134 gimple_call_set_lhs (stmt
, var
);
1144 /* Function vect_recog_widen_sum_pattern
1146 Try to find the following pattern:
1149 TYPE x_T, sum = init;
1151 sum_0 = phi <init, sum_1>
1153 S2 x_T = (TYPE) x_t;
1154 S3 sum_1 = x_T + sum_0;
1156 where type 'TYPE' is at least double the size of type 'type', i.e - we're
1157 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1158 a special case of a reduction computation.
1162 * LAST_STMT: A stmt from which the pattern search begins. In the example,
1163 when this function is called with S3, the pattern {S2,S3} will be detected.
1167 * TYPE_IN: The type of the input arguments to the pattern.
1169 * TYPE_OUT: The type of the output of this pattern.
1171 * Return value: A new stmt that will be used to replace the sequence of
1172 stmts that constitute the pattern. In this case it will be:
1173 WIDEN_SUM <x_t, sum_0>
1175 Note: The widening-sum idiom is a widening reduction pattern that is
1176 vectorized without preserving all the intermediate results. It
1177 produces only N/2 (widened) results (by summing up pairs of
1178 intermediate results) rather than all N results. Therefore, we
1179 cannot allow this pattern when we want to get all the results and in
1180 the correct order (as is the case when this computation is in an
1181 inner-loop nested in an outer-loop that us being vectorized). */
1184 vect_recog_widen_sum_pattern (vec
<gimple
> *stmts
, tree
*type_in
,
1187 gimple stmt
, last_stmt
= (*stmts
)[0];
1188 tree oprnd0
, oprnd1
;
1189 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
1190 tree type
, half_type
;
1191 gimple pattern_stmt
;
1192 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1200 loop
= LOOP_VINFO_LOOP (loop_info
);
1202 /* We don't allow changing the order of the computation in the inner-loop
1203 when doing outer-loop vectorization. */
1204 if (loop
&& nested_in_vect_loop_p (loop
, last_stmt
))
1207 if (!is_gimple_assign (last_stmt
))
1210 type
= gimple_expr_type (last_stmt
);
1212 /* Look for the following pattern
1215 In which DX is at least double the size of X, and sum_1 has been
1216 recognized as a reduction variable.
1219 /* Starting from LAST_STMT, follow the defs of its uses in search
1220 of the above pattern. */
1222 if (gimple_assign_rhs_code (last_stmt
) != PLUS_EXPR
)
1225 oprnd0
= gimple_assign_rhs1 (last_stmt
);
1226 oprnd1
= gimple_assign_rhs2 (last_stmt
);
1227 if (!types_compatible_p (TREE_TYPE (oprnd0
), type
)
1228 || !types_compatible_p (TREE_TYPE (oprnd1
), type
))
1231 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1232 we know that oprnd1 is the reduction variable (defined by a loop-header
1233 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1234 Left to check that oprnd0 is defined by a cast from type 'type' to type
1237 if (!type_conversion_p (oprnd0
, last_stmt
, true, &half_type
, &stmt
,
1242 oprnd0
= gimple_assign_rhs1 (stmt
);
1243 *type_in
= half_type
;
1246 /* Pattern detected. Create a stmt to be used to replace the pattern: */
1247 var
= vect_recog_temp_ssa_var (type
, NULL
);
1248 pattern_stmt
= gimple_build_assign (var
, WIDEN_SUM_EXPR
, oprnd0
, oprnd1
);
1250 if (dump_enabled_p ())
1252 dump_printf_loc (MSG_NOTE
, vect_location
,
1253 "vect_recog_widen_sum_pattern: detected: ");
1254 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
1255 dump_printf (MSG_NOTE
, "\n");
1258 return pattern_stmt
;
1262 /* Return TRUE if the operation in STMT can be performed on a smaller type.
1265 STMT - a statement to check.
1266 DEF - we support operations with two operands, one of which is constant.
1267 The other operand can be defined by a demotion operation, or by a
1268 previous statement in a sequence of over-promoted operations. In the
1269 later case DEF is used to replace that operand. (It is defined by a
1270 pattern statement we created for the previous statement in the
1274 NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not
1275 NULL, it's the type of DEF.
1276 STMTS - additional pattern statements. If a pattern statement (type
1277 conversion) is created in this function, its original statement is
1281 OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
1282 operands to use in the new pattern statement for STMT (will be created
1283 in vect_recog_over_widening_pattern ()).
1284 NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
1285 statements for STMT: the first one is a type promotion and the second
1286 one is the operation itself. We return the type promotion statement
1287 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
1288 the second pattern statement. */
1291 vect_operation_fits_smaller_type (gimple stmt
, tree def
, tree
*new_type
,
1292 tree
*op0
, tree
*op1
, gimple
*new_def_stmt
,
1295 enum tree_code code
;
1296 tree const_oprnd
, oprnd
;
1297 tree interm_type
= NULL_TREE
, half_type
, new_oprnd
, type
;
1298 gimple def_stmt
, new_stmt
;
1304 *new_def_stmt
= NULL
;
1306 if (!is_gimple_assign (stmt
))
1309 code
= gimple_assign_rhs_code (stmt
);
1310 if (code
!= LSHIFT_EXPR
&& code
!= RSHIFT_EXPR
1311 && code
!= BIT_IOR_EXPR
&& code
!= BIT_XOR_EXPR
&& code
!= BIT_AND_EXPR
)
1314 oprnd
= gimple_assign_rhs1 (stmt
);
1315 const_oprnd
= gimple_assign_rhs2 (stmt
);
1316 type
= gimple_expr_type (stmt
);
1318 if (TREE_CODE (oprnd
) != SSA_NAME
1319 || TREE_CODE (const_oprnd
) != INTEGER_CST
)
1322 /* If oprnd has other uses besides that in stmt we cannot mark it
1323 as being part of a pattern only. */
1324 if (!has_single_use (oprnd
))
1327 /* If we are in the middle of a sequence, we use DEF from a previous
1328 statement. Otherwise, OPRND has to be a result of type promotion. */
1331 half_type
= *new_type
;
1337 if (!type_conversion_p (oprnd
, stmt
, false, &half_type
, &def_stmt
,
1340 || !vect_same_loop_or_bb_p (stmt
, def_stmt
))
1344 /* Can we perform the operation on a smaller type? */
1350 if (!int_fits_type_p (const_oprnd
, half_type
))
1352 /* HALF_TYPE is not enough. Try a bigger type if possible. */
1353 if (TYPE_PRECISION (type
) < (TYPE_PRECISION (half_type
) * 4))
1356 interm_type
= build_nonstandard_integer_type (
1357 TYPE_PRECISION (half_type
) * 2, TYPE_UNSIGNED (type
));
1358 if (!int_fits_type_p (const_oprnd
, interm_type
))
1365 /* Try intermediate type - HALF_TYPE is not enough for sure. */
1366 if (TYPE_PRECISION (type
) < (TYPE_PRECISION (half_type
) * 4))
1369 /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
1370 (e.g., if the original value was char, the shift amount is at most 8
1371 if we want to use short). */
1372 if (compare_tree_int (const_oprnd
, TYPE_PRECISION (half_type
)) == 1)
1375 interm_type
= build_nonstandard_integer_type (
1376 TYPE_PRECISION (half_type
) * 2, TYPE_UNSIGNED (type
));
1378 if (!vect_supportable_shift (code
, interm_type
))
1384 if (vect_supportable_shift (code
, half_type
))
1387 /* Try intermediate type - HALF_TYPE is not supported. */
1388 if (TYPE_PRECISION (type
) < (TYPE_PRECISION (half_type
) * 4))
1391 interm_type
= build_nonstandard_integer_type (
1392 TYPE_PRECISION (half_type
) * 2, TYPE_UNSIGNED (type
));
1394 if (!vect_supportable_shift (code
, interm_type
))
1403 /* There are four possible cases:
1404 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
1405 the first statement in the sequence)
1406 a. The original, HALF_TYPE, is not enough - we replace the promotion
1407 from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
1408 b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
1410 2. OPRND is defined by a pattern statement we created.
1411 a. Its type is not sufficient for the operation, we create a new stmt:
1412 a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store
1413 this statement in NEW_DEF_STMT, and it is later put in
1414 STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
1415 b. OPRND is good to use in the new statement. */
1420 /* Replace the original type conversion HALF_TYPE->TYPE with
1421 HALF_TYPE->INTERM_TYPE. */
1422 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
)))
1424 new_stmt
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
));
1425 /* Check if the already created pattern stmt is what we need. */
1426 if (!is_gimple_assign (new_stmt
)
1427 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt
))
1428 || TREE_TYPE (gimple_assign_lhs (new_stmt
)) != interm_type
)
1431 stmts
->safe_push (def_stmt
);
1432 oprnd
= gimple_assign_lhs (new_stmt
);
1436 /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */
1437 oprnd
= gimple_assign_rhs1 (def_stmt
);
1438 new_oprnd
= make_ssa_name (interm_type
);
1439 new_stmt
= gimple_build_assign (new_oprnd
, NOP_EXPR
, oprnd
);
1440 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
)) = new_stmt
;
1441 stmts
->safe_push (def_stmt
);
1447 /* Retrieve the operand before the type promotion. */
1448 oprnd
= gimple_assign_rhs1 (def_stmt
);
1455 /* Create a type conversion HALF_TYPE->INTERM_TYPE. */
1456 new_oprnd
= make_ssa_name (interm_type
);
1457 new_stmt
= gimple_build_assign (new_oprnd
, NOP_EXPR
, oprnd
);
1459 *new_def_stmt
= new_stmt
;
1462 /* Otherwise, OPRND is already set. */
1466 *new_type
= interm_type
;
1468 *new_type
= half_type
;
1471 *op1
= fold_convert (*new_type
, const_oprnd
);
1477 /* Try to find a statement or a sequence of statements that can be performed
1481 TYPE x_T, res0_T, res1_T;
1484 S2 x_T = (TYPE) x_t;
1485 S3 res0_T = op (x_T, C0);
1486 S4 res1_T = op (res0_T, C1);
1487 S5 ... = () res1_T; - type demotion
1489 where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
1491 Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
1492 be 'type' or some intermediate type. For now, we expect S5 to be a type
1493 demotion operation. We also check that S3 and S4 have only one use. */
1496 vect_recog_over_widening_pattern (vec
<gimple
> *stmts
,
1497 tree
*type_in
, tree
*type_out
)
1499 gimple stmt
= stmts
->pop ();
1500 gimple pattern_stmt
= NULL
, new_def_stmt
, prev_stmt
= NULL
, use_stmt
= NULL
;
1501 tree op0
, op1
, vectype
= NULL_TREE
, use_lhs
, use_type
;
1502 tree var
= NULL_TREE
, new_type
= NULL_TREE
, new_oprnd
;
1509 if (!vinfo_for_stmt (stmt
)
1510 || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt
)))
1513 new_def_stmt
= NULL
;
1514 if (!vect_operation_fits_smaller_type (stmt
, var
, &new_type
,
1515 &op0
, &op1
, &new_def_stmt
,
1524 /* STMT can be performed on a smaller type. Check its uses. */
1525 use_stmt
= vect_single_imm_use (stmt
);
1526 if (!use_stmt
|| !is_gimple_assign (use_stmt
))
1529 /* Create pattern statement for STMT. */
1530 vectype
= get_vectype_for_scalar_type (new_type
);
1534 /* We want to collect all the statements for which we create pattern
1535 statetments, except for the case when the last statement in the
1536 sequence doesn't have a corresponding pattern statement. In such
1537 case we associate the last pattern statement with the last statement
1538 in the sequence. Therefore, we only add the original statement to
1539 the list if we know that it is not the last. */
1541 stmts
->safe_push (prev_stmt
);
1543 var
= vect_recog_temp_ssa_var (new_type
, NULL
);
1545 = gimple_build_assign (var
, gimple_assign_rhs_code (stmt
), op0
, op1
);
1546 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt
)) = pattern_stmt
;
1547 new_pattern_def_seq (vinfo_for_stmt (stmt
), new_def_stmt
);
1549 if (dump_enabled_p ())
1551 dump_printf_loc (MSG_NOTE
, vect_location
,
1552 "created pattern stmt: ");
1553 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
1554 dump_printf (MSG_NOTE
, "\n");
1557 type
= gimple_expr_type (stmt
);
1564 /* We got a sequence. We expect it to end with a type demotion operation.
1565 Otherwise, we quit (for now). There are three possible cases: the
1566 conversion is to NEW_TYPE (we don't do anything), the conversion is to
1567 a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
1568 NEW_TYPE differs (we create a new conversion statement). */
1569 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt
)))
1571 use_lhs
= gimple_assign_lhs (use_stmt
);
1572 use_type
= TREE_TYPE (use_lhs
);
1573 /* Support only type demotion or signedess change. */
1574 if (!INTEGRAL_TYPE_P (use_type
)
1575 || TYPE_PRECISION (type
) <= TYPE_PRECISION (use_type
))
1578 /* Check that NEW_TYPE is not bigger than the conversion result. */
1579 if (TYPE_PRECISION (new_type
) > TYPE_PRECISION (use_type
))
1582 if (TYPE_UNSIGNED (new_type
) != TYPE_UNSIGNED (use_type
)
1583 || TYPE_PRECISION (new_type
) != TYPE_PRECISION (use_type
))
1585 /* Create NEW_TYPE->USE_TYPE conversion. */
1586 new_oprnd
= make_ssa_name (use_type
);
1587 pattern_stmt
= gimple_build_assign (new_oprnd
, NOP_EXPR
, var
);
1588 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt
)) = pattern_stmt
;
1590 *type_in
= get_vectype_for_scalar_type (new_type
);
1591 *type_out
= get_vectype_for_scalar_type (use_type
);
1593 /* We created a pattern statement for the last statement in the
1594 sequence, so we don't need to associate it with the pattern
1595 statement created for PREV_STMT. Therefore, we add PREV_STMT
1596 to the list in order to mark it later in vect_pattern_recog_1. */
1598 stmts
->safe_push (prev_stmt
);
1603 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt
))
1604 = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt
));
1607 *type_out
= NULL_TREE
;
1610 stmts
->safe_push (use_stmt
);
1613 /* TODO: support general case, create a conversion to the correct type. */
1616 /* Pattern detected. */
1617 if (dump_enabled_p ())
1619 dump_printf_loc (MSG_NOTE
, vect_location
,
1620 "vect_recog_over_widening_pattern: detected: ");
1621 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
1622 dump_printf (MSG_NOTE
, "\n");
1625 return pattern_stmt
;
1628 /* Detect widening shift pattern:
1634 S2 a_T = (TYPE) a_t;
1635 S3 res_T = a_T << CONST;
1637 where type 'TYPE' is at least double the size of type 'type'.
1639 Also detect cases where the shift result is immediately converted
1640 to another type 'result_type' that is no larger in size than 'TYPE'.
1641 In those cases we perform a widen-shift that directly results in
1642 'result_type', to avoid a possible over-widening situation:
1646 result_type res_result;
1649 S2 a_T = (TYPE) a_t;
1650 S3 res_T = a_T << CONST;
1651 S4 res_result = (result_type) res_T;
1652 '--> res_result' = a_t w<< CONST;
1654 And a case when 'TYPE' is 4 times bigger than 'type'. In that case we
1655 create an additional pattern stmt for S2 to create a variable of an
1656 intermediate type, and perform widen-shift on the intermediate type:
1660 TYPE a_T, res_T, res_T';
1663 S2 a_T = (TYPE) a_t;
1664 '--> a_it = (interm_type) a_t;
1665 S3 res_T = a_T << CONST;
1666 '--> res_T' = a_it <<* CONST;
1670 * STMTS: Contains a stmt from which the pattern search begins.
1671 In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
1672 in STMTS. When an intermediate type is used and a pattern statement is
1673 created for S2, we also put S2 here (before S3).
1677 * TYPE_IN: The type of the input arguments to the pattern.
1679 * TYPE_OUT: The type of the output of this pattern.
1681 * Return value: A new stmt that will be used to replace the sequence of
1682 stmts that constitute the pattern. In this case it will be:
1683 WIDEN_LSHIFT_EXPR <a_t, CONST>. */
1686 vect_recog_widen_shift_pattern (vec
<gimple
> *stmts
,
1687 tree
*type_in
, tree
*type_out
)
1689 gimple last_stmt
= stmts
->pop ();
1691 tree oprnd0
, oprnd1
;
1692 tree type
, half_type0
;
1693 gimple pattern_stmt
;
1694 tree vectype
, vectype_out
= NULL_TREE
;
1696 enum tree_code dummy_code
;
1698 vec
<tree
> dummy_vec
;
1702 if (!is_gimple_assign (last_stmt
) || !vinfo_for_stmt (last_stmt
))
1705 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt
)))
1708 if (gimple_assign_rhs_code (last_stmt
) != LSHIFT_EXPR
)
1711 oprnd0
= gimple_assign_rhs1 (last_stmt
);
1712 oprnd1
= gimple_assign_rhs2 (last_stmt
);
1713 if (TREE_CODE (oprnd0
) != SSA_NAME
|| TREE_CODE (oprnd1
) != INTEGER_CST
)
1716 /* Check operand 0: it has to be defined by a type promotion. */
1717 if (!type_conversion_p (oprnd0
, last_stmt
, false, &half_type0
, &def_stmt0
,
1722 /* Check operand 1: has to be positive. We check that it fits the type
1723 in vect_handle_widen_op_by_const (). */
1724 if (tree_int_cst_compare (oprnd1
, size_zero_node
) <= 0)
1727 oprnd0
= gimple_assign_rhs1 (def_stmt0
);
1728 type
= gimple_expr_type (last_stmt
);
1730 /* Check for subsequent conversion to another type. */
1731 use_stmt
= vect_single_imm_use (last_stmt
);
1732 if (use_stmt
&& is_gimple_assign (use_stmt
)
1733 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt
))
1734 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt
)))
1736 tree use_lhs
= gimple_assign_lhs (use_stmt
);
1737 tree use_type
= TREE_TYPE (use_lhs
);
1739 if (INTEGRAL_TYPE_P (use_type
)
1740 && TYPE_PRECISION (use_type
) <= TYPE_PRECISION (type
))
1742 last_stmt
= use_stmt
;
1747 /* Check if this a widening operation. */
1748 gimple wstmt
= NULL
;
1749 if (!vect_handle_widen_op_by_const (last_stmt
, LSHIFT_EXPR
, oprnd1
,
1751 type
, &half_type0
, def_stmt0
))
1754 /* Pattern detected. */
1755 if (dump_enabled_p ())
1756 dump_printf_loc (MSG_NOTE
, vect_location
,
1757 "vect_recog_widen_shift_pattern: detected:\n");
1759 /* Check target support. */
1760 vectype
= get_vectype_for_scalar_type (half_type0
);
1761 vectype_out
= get_vectype_for_scalar_type (type
);
1765 || !supportable_widening_operation (WIDEN_LSHIFT_EXPR
, last_stmt
,
1766 vectype_out
, vectype
,
1767 &dummy_code
, &dummy_code
,
1768 &dummy_int
, &dummy_vec
))
1772 *type_out
= vectype_out
;
1774 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1775 var
= vect_recog_temp_ssa_var (type
, NULL
);
1777 gimple_build_assign (var
, WIDEN_LSHIFT_EXPR
, oprnd0
, oprnd1
);
1780 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
1781 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1782 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
1783 new_pattern_def_seq (stmt_vinfo
, wstmt
);
1784 stmt_vec_info new_stmt_info
1785 = new_stmt_vec_info (wstmt
, loop_vinfo
, bb_vinfo
);
1786 set_vinfo_for_stmt (wstmt
, new_stmt_info
);
1787 STMT_VINFO_VECTYPE (new_stmt_info
) = vectype
;
1790 if (dump_enabled_p ())
1791 dump_gimple_stmt_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, pattern_stmt
, 0);
1793 stmts
->safe_push (last_stmt
);
1794 return pattern_stmt
;
1797 /* Detect a rotate pattern wouldn't be otherwise vectorized:
1801 S0 a_t = b_t r<< c_t;
1805 * STMTS: Contains a stmt from which the pattern search begins,
1806 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1810 S2 e_t = d_t & (B - 1);
1811 S3 f_t = b_t << c_t;
1812 S4 g_t = b_t >> e_t;
1815 where B is element bitsize of type.
1819 * TYPE_IN: The type of the input arguments to the pattern.
1821 * TYPE_OUT: The type of the output of this pattern.
1823 * Return value: A new stmt that will be used to replace the rotate
1827 vect_recog_rotate_pattern (vec
<gimple
> *stmts
, tree
*type_in
, tree
*type_out
)
1829 gimple last_stmt
= stmts
->pop ();
1830 tree oprnd0
, oprnd1
, lhs
, var
, var1
, var2
, vectype
, type
, stype
, def
, def2
;
1831 gimple pattern_stmt
, def_stmt
;
1832 enum tree_code rhs_code
;
1833 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
1834 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1835 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
1836 enum vect_def_type dt
;
1837 optab optab1
, optab2
;
1838 edge ext_def
= NULL
;
1840 if (!is_gimple_assign (last_stmt
))
1843 rhs_code
= gimple_assign_rhs_code (last_stmt
);
1853 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
1856 lhs
= gimple_assign_lhs (last_stmt
);
1857 oprnd0
= gimple_assign_rhs1 (last_stmt
);
1858 type
= TREE_TYPE (oprnd0
);
1859 oprnd1
= gimple_assign_rhs2 (last_stmt
);
1860 if (TREE_CODE (oprnd0
) != SSA_NAME
1861 || TYPE_PRECISION (TREE_TYPE (lhs
)) != TYPE_PRECISION (type
)
1862 || !INTEGRAL_TYPE_P (type
)
1863 || !TYPE_UNSIGNED (type
))
1866 if (!vect_is_simple_use (oprnd1
, last_stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
1870 if (dt
!= vect_internal_def
1871 && dt
!= vect_constant_def
1872 && dt
!= vect_external_def
)
1875 vectype
= get_vectype_for_scalar_type (type
);
1876 if (vectype
== NULL_TREE
)
1879 /* If vector/vector or vector/scalar rotate is supported by the target,
1880 don't do anything here. */
1881 optab1
= optab_for_tree_code (rhs_code
, vectype
, optab_vector
);
1883 && optab_handler (optab1
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
1886 if (bb_vinfo
!= NULL
|| dt
!= vect_internal_def
)
1888 optab2
= optab_for_tree_code (rhs_code
, vectype
, optab_scalar
);
1890 && optab_handler (optab2
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
1894 /* If vector/vector or vector/scalar shifts aren't supported by the target,
1895 don't do anything here either. */
1896 optab1
= optab_for_tree_code (LSHIFT_EXPR
, vectype
, optab_vector
);
1897 optab2
= optab_for_tree_code (RSHIFT_EXPR
, vectype
, optab_vector
);
1899 || optab_handler (optab1
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
1901 || optab_handler (optab2
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
1903 if (bb_vinfo
== NULL
&& dt
== vect_internal_def
)
1905 optab1
= optab_for_tree_code (LSHIFT_EXPR
, vectype
, optab_scalar
);
1906 optab2
= optab_for_tree_code (RSHIFT_EXPR
, vectype
, optab_scalar
);
1908 || optab_handler (optab1
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
1910 || optab_handler (optab2
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
1915 *type_out
= vectype
;
1916 if (*type_in
== NULL_TREE
)
1919 if (dt
== vect_external_def
1920 && TREE_CODE (oprnd1
) == SSA_NAME
1923 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1924 ext_def
= loop_preheader_edge (loop
);
1925 if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1
))
1927 basic_block bb
= gimple_bb (SSA_NAME_DEF_STMT (oprnd1
));
1929 || !dominated_by_p (CDI_DOMINATORS
, ext_def
->dest
, bb
))
1935 if (TREE_CODE (oprnd1
) == INTEGER_CST
1936 || TYPE_MODE (TREE_TYPE (oprnd1
)) == TYPE_MODE (type
))
1938 else if (def_stmt
&& gimple_assign_cast_p (def_stmt
))
1940 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
1941 if (TYPE_MODE (TREE_TYPE (rhs1
)) == TYPE_MODE (type
)
1942 && TYPE_PRECISION (TREE_TYPE (rhs1
))
1943 == TYPE_PRECISION (type
))
1947 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
) = NULL
;
1948 if (def
== NULL_TREE
)
1950 def
= vect_recog_temp_ssa_var (type
, NULL
);
1951 def_stmt
= gimple_build_assign (def
, NOP_EXPR
, oprnd1
);
1955 = gsi_insert_on_edge_immediate (ext_def
, def_stmt
);
1956 gcc_assert (!new_bb
);
1959 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
1961 stype
= TREE_TYPE (def
);
1963 if (TREE_CODE (def
) == INTEGER_CST
)
1965 if (!tree_fits_uhwi_p (def
)
1966 || tree_to_uhwi (def
) >= GET_MODE_PRECISION (TYPE_MODE (type
))
1967 || integer_zerop (def
))
1969 def2
= build_int_cst (stype
,
1970 GET_MODE_PRECISION (TYPE_MODE (type
))
1971 - tree_to_uhwi (def
));
1975 tree vecstype
= get_vectype_for_scalar_type (stype
);
1976 stmt_vec_info def_stmt_vinfo
;
1978 if (vecstype
== NULL_TREE
)
1980 def2
= vect_recog_temp_ssa_var (stype
, NULL
);
1981 def_stmt
= gimple_build_assign (def2
, NEGATE_EXPR
, def
);
1985 = gsi_insert_on_edge_immediate (ext_def
, def_stmt
);
1986 gcc_assert (!new_bb
);
1990 def_stmt_vinfo
= new_stmt_vec_info (def_stmt
, loop_vinfo
, bb_vinfo
);
1991 set_vinfo_for_stmt (def_stmt
, def_stmt_vinfo
);
1992 STMT_VINFO_VECTYPE (def_stmt_vinfo
) = vecstype
;
1993 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
1996 def2
= vect_recog_temp_ssa_var (stype
, NULL
);
1998 = build_int_cst (stype
, GET_MODE_PRECISION (TYPE_MODE (stype
)) - 1);
1999 def_stmt
= gimple_build_assign (def2
, BIT_AND_EXPR
,
2000 gimple_assign_lhs (def_stmt
), mask
);
2004 = gsi_insert_on_edge_immediate (ext_def
, def_stmt
);
2005 gcc_assert (!new_bb
);
2009 def_stmt_vinfo
= new_stmt_vec_info (def_stmt
, loop_vinfo
, bb_vinfo
);
2010 set_vinfo_for_stmt (def_stmt
, def_stmt_vinfo
);
2011 STMT_VINFO_VECTYPE (def_stmt_vinfo
) = vecstype
;
2012 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2016 var1
= vect_recog_temp_ssa_var (type
, NULL
);
2017 def_stmt
= gimple_build_assign (var1
, rhs_code
== LROTATE_EXPR
2018 ? LSHIFT_EXPR
: RSHIFT_EXPR
,
2020 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2022 var2
= vect_recog_temp_ssa_var (type
, NULL
);
2023 def_stmt
= gimple_build_assign (var2
, rhs_code
== LROTATE_EXPR
2024 ? RSHIFT_EXPR
: LSHIFT_EXPR
,
2026 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2028 /* Pattern detected. */
2029 if (dump_enabled_p ())
2030 dump_printf_loc (MSG_NOTE
, vect_location
,
2031 "vect_recog_rotate_pattern: detected:\n");
2033 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2034 var
= vect_recog_temp_ssa_var (type
, NULL
);
2035 pattern_stmt
= gimple_build_assign (var
, BIT_IOR_EXPR
, var1
, var2
);
2037 if (dump_enabled_p ())
2038 dump_gimple_stmt_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, pattern_stmt
, 0);
2040 stmts
->safe_push (last_stmt
);
2041 return pattern_stmt
;
2044 /* Detect a vector by vector shift pattern that wouldn't be otherwise
2052 S3 res_T = b_T op a_t;
2054 where type 'TYPE' is a type with different size than 'type',
2055 and op is <<, >> or rotate.
2060 TYPE b_T, c_T, res_T;
2063 S1 a_t = (type) c_T;
2065 S3 res_T = b_T op a_t;
2069 * STMTS: Contains a stmt from which the pattern search begins,
2070 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2071 with a shift/rotate which has same type on both operands, in the
2072 second case just b_T op c_T, in the first case with added cast
2073 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2077 * TYPE_IN: The type of the input arguments to the pattern.
2079 * TYPE_OUT: The type of the output of this pattern.
2081 * Return value: A new stmt that will be used to replace the shift/rotate
2085 vect_recog_vector_vector_shift_pattern (vec
<gimple
> *stmts
,
2086 tree
*type_in
, tree
*type_out
)
2088 gimple last_stmt
= stmts
->pop ();
2089 tree oprnd0
, oprnd1
, lhs
, var
;
2090 gimple pattern_stmt
, def_stmt
;
2091 enum tree_code rhs_code
;
2092 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
2093 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
2094 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
2095 enum vect_def_type dt
;
2098 if (!is_gimple_assign (last_stmt
))
2101 rhs_code
= gimple_assign_rhs_code (last_stmt
);
2113 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
2116 lhs
= gimple_assign_lhs (last_stmt
);
2117 oprnd0
= gimple_assign_rhs1 (last_stmt
);
2118 oprnd1
= gimple_assign_rhs2 (last_stmt
);
2119 if (TREE_CODE (oprnd0
) != SSA_NAME
2120 || TREE_CODE (oprnd1
) != SSA_NAME
2121 || TYPE_MODE (TREE_TYPE (oprnd0
)) == TYPE_MODE (TREE_TYPE (oprnd1
))
2122 || TYPE_PRECISION (TREE_TYPE (oprnd1
))
2123 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1
)))
2124 || TYPE_PRECISION (TREE_TYPE (lhs
))
2125 != TYPE_PRECISION (TREE_TYPE (oprnd0
)))
2128 if (!vect_is_simple_use (oprnd1
, last_stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
2132 if (dt
!= vect_internal_def
)
2135 *type_in
= get_vectype_for_scalar_type (TREE_TYPE (oprnd0
));
2136 *type_out
= *type_in
;
2137 if (*type_in
== NULL_TREE
)
2141 if (gimple_assign_cast_p (def_stmt
))
2143 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
2144 if (TYPE_MODE (TREE_TYPE (rhs1
)) == TYPE_MODE (TREE_TYPE (oprnd0
))
2145 && TYPE_PRECISION (TREE_TYPE (rhs1
))
2146 == TYPE_PRECISION (TREE_TYPE (oprnd0
)))
2150 if (def
== NULL_TREE
)
2152 def
= vect_recog_temp_ssa_var (TREE_TYPE (oprnd0
), NULL
);
2153 def_stmt
= gimple_build_assign (def
, NOP_EXPR
, oprnd1
);
2154 new_pattern_def_seq (stmt_vinfo
, def_stmt
);
2157 /* Pattern detected. */
2158 if (dump_enabled_p ())
2159 dump_printf_loc (MSG_NOTE
, vect_location
,
2160 "vect_recog_vector_vector_shift_pattern: detected:\n");
2162 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2163 var
= vect_recog_temp_ssa_var (TREE_TYPE (oprnd0
), NULL
);
2164 pattern_stmt
= gimple_build_assign (var
, rhs_code
, oprnd0
, def
);
2166 if (dump_enabled_p ())
2167 dump_gimple_stmt_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, pattern_stmt
, 0);
2169 stmts
->safe_push (last_stmt
);
2170 return pattern_stmt
;
2173 /* Detect a signed division by a constant that wouldn't be
2174 otherwise vectorized:
2180 where type 'type' is an integral type and N is a constant.
2182 Similarly handle modulo by a constant:
2188 * STMTS: Contains a stmt from which the pattern search begins,
2189 i.e. the division stmt. S1 is replaced by if N is a power
2190 of two constant and type is signed:
2191 S3 y_t = b_t < 0 ? N - 1 : 0;
2193 S1' a_t = x_t >> log2 (N);
2195 S4 is replaced if N is a power of two constant and
2196 type is signed by (where *_T temporaries have unsigned type):
2197 S9 y_T = b_t < 0 ? -1U : 0U;
2198 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2199 S7 z_t = (type) z_T;
2201 S5 x_t = w_t & (N - 1);
2202 S4' a_t = x_t - z_t;
2206 * TYPE_IN: The type of the input arguments to the pattern.
2208 * TYPE_OUT: The type of the output of this pattern.
2210 * Return value: A new stmt that will be used to replace the division
2211 S1 or modulo S4 stmt. */
2214 vect_recog_divmod_pattern (vec
<gimple
> *stmts
,
2215 tree
*type_in
, tree
*type_out
)
2217 gimple last_stmt
= stmts
->pop ();
2218 tree oprnd0
, oprnd1
, vectype
, itype
, cond
;
2219 gimple pattern_stmt
, def_stmt
;
2220 enum tree_code rhs_code
;
2221 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
2222 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
2223 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
2226 int dummy_int
, prec
;
2227 stmt_vec_info def_stmt_vinfo
;
2229 if (!is_gimple_assign (last_stmt
))
2232 rhs_code
= gimple_assign_rhs_code (last_stmt
);
2235 case TRUNC_DIV_EXPR
:
2236 case TRUNC_MOD_EXPR
:
2242 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
2245 oprnd0
= gimple_assign_rhs1 (last_stmt
);
2246 oprnd1
= gimple_assign_rhs2 (last_stmt
);
2247 itype
= TREE_TYPE (oprnd0
);
2248 if (TREE_CODE (oprnd0
) != SSA_NAME
2249 || TREE_CODE (oprnd1
) != INTEGER_CST
2250 || TREE_CODE (itype
) != INTEGER_TYPE
2251 || TYPE_PRECISION (itype
) != GET_MODE_PRECISION (TYPE_MODE (itype
)))
2254 vectype
= get_vectype_for_scalar_type (itype
);
2255 if (vectype
== NULL_TREE
)
2258 /* If the target can handle vectorized division or modulo natively,
2259 don't attempt to optimize this. */
2260 optab
= optab_for_tree_code (rhs_code
, vectype
, optab_default
);
2261 if (optab
!= unknown_optab
)
2263 machine_mode vec_mode
= TYPE_MODE (vectype
);
2264 int icode
= (int) optab_handler (optab
, vec_mode
);
2265 if (icode
!= CODE_FOR_nothing
)
2269 prec
= TYPE_PRECISION (itype
);
2270 if (integer_pow2p (oprnd1
))
2272 if (TYPE_UNSIGNED (itype
) || tree_int_cst_sgn (oprnd1
) != 1)
2275 /* Pattern detected. */
2276 if (dump_enabled_p ())
2277 dump_printf_loc (MSG_NOTE
, vect_location
,
2278 "vect_recog_divmod_pattern: detected:\n");
2280 cond
= build2 (LT_EXPR
, boolean_type_node
, oprnd0
,
2281 build_int_cst (itype
, 0));
2282 if (rhs_code
== TRUNC_DIV_EXPR
)
2284 tree var
= vect_recog_temp_ssa_var (itype
, NULL
);
2287 = gimple_build_assign (var
, COND_EXPR
, cond
,
2288 fold_build2 (MINUS_EXPR
, itype
, oprnd1
,
2289 build_int_cst (itype
, 1)),
2290 build_int_cst (itype
, 0));
2291 new_pattern_def_seq (stmt_vinfo
, def_stmt
);
2292 var
= vect_recog_temp_ssa_var (itype
, NULL
);
2294 = gimple_build_assign (var
, PLUS_EXPR
, oprnd0
,
2295 gimple_assign_lhs (def_stmt
));
2296 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2298 shift
= build_int_cst (itype
, tree_log2 (oprnd1
));
2300 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2301 RSHIFT_EXPR
, var
, shift
);
2306 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
) = NULL
;
2307 if (compare_tree_int (oprnd1
, 2) == 0)
2309 signmask
= vect_recog_temp_ssa_var (itype
, NULL
);
2310 def_stmt
= gimple_build_assign (signmask
, COND_EXPR
, cond
,
2311 build_int_cst (itype
, 1),
2312 build_int_cst (itype
, 0));
2313 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2318 = build_nonstandard_integer_type (prec
, 1);
2319 tree vecutype
= get_vectype_for_scalar_type (utype
);
2321 = build_int_cst (utype
, GET_MODE_BITSIZE (TYPE_MODE (itype
))
2322 - tree_log2 (oprnd1
));
2323 tree var
= vect_recog_temp_ssa_var (utype
, NULL
);
2325 def_stmt
= gimple_build_assign (var
, COND_EXPR
, cond
,
2326 build_int_cst (utype
, -1),
2327 build_int_cst (utype
, 0));
2329 = new_stmt_vec_info (def_stmt
, loop_vinfo
, bb_vinfo
);
2330 set_vinfo_for_stmt (def_stmt
, def_stmt_vinfo
);
2331 STMT_VINFO_VECTYPE (def_stmt_vinfo
) = vecutype
;
2332 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2333 var
= vect_recog_temp_ssa_var (utype
, NULL
);
2334 def_stmt
= gimple_build_assign (var
, RSHIFT_EXPR
,
2335 gimple_assign_lhs (def_stmt
),
2338 = new_stmt_vec_info (def_stmt
, loop_vinfo
, bb_vinfo
);
2339 set_vinfo_for_stmt (def_stmt
, def_stmt_vinfo
);
2340 STMT_VINFO_VECTYPE (def_stmt_vinfo
) = vecutype
;
2341 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2342 signmask
= vect_recog_temp_ssa_var (itype
, NULL
);
2344 = gimple_build_assign (signmask
, NOP_EXPR
, var
);
2345 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2348 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2349 PLUS_EXPR
, oprnd0
, signmask
);
2350 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2352 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2353 BIT_AND_EXPR
, gimple_assign_lhs (def_stmt
),
2354 fold_build2 (MINUS_EXPR
, itype
, oprnd1
,
2355 build_int_cst (itype
, 1)));
2356 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2359 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2360 MINUS_EXPR
, gimple_assign_lhs (def_stmt
),
2364 if (dump_enabled_p ())
2365 dump_gimple_stmt_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, pattern_stmt
,
2368 stmts
->safe_push (last_stmt
);
2371 *type_out
= vectype
;
2372 return pattern_stmt
;
2375 if (prec
> HOST_BITS_PER_WIDE_INT
2376 || integer_zerop (oprnd1
))
2379 if (!can_mult_highpart_p (TYPE_MODE (vectype
), TYPE_UNSIGNED (itype
)))
2382 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
) = NULL
;
2384 if (TYPE_UNSIGNED (itype
))
2386 unsigned HOST_WIDE_INT mh
, ml
;
2387 int pre_shift
, post_shift
;
2388 unsigned HOST_WIDE_INT d
= (TREE_INT_CST_LOW (oprnd1
)
2389 & GET_MODE_MASK (TYPE_MODE (itype
)));
2390 tree t1
, t2
, t3
, t4
;
2392 if (d
>= ((unsigned HOST_WIDE_INT
) 1 << (prec
- 1)))
2393 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2396 /* Find a suitable multiplier and right shift count
2397 instead of multiplying with D. */
2398 mh
= choose_multiplier (d
, prec
, prec
, &ml
, &post_shift
, &dummy_int
);
2400 /* If the suggested multiplier is more than SIZE bits, we can do better
2401 for even divisors, using an initial right shift. */
2402 if (mh
!= 0 && (d
& 1) == 0)
2404 pre_shift
= floor_log2 (d
& -d
);
2405 mh
= choose_multiplier (d
>> pre_shift
, prec
, prec
- pre_shift
,
2406 &ml
, &post_shift
, &dummy_int
);
2414 if (post_shift
- 1 >= prec
)
2417 /* t1 = oprnd0 h* ml;
2421 q = t4 >> (post_shift - 1); */
2422 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
2423 def_stmt
= gimple_build_assign (t1
, MULT_HIGHPART_EXPR
, oprnd0
,
2424 build_int_cst (itype
, ml
));
2425 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2427 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
2429 = gimple_build_assign (t2
, MINUS_EXPR
, oprnd0
, t1
);
2430 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2432 t3
= vect_recog_temp_ssa_var (itype
, NULL
);
2434 = gimple_build_assign (t3
, RSHIFT_EXPR
, t2
, integer_one_node
);
2435 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2437 t4
= vect_recog_temp_ssa_var (itype
, NULL
);
2439 = gimple_build_assign (t4
, PLUS_EXPR
, t1
, t3
);
2441 if (post_shift
!= 1)
2443 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2445 q
= vect_recog_temp_ssa_var (itype
, NULL
);
2447 = gimple_build_assign (q
, RSHIFT_EXPR
, t4
,
2448 build_int_cst (itype
, post_shift
- 1));
2453 pattern_stmt
= def_stmt
;
2458 if (pre_shift
>= prec
|| post_shift
>= prec
)
2461 /* t1 = oprnd0 >> pre_shift;
2463 q = t2 >> post_shift; */
2466 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
2468 = gimple_build_assign (t1
, RSHIFT_EXPR
, oprnd0
,
2469 build_int_cst (NULL
, pre_shift
));
2470 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2475 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
2476 def_stmt
= gimple_build_assign (t2
, MULT_HIGHPART_EXPR
, t1
,
2477 build_int_cst (itype
, ml
));
2481 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2483 q
= vect_recog_temp_ssa_var (itype
, NULL
);
2485 = gimple_build_assign (q
, RSHIFT_EXPR
, t2
,
2486 build_int_cst (itype
, post_shift
));
2491 pattern_stmt
= def_stmt
;
2496 unsigned HOST_WIDE_INT ml
;
2498 HOST_WIDE_INT d
= TREE_INT_CST_LOW (oprnd1
);
2499 unsigned HOST_WIDE_INT abs_d
;
2501 tree t1
, t2
, t3
, t4
;
2503 /* Give up for -1. */
2507 /* Since d might be INT_MIN, we have to cast to
2508 unsigned HOST_WIDE_INT before negating to avoid
2509 undefined signed overflow. */
2511 ? (unsigned HOST_WIDE_INT
) d
2512 : - (unsigned HOST_WIDE_INT
) d
);
2514 /* n rem d = n rem -d */
2515 if (rhs_code
== TRUNC_MOD_EXPR
&& d
< 0)
2518 oprnd1
= build_int_cst (itype
, abs_d
);
2520 else if (HOST_BITS_PER_WIDE_INT
>= prec
2521 && abs_d
== (unsigned HOST_WIDE_INT
) 1 << (prec
- 1))
2522 /* This case is not handled correctly below. */
2525 choose_multiplier (abs_d
, prec
, prec
- 1, &ml
, &post_shift
, &dummy_int
);
2526 if (ml
>= (unsigned HOST_WIDE_INT
) 1 << (prec
- 1))
2529 ml
|= (~(unsigned HOST_WIDE_INT
) 0) << (prec
- 1);
2531 if (post_shift
>= prec
)
2534 /* t1 = oprnd0 h* ml; */
2535 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
2536 def_stmt
= gimple_build_assign (t1
, MULT_HIGHPART_EXPR
, oprnd0
,
2537 build_int_cst (itype
, ml
));
2541 /* t2 = t1 + oprnd0; */
2542 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2543 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
2544 def_stmt
= gimple_build_assign (t2
, PLUS_EXPR
, t1
, oprnd0
);
2551 /* t3 = t2 >> post_shift; */
2552 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2553 t3
= vect_recog_temp_ssa_var (itype
, NULL
);
2554 def_stmt
= gimple_build_assign (t3
, RSHIFT_EXPR
, t2
,
2555 build_int_cst (itype
, post_shift
));
2560 wide_int oprnd0_min
, oprnd0_max
;
2562 if (get_range_info (oprnd0
, &oprnd0_min
, &oprnd0_max
) == VR_RANGE
)
2564 if (!wi::neg_p (oprnd0_min
, TYPE_SIGN (itype
)))
2566 else if (wi::neg_p (oprnd0_max
, TYPE_SIGN (itype
)))
2570 if (msb
== 0 && d
>= 0)
2574 pattern_stmt
= def_stmt
;
2578 /* t4 = oprnd0 >> (prec - 1);
2579 or if we know from VRP that oprnd0 >= 0
2581 or if we know from VRP that oprnd0 < 0
2583 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2584 t4
= vect_recog_temp_ssa_var (itype
, NULL
);
2586 def_stmt
= gimple_build_assign (t4
, INTEGER_CST
,
2587 build_int_cst (itype
, msb
));
2589 def_stmt
= gimple_build_assign (t4
, RSHIFT_EXPR
, oprnd0
,
2590 build_int_cst (itype
, prec
- 1));
2591 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2593 /* q = t3 - t4; or q = t4 - t3; */
2594 q
= vect_recog_temp_ssa_var (itype
, NULL
);
2595 pattern_stmt
= gimple_build_assign (q
, MINUS_EXPR
, d
< 0 ? t4
: t3
,
2600 if (rhs_code
== TRUNC_MOD_EXPR
)
2604 /* We divided. Now finish by:
2607 append_pattern_def_seq (stmt_vinfo
, pattern_stmt
);
2609 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
2610 def_stmt
= gimple_build_assign (t1
, MULT_EXPR
, q
, oprnd1
);
2611 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2613 r
= vect_recog_temp_ssa_var (itype
, NULL
);
2614 pattern_stmt
= gimple_build_assign (r
, MINUS_EXPR
, oprnd0
, t1
);
2617 /* Pattern detected. */
2618 if (dump_enabled_p ())
2620 dump_printf_loc (MSG_NOTE
, vect_location
,
2621 "vect_recog_divmod_pattern: detected: ");
2622 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
2623 dump_printf (MSG_NOTE
, "\n");
2626 stmts
->safe_push (last_stmt
);
2629 *type_out
= vectype
;
2630 return pattern_stmt
;
2633 /* Function vect_recog_mixed_size_cond_pattern
2635 Try to find the following pattern:
2640 S1 a_T = x_t CMP y_t ? b_T : c_T;
2642 where type 'TYPE' is an integral type which has different size
2643 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
2644 than 'type', the constants need to fit into an integer type
2645 with the same width as 'type') or results of conversion from 'type'.
2649 * LAST_STMT: A stmt from which the pattern search begins.
2653 * TYPE_IN: The type of the input arguments to the pattern.
2655 * TYPE_OUT: The type of the output of this pattern.
2657 * Return value: A new stmt that will be used to replace the pattern.
2658 Additionally a def_stmt is added.
2660 a_it = x_t CMP y_t ? b_it : c_it;
2661 a_T = (TYPE) a_it; */
2664 vect_recog_mixed_size_cond_pattern (vec
<gimple
> *stmts
, tree
*type_in
,
2667 gimple last_stmt
= (*stmts
)[0];
2668 tree cond_expr
, then_clause
, else_clause
;
2669 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
), def_stmt_info
;
2670 tree type
, vectype
, comp_vectype
, itype
= NULL_TREE
, vecitype
;
2671 machine_mode cmpmode
;
2672 gimple pattern_stmt
, def_stmt
;
2673 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
2674 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
2675 tree orig_type0
= NULL_TREE
, orig_type1
= NULL_TREE
;
2676 gimple def_stmt0
= NULL
, def_stmt1
= NULL
;
2678 tree comp_scalar_type
;
2680 if (!is_gimple_assign (last_stmt
)
2681 || gimple_assign_rhs_code (last_stmt
) != COND_EXPR
2682 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_internal_def
)
2685 cond_expr
= gimple_assign_rhs1 (last_stmt
);
2686 then_clause
= gimple_assign_rhs2 (last_stmt
);
2687 else_clause
= gimple_assign_rhs3 (last_stmt
);
2689 if (!COMPARISON_CLASS_P (cond_expr
))
2692 comp_scalar_type
= TREE_TYPE (TREE_OPERAND (cond_expr
, 0));
2693 comp_vectype
= get_vectype_for_scalar_type (comp_scalar_type
);
2694 if (comp_vectype
== NULL_TREE
)
2697 type
= gimple_expr_type (last_stmt
);
2698 if (types_compatible_p (type
, comp_scalar_type
)
2699 || ((TREE_CODE (then_clause
) != INTEGER_CST
2700 || TREE_CODE (else_clause
) != INTEGER_CST
)
2701 && !INTEGRAL_TYPE_P (comp_scalar_type
))
2702 || !INTEGRAL_TYPE_P (type
))
2705 if ((TREE_CODE (then_clause
) != INTEGER_CST
2706 && !type_conversion_p (then_clause
, last_stmt
, false, &orig_type0
,
2707 &def_stmt0
, &promotion
))
2708 || (TREE_CODE (else_clause
) != INTEGER_CST
2709 && !type_conversion_p (else_clause
, last_stmt
, false, &orig_type1
,
2710 &def_stmt1
, &promotion
)))
2713 if (orig_type0
&& orig_type1
2714 && !types_compatible_p (orig_type0
, orig_type1
))
2719 if (!types_compatible_p (orig_type0
, comp_scalar_type
))
2721 then_clause
= gimple_assign_rhs1 (def_stmt0
);
2727 if (!types_compatible_p (orig_type1
, comp_scalar_type
))
2729 else_clause
= gimple_assign_rhs1 (def_stmt1
);
2733 cmpmode
= GET_MODE_INNER (TYPE_MODE (comp_vectype
));
2735 if (GET_MODE_BITSIZE (TYPE_MODE (type
)) == GET_MODE_BITSIZE (cmpmode
))
2738 vectype
= get_vectype_for_scalar_type (type
);
2739 if (vectype
== NULL_TREE
)
2742 if (expand_vec_cond_expr_p (vectype
, comp_vectype
))
2745 if (itype
== NULL_TREE
)
2746 itype
= build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode
),
2747 TYPE_UNSIGNED (type
));
2749 if (itype
== NULL_TREE
2750 || GET_MODE_BITSIZE (TYPE_MODE (itype
)) != GET_MODE_BITSIZE (cmpmode
))
2753 vecitype
= get_vectype_for_scalar_type (itype
);
2754 if (vecitype
== NULL_TREE
)
2757 if (!expand_vec_cond_expr_p (vecitype
, comp_vectype
))
2760 if (GET_MODE_BITSIZE (TYPE_MODE (type
)) > GET_MODE_BITSIZE (cmpmode
))
2762 if ((TREE_CODE (then_clause
) == INTEGER_CST
2763 && !int_fits_type_p (then_clause
, itype
))
2764 || (TREE_CODE (else_clause
) == INTEGER_CST
2765 && !int_fits_type_p (else_clause
, itype
)))
2769 def_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2770 COND_EXPR
, unshare_expr (cond_expr
),
2771 fold_convert (itype
, then_clause
),
2772 fold_convert (itype
, else_clause
));
2773 pattern_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (type
, NULL
),
2774 NOP_EXPR
, gimple_assign_lhs (def_stmt
));
2776 new_pattern_def_seq (stmt_vinfo
, def_stmt
);
2777 def_stmt_info
= new_stmt_vec_info (def_stmt
, loop_vinfo
, bb_vinfo
);
2778 set_vinfo_for_stmt (def_stmt
, def_stmt_info
);
2779 STMT_VINFO_VECTYPE (def_stmt_info
) = vecitype
;
2780 *type_in
= vecitype
;
2781 *type_out
= vectype
;
2783 if (dump_enabled_p ())
2784 dump_printf_loc (MSG_NOTE
, vect_location
,
2785 "vect_recog_mixed_size_cond_pattern: detected:\n");
2787 return pattern_stmt
;
2791 /* Helper function of vect_recog_bool_pattern. Called recursively, return
2792 true if bool VAR can be optimized that way. */
2795 check_bool_pattern (tree var
, loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
)
2798 enum vect_def_type dt
;
2800 enum tree_code rhs_code
;
2802 if (!vect_is_simple_use (var
, NULL
, loop_vinfo
, bb_vinfo
, &def_stmt
, &def
,
2806 if (dt
!= vect_internal_def
)
2809 if (!is_gimple_assign (def_stmt
))
2812 if (!has_single_use (def
))
2815 rhs1
= gimple_assign_rhs1 (def_stmt
);
2816 rhs_code
= gimple_assign_rhs_code (def_stmt
);
2820 return check_bool_pattern (rhs1
, loop_vinfo
, bb_vinfo
);
2823 if ((TYPE_PRECISION (TREE_TYPE (rhs1
)) != 1
2824 || !TYPE_UNSIGNED (TREE_TYPE (rhs1
)))
2825 && TREE_CODE (TREE_TYPE (rhs1
)) != BOOLEAN_TYPE
)
2827 return check_bool_pattern (rhs1
, loop_vinfo
, bb_vinfo
);
2830 return check_bool_pattern (rhs1
, loop_vinfo
, bb_vinfo
);
2835 if (!check_bool_pattern (rhs1
, loop_vinfo
, bb_vinfo
))
2837 return check_bool_pattern (gimple_assign_rhs2 (def_stmt
), loop_vinfo
,
2841 if (TREE_CODE_CLASS (rhs_code
) == tcc_comparison
)
2843 tree vecitype
, comp_vectype
;
2845 /* If the comparison can throw, then is_gimple_condexpr will be
2846 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
2847 if (stmt_could_throw_p (def_stmt
))
2850 comp_vectype
= get_vectype_for_scalar_type (TREE_TYPE (rhs1
));
2851 if (comp_vectype
== NULL_TREE
)
2854 if (TREE_CODE (TREE_TYPE (rhs1
)) != INTEGER_TYPE
)
2856 machine_mode mode
= TYPE_MODE (TREE_TYPE (rhs1
));
2858 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode
), 1);
2859 vecitype
= get_vectype_for_scalar_type (itype
);
2860 if (vecitype
== NULL_TREE
)
2864 vecitype
= comp_vectype
;
2865 return expand_vec_cond_expr_p (vecitype
, comp_vectype
);
2872 /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
2873 stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
2874 to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT. */
2877 adjust_bool_pattern_cast (tree type
, tree var
)
2879 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (SSA_NAME_DEF_STMT (var
));
2880 gimple cast_stmt
, pattern_stmt
;
2882 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
));
2883 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_vinfo
);
2884 new_pattern_def_seq (stmt_vinfo
, pattern_stmt
);
2885 cast_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (type
, NULL
),
2886 NOP_EXPR
, gimple_assign_lhs (pattern_stmt
));
2887 STMT_VINFO_RELATED_STMT (stmt_vinfo
) = cast_stmt
;
2888 return gimple_assign_lhs (cast_stmt
);
2892 /* Helper function of vect_recog_bool_pattern. Do the actual transformations,
2893 recursively. VAR is an SSA_NAME that should be transformed from bool
2894 to a wider integer type, OUT_TYPE is the desired final integer type of
2895 the whole pattern, TRUEVAL should be NULL unless optimizing
2896 BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
2897 in the then_clause, STMTS is where statements with added pattern stmts
2898 should be pushed to. */
2901 adjust_bool_pattern (tree var
, tree out_type
, tree trueval
,
2904 gimple stmt
= SSA_NAME_DEF_STMT (var
);
2905 enum tree_code rhs_code
, def_rhs_code
;
2906 tree itype
, cond_expr
, rhs1
, rhs2
, irhs1
, irhs2
;
2908 gimple pattern_stmt
, def_stmt
;
2910 rhs1
= gimple_assign_rhs1 (stmt
);
2911 rhs2
= gimple_assign_rhs2 (stmt
);
2912 rhs_code
= gimple_assign_rhs_code (stmt
);
2913 loc
= gimple_location (stmt
);
2918 irhs1
= adjust_bool_pattern (rhs1
, out_type
, NULL_TREE
, stmts
);
2919 itype
= TREE_TYPE (irhs1
);
2921 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2926 irhs1
= adjust_bool_pattern (rhs1
, out_type
, NULL_TREE
, stmts
);
2927 itype
= TREE_TYPE (irhs1
);
2929 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2930 BIT_XOR_EXPR
, irhs1
, build_int_cst (itype
, 1));
2934 /* Try to optimize x = y & (a < b ? 1 : 0); into
2935 x = (a < b ? y : 0);
2941 S1 a_b = x1 CMP1 y1;
2942 S2 b_b = x2 CMP2 y2;
2944 S4 d_T = (TYPE) c_b;
2946 we would normally emit:
2948 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2949 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2950 S3' c_T = a_T & b_T;
2953 but we can save one stmt by using the
2954 result of one of the COND_EXPRs in the other COND_EXPR and leave
2955 BIT_AND_EXPR stmt out:
2957 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2958 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2961 At least when VEC_COND_EXPR is implemented using masks
2962 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
2963 computes the comparison masks and ands it, in one case with
2964 all ones vector, in the other case with a vector register.
2965 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
2966 often more expensive. */
2967 def_stmt
= SSA_NAME_DEF_STMT (rhs2
);
2968 def_rhs_code
= gimple_assign_rhs_code (def_stmt
);
2969 if (TREE_CODE_CLASS (def_rhs_code
) == tcc_comparison
)
2971 tree def_rhs1
= gimple_assign_rhs1 (def_stmt
);
2972 irhs1
= adjust_bool_pattern (rhs1
, out_type
, NULL_TREE
, stmts
);
2973 if (TYPE_PRECISION (TREE_TYPE (irhs1
))
2974 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1
))))
2977 stmt_vec_info stmt_def_vinfo
= vinfo_for_stmt (def_stmt
);
2978 irhs2
= adjust_bool_pattern (rhs2
, out_type
, irhs1
, stmts
);
2979 tstmt
= stmts
->pop ();
2980 gcc_assert (tstmt
== def_stmt
);
2981 stmts
->quick_push (stmt
);
2982 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt
))
2983 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo
);
2984 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo
));
2985 STMT_VINFO_RELATED_STMT (stmt_def_vinfo
) = NULL
;
2989 irhs2
= adjust_bool_pattern (rhs2
, out_type
, NULL_TREE
, stmts
);
2992 def_stmt
= SSA_NAME_DEF_STMT (rhs1
);
2993 def_rhs_code
= gimple_assign_rhs_code (def_stmt
);
2994 if (TREE_CODE_CLASS (def_rhs_code
) == tcc_comparison
)
2996 tree def_rhs1
= gimple_assign_rhs1 (def_stmt
);
2997 irhs2
= adjust_bool_pattern (rhs2
, out_type
, NULL_TREE
, stmts
);
2998 if (TYPE_PRECISION (TREE_TYPE (irhs2
))
2999 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1
))))
3002 stmt_vec_info stmt_def_vinfo
= vinfo_for_stmt (def_stmt
);
3003 irhs1
= adjust_bool_pattern (rhs1
, out_type
, irhs2
, stmts
);
3004 tstmt
= stmts
->pop ();
3005 gcc_assert (tstmt
== def_stmt
);
3006 stmts
->quick_push (stmt
);
3007 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt
))
3008 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo
);
3009 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo
));
3010 STMT_VINFO_RELATED_STMT (stmt_def_vinfo
) = NULL
;
3014 irhs1
= adjust_bool_pattern (rhs1
, out_type
, NULL_TREE
, stmts
);
3020 irhs1
= adjust_bool_pattern (rhs1
, out_type
, NULL_TREE
, stmts
);
3021 irhs2
= adjust_bool_pattern (rhs2
, out_type
, NULL_TREE
, stmts
);
3023 if (TYPE_PRECISION (TREE_TYPE (irhs1
))
3024 != TYPE_PRECISION (TREE_TYPE (irhs2
)))
3026 int prec1
= TYPE_PRECISION (TREE_TYPE (irhs1
));
3027 int prec2
= TYPE_PRECISION (TREE_TYPE (irhs2
));
3028 int out_prec
= TYPE_PRECISION (out_type
);
3029 if (absu_hwi (out_prec
- prec1
) < absu_hwi (out_prec
- prec2
))
3030 irhs2
= adjust_bool_pattern_cast (TREE_TYPE (irhs1
), rhs2
);
3031 else if (absu_hwi (out_prec
- prec1
) > absu_hwi (out_prec
- prec2
))
3032 irhs1
= adjust_bool_pattern_cast (TREE_TYPE (irhs2
), rhs1
);
3035 irhs1
= adjust_bool_pattern_cast (out_type
, rhs1
);
3036 irhs2
= adjust_bool_pattern_cast (out_type
, rhs2
);
3039 itype
= TREE_TYPE (irhs1
);
3041 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
3042 rhs_code
, irhs1
, irhs2
);
3046 gcc_assert (TREE_CODE_CLASS (rhs_code
) == tcc_comparison
);
3047 if (TREE_CODE (TREE_TYPE (rhs1
)) != INTEGER_TYPE
3048 || !TYPE_UNSIGNED (TREE_TYPE (rhs1
))
3049 || (TYPE_PRECISION (TREE_TYPE (rhs1
))
3050 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1
)))))
3052 machine_mode mode
= TYPE_MODE (TREE_TYPE (rhs1
));
3054 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode
), 1);
3057 itype
= TREE_TYPE (rhs1
);
3058 cond_expr
= build2_loc (loc
, rhs_code
, itype
, rhs1
, rhs2
);
3059 if (trueval
== NULL_TREE
)
3060 trueval
= build_int_cst (itype
, 1);
3062 gcc_checking_assert (useless_type_conversion_p (itype
,
3063 TREE_TYPE (trueval
)));
3065 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
3066 COND_EXPR
, cond_expr
, trueval
,
3067 build_int_cst (itype
, 0));
3071 stmts
->safe_push (stmt
);
3072 gimple_set_location (pattern_stmt
, loc
);
3073 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt
)) = pattern_stmt
;
3074 return gimple_assign_lhs (pattern_stmt
);
3078 /* Function vect_recog_bool_pattern
3080 Try to find pattern like following:
3082 bool a_b, b_b, c_b, d_b, e_b;
3085 S1 a_b = x1 CMP1 y1;
3086 S2 b_b = x2 CMP2 y2;
3088 S4 d_b = x3 CMP3 y3;
3090 S6 f_T = (TYPE) e_b;
3092 where type 'TYPE' is an integral type. Or a similar pattern
3095 S6 f_Y = e_b ? r_Y : s_Y;
3097 as results from if-conversion of a complex condition.
3101 * LAST_STMT: A stmt at the end from which the pattern
3102 search begins, i.e. cast of a bool to
3107 * TYPE_IN: The type of the input arguments to the pattern.
3109 * TYPE_OUT: The type of the output of this pattern.
3111 * Return value: A new stmt that will be used to replace the pattern.
3113 Assuming size of TYPE is the same as size of all comparisons
3114 (otherwise some casts would be added where needed), the above
3115 sequence we create related pattern stmts:
3116 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3117 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3118 S4' d_T = x3 CMP3 y3 ? 1 : 0;
3119 S5' e_T = c_T | d_T;
3122 Instead of the above S3' we could emit:
3123 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3124 S3' c_T = a_T | b_T;
3125 but the above is more efficient. */
3128 vect_recog_bool_pattern (vec
<gimple
> *stmts
, tree
*type_in
,
3131 gimple last_stmt
= stmts
->pop ();
3132 enum tree_code rhs_code
;
3133 tree var
, lhs
, rhs
, vectype
;
3134 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
3135 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
3136 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
3137 gimple pattern_stmt
;
3139 if (!is_gimple_assign (last_stmt
))
3142 var
= gimple_assign_rhs1 (last_stmt
);
3143 lhs
= gimple_assign_lhs (last_stmt
);
3145 if ((TYPE_PRECISION (TREE_TYPE (var
)) != 1
3146 || !TYPE_UNSIGNED (TREE_TYPE (var
)))
3147 && TREE_CODE (TREE_TYPE (var
)) != BOOLEAN_TYPE
)
3150 rhs_code
= gimple_assign_rhs_code (last_stmt
);
3151 if (CONVERT_EXPR_CODE_P (rhs_code
))
3153 if (TREE_CODE (TREE_TYPE (lhs
)) != INTEGER_TYPE
3154 || TYPE_PRECISION (TREE_TYPE (lhs
)) == 1)
3156 vectype
= get_vectype_for_scalar_type (TREE_TYPE (lhs
));
3157 if (vectype
== NULL_TREE
)
3160 if (!check_bool_pattern (var
, loop_vinfo
, bb_vinfo
))
3163 rhs
= adjust_bool_pattern (var
, TREE_TYPE (lhs
), NULL_TREE
, stmts
);
3164 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
3165 if (useless_type_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (rhs
)))
3166 pattern_stmt
= gimple_build_assign (lhs
, SSA_NAME
, rhs
);
3169 = gimple_build_assign (lhs
, NOP_EXPR
, rhs
);
3170 *type_out
= vectype
;
3172 stmts
->safe_push (last_stmt
);
3173 if (dump_enabled_p ())
3174 dump_printf_loc (MSG_NOTE
, vect_location
,
3175 "vect_recog_bool_pattern: detected:\n");
3177 return pattern_stmt
;
3179 else if (rhs_code
== COND_EXPR
3180 && TREE_CODE (var
) == SSA_NAME
)
3182 vectype
= get_vectype_for_scalar_type (TREE_TYPE (lhs
));
3183 if (vectype
== NULL_TREE
)
3186 /* Build a scalar type for the boolean result that when
3187 vectorized matches the vector type of the result in
3188 size and number of elements. */
3190 = wi::udiv_trunc (TYPE_SIZE (vectype
),
3191 TYPE_VECTOR_SUBPARTS (vectype
)).to_uhwi ();
3193 = build_nonstandard_integer_type (prec
,
3194 TYPE_UNSIGNED (TREE_TYPE (var
)));
3195 if (get_vectype_for_scalar_type (type
) == NULL_TREE
)
3198 if (!check_bool_pattern (var
, loop_vinfo
, bb_vinfo
))
3201 rhs
= adjust_bool_pattern (var
, type
, NULL_TREE
, stmts
);
3202 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
3204 = gimple_build_assign (lhs
, COND_EXPR
,
3205 build2 (NE_EXPR
, boolean_type_node
,
3206 rhs
, build_int_cst (type
, 0)),
3207 gimple_assign_rhs2 (last_stmt
),
3208 gimple_assign_rhs3 (last_stmt
));
3209 *type_out
= vectype
;
3211 stmts
->safe_push (last_stmt
);
3212 if (dump_enabled_p ())
3213 dump_printf_loc (MSG_NOTE
, vect_location
,
3214 "vect_recog_bool_pattern: detected:\n");
3216 return pattern_stmt
;
3218 else if (rhs_code
== SSA_NAME
3219 && STMT_VINFO_DATA_REF (stmt_vinfo
))
3221 stmt_vec_info pattern_stmt_info
;
3222 vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
3223 gcc_assert (vectype
!= NULL_TREE
);
3224 if (!VECTOR_MODE_P (TYPE_MODE (vectype
)))
3226 if (!check_bool_pattern (var
, loop_vinfo
, bb_vinfo
))
3229 rhs
= adjust_bool_pattern (var
, TREE_TYPE (vectype
), NULL_TREE
, stmts
);
3230 lhs
= build1 (VIEW_CONVERT_EXPR
, TREE_TYPE (vectype
), lhs
);
3231 if (!useless_type_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (rhs
)))
3233 tree rhs2
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
3234 gimple cast_stmt
= gimple_build_assign (rhs2
, NOP_EXPR
, rhs
);
3235 new_pattern_def_seq (stmt_vinfo
, cast_stmt
);
3238 pattern_stmt
= gimple_build_assign (lhs
, SSA_NAME
, rhs
);
3239 pattern_stmt_info
= new_stmt_vec_info (pattern_stmt
, loop_vinfo
,
3241 set_vinfo_for_stmt (pattern_stmt
, pattern_stmt_info
);
3242 STMT_VINFO_DATA_REF (pattern_stmt_info
)
3243 = STMT_VINFO_DATA_REF (stmt_vinfo
);
3244 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info
)
3245 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo
);
3246 STMT_VINFO_DR_INIT (pattern_stmt_info
) = STMT_VINFO_DR_INIT (stmt_vinfo
);
3247 STMT_VINFO_DR_OFFSET (pattern_stmt_info
)
3248 = STMT_VINFO_DR_OFFSET (stmt_vinfo
);
3249 STMT_VINFO_DR_STEP (pattern_stmt_info
) = STMT_VINFO_DR_STEP (stmt_vinfo
);
3250 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info
)
3251 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo
);
3252 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo
)) = pattern_stmt
;
3253 *type_out
= vectype
;
3255 stmts
->safe_push (last_stmt
);
3256 if (dump_enabled_p ())
3257 dump_printf_loc (MSG_NOTE
, vect_location
,
3258 "vect_recog_bool_pattern: detected:\n");
3259 return pattern_stmt
;
3266 /* Mark statements that are involved in a pattern. */
3269 vect_mark_pattern_stmts (gimple orig_stmt
, gimple pattern_stmt
,
3270 tree pattern_vectype
)
3272 stmt_vec_info pattern_stmt_info
, def_stmt_info
;
3273 stmt_vec_info orig_stmt_info
= vinfo_for_stmt (orig_stmt
);
3274 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (orig_stmt_info
);
3275 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (orig_stmt_info
);
3278 pattern_stmt_info
= vinfo_for_stmt (pattern_stmt
);
3279 if (pattern_stmt_info
== NULL
)
3281 pattern_stmt_info
= new_stmt_vec_info (pattern_stmt
, loop_vinfo
,
3283 set_vinfo_for_stmt (pattern_stmt
, pattern_stmt_info
);
3285 gimple_set_bb (pattern_stmt
, gimple_bb (orig_stmt
));
3287 STMT_VINFO_RELATED_STMT (pattern_stmt_info
) = orig_stmt
;
3288 STMT_VINFO_DEF_TYPE (pattern_stmt_info
)
3289 = STMT_VINFO_DEF_TYPE (orig_stmt_info
);
3290 STMT_VINFO_VECTYPE (pattern_stmt_info
) = pattern_vectype
;
3291 STMT_VINFO_IN_PATTERN_P (orig_stmt_info
) = true;
3292 STMT_VINFO_RELATED_STMT (orig_stmt_info
) = pattern_stmt
;
3293 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info
)
3294 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info
);
3295 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info
))
3297 gimple_stmt_iterator si
;
3298 for (si
= gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info
));
3299 !gsi_end_p (si
); gsi_next (&si
))
3301 def_stmt
= gsi_stmt (si
);
3302 def_stmt_info
= vinfo_for_stmt (def_stmt
);
3303 if (def_stmt_info
== NULL
)
3305 def_stmt_info
= new_stmt_vec_info (def_stmt
, loop_vinfo
,
3307 set_vinfo_for_stmt (def_stmt
, def_stmt_info
);
3309 gimple_set_bb (def_stmt
, gimple_bb (orig_stmt
));
3310 STMT_VINFO_RELATED_STMT (def_stmt_info
) = orig_stmt
;
3311 STMT_VINFO_DEF_TYPE (def_stmt_info
) = vect_internal_def
;
3312 if (STMT_VINFO_VECTYPE (def_stmt_info
) == NULL_TREE
)
3313 STMT_VINFO_VECTYPE (def_stmt_info
) = pattern_vectype
;
3318 /* Function vect_pattern_recog_1
3321 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
3322 computation pattern.
3323 STMT: A stmt from which the pattern search should start.
3325 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
3326 expression that computes the same functionality and can be used to
3327 replace the sequence of stmts that are involved in the pattern.
3330 This function checks if the expression returned by PATTERN_RECOG_FUNC is
3331 supported in vector form by the target. We use 'TYPE_IN' to obtain the
3332 relevant vector type. If 'TYPE_IN' is already a vector type, then this
3333 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
3334 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
3335 to the available target pattern.
3337 This function also does some bookkeeping, as explained in the documentation
3338 for vect_recog_pattern. */
3341 vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func
,
3342 gimple_stmt_iterator si
,
3343 vec
<gimple
> *stmts_to_replace
)
3345 gimple stmt
= gsi_stmt (si
), pattern_stmt
;
3346 stmt_vec_info stmt_info
;
3347 loop_vec_info loop_vinfo
;
3348 tree pattern_vectype
;
3349 tree type_in
, type_out
;
3350 enum tree_code code
;
3354 stmts_to_replace
->truncate (0);
3355 stmts_to_replace
->quick_push (stmt
);
3356 pattern_stmt
= (* vect_recog_func
) (stmts_to_replace
, &type_in
, &type_out
);
3360 stmt
= stmts_to_replace
->last ();
3361 stmt_info
= vinfo_for_stmt (stmt
);
3362 loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3364 if (VECTOR_MODE_P (TYPE_MODE (type_in
)))
3366 /* No need to check target support (already checked by the pattern
3367 recognition function). */
3368 pattern_vectype
= type_out
? type_out
: type_in
;
3372 machine_mode vec_mode
;
3373 enum insn_code icode
;
3376 /* Check target support */
3377 type_in
= get_vectype_for_scalar_type (type_in
);
3381 type_out
= get_vectype_for_scalar_type (type_out
);
3386 pattern_vectype
= type_out
;
3388 if (is_gimple_assign (pattern_stmt
))
3389 code
= gimple_assign_rhs_code (pattern_stmt
);
3392 gcc_assert (is_gimple_call (pattern_stmt
));
3396 optab
= optab_for_tree_code (code
, type_in
, optab_default
);
3397 vec_mode
= TYPE_MODE (type_in
);
3399 || (icode
= optab_handler (optab
, vec_mode
)) == CODE_FOR_nothing
3400 || (insn_data
[icode
].operand
[0].mode
!= TYPE_MODE (type_out
)))
3404 /* Found a vectorizable pattern. */
3405 if (dump_enabled_p ())
3407 dump_printf_loc (MSG_NOTE
, vect_location
,
3408 "pattern recognized: ");
3409 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
3412 /* Mark the stmts that are involved in the pattern. */
3413 vect_mark_pattern_stmts (stmt
, pattern_stmt
, pattern_vectype
);
3415 /* Patterns cannot be vectorized using SLP, because they change the order of
3418 FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo
), i
, next
)
3420 LOOP_VINFO_REDUCTIONS (loop_vinfo
).ordered_remove (i
);
3422 /* It is possible that additional pattern stmts are created and inserted in
3423 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
3424 relevant statements. */
3425 for (i
= 0; stmts_to_replace
->iterate (i
, &stmt
)
3426 && (unsigned) i
< (stmts_to_replace
->length () - 1);
3429 stmt_info
= vinfo_for_stmt (stmt
);
3430 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
3431 if (dump_enabled_p ())
3433 dump_printf_loc (MSG_NOTE
, vect_location
,
3434 "additional pattern stmt: ");
3435 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
3438 vect_mark_pattern_stmts (stmt
, pattern_stmt
, NULL_TREE
);
3443 /* Function vect_pattern_recog
3446 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
3449 Output - for each computation idiom that is detected we create a new stmt
3450 that provides the same functionality and that can be vectorized. We
3451 also record some information in the struct_stmt_info of the relevant
3452 stmts, as explained below:
3454 At the entry to this function we have the following stmts, with the
3455 following initial value in the STMT_VINFO fields:
3457 stmt in_pattern_p related_stmt vec_stmt
3458 S1: a_i = .... - - -
3459 S2: a_2 = ..use(a_i).. - - -
3460 S3: a_1 = ..use(a_2).. - - -
3461 S4: a_0 = ..use(a_1).. - - -
3462 S5: ... = ..use(a_0).. - - -
3464 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
3465 represented by a single stmt. We then:
3466 - create a new stmt S6 equivalent to the pattern (the stmt is not
3467 inserted into the code)
3468 - fill in the STMT_VINFO fields as follows:
3470 in_pattern_p related_stmt vec_stmt
3471 S1: a_i = .... - - -
3472 S2: a_2 = ..use(a_i).. - - -
3473 S3: a_1 = ..use(a_2).. - - -
3474 S4: a_0 = ..use(a_1).. true S6 -
3475 '---> S6: a_new = .... - S4 -
3476 S5: ... = ..use(a_0).. - - -
3478 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
3479 to each other through the RELATED_STMT field).
3481 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
3482 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
3483 remain irrelevant unless used by stmts other than S4.
3485 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
3486 (because they are marked as irrelevant). It will vectorize S6, and record
3487 a pointer to the new vector stmt VS6 from S6 (as usual).
3488 S4 will be skipped, and S5 will be vectorized as usual:
3490 in_pattern_p related_stmt vec_stmt
3491 S1: a_i = .... - - -
3492 S2: a_2 = ..use(a_i).. - - -
3493 S3: a_1 = ..use(a_2).. - - -
3494 > VS6: va_new = .... - - -
3495 S4: a_0 = ..use(a_1).. true S6 VS6
3496 '---> S6: a_new = .... - S4 VS6
3497 > VS5: ... = ..vuse(va_new).. - - -
3498 S5: ... = ..use(a_0).. - - -
3500 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
3501 elsewhere), and we'll end up with:
3504 VS5: ... = ..vuse(va_new)..
3506 In case of more than one pattern statements, e.g., widen-mult with
3510 S2 a_T = (TYPE) a_t;
3511 '--> S3: a_it = (interm_type) a_t;
3512 S4 prod_T = a_T * CONST;
3513 '--> S5: prod_T' = a_it w* CONST;
3515 there may be other users of a_T outside the pattern. In that case S2 will
3516 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
3517 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
3518 be recorded in S3. */
3521 vect_pattern_recog (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
)
3526 gimple_stmt_iterator si
;
3528 vect_recog_func_ptr vect_recog_func
;
3529 auto_vec
<gimple
, 1> stmts_to_replace
;
3532 if (dump_enabled_p ())
3533 dump_printf_loc (MSG_NOTE
, vect_location
,
3534 "=== vect_pattern_recog ===\n");
3538 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3539 bbs
= LOOP_VINFO_BBS (loop_vinfo
);
3540 nbbs
= loop
->num_nodes
;
3544 bbs
= &BB_VINFO_BB (bb_vinfo
);
3548 /* Scan through the loop stmts, applying the pattern recognition
3549 functions starting at each stmt visited: */
3550 for (i
= 0; i
< nbbs
; i
++)
3552 basic_block bb
= bbs
[i
];
3553 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
3555 if (bb_vinfo
&& (stmt
= gsi_stmt (si
))
3556 && vinfo_for_stmt (stmt
)
3557 && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt
)))
3560 /* Scan over all generic vect_recog_xxx_pattern functions. */
3561 for (j
= 0; j
< NUM_PATTERNS
; j
++)
3563 vect_recog_func
= vect_vect_recog_func_ptrs
[j
];
3564 vect_pattern_recog_1 (vect_recog_func
, si
,