1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2015 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
28 #include "fold-const.h"
29 #include "stor-layout.h"
32 #include "hard-reg-set.h"
34 #include "dominance.h"
35 #include "basic-block.h"
36 #include "gimple-pretty-print.h"
37 #include "tree-ssa-alias.h"
38 #include "internal-fn.h"
40 #include "gimple-expr.h"
43 #include "gimple-iterator.h"
44 #include "gimple-ssa.h"
45 #include "tree-phinodes.h"
46 #include "ssa-iterators.h"
47 #include "stringpool.h"
48 #include "tree-ssanames.h"
52 #include "insn-config.h"
61 #include "insn-codes.h"
64 #include "tree-data-ref.h"
65 #include "tree-vectorizer.h"
66 #include "recog.h" /* FIXME: for insn_data */
67 #include "diagnostic-core.h"
71 /* Pattern recognition functions */
72 static gimple
vect_recog_widen_sum_pattern (vec
<gimple
> *, tree
*,
74 static gimple
vect_recog_widen_mult_pattern (vec
<gimple
> *, tree
*,
76 static gimple
vect_recog_dot_prod_pattern (vec
<gimple
> *, tree
*,
78 static gimple
vect_recog_sad_pattern (vec
<gimple
> *, tree
*,
80 static gimple
vect_recog_pow_pattern (vec
<gimple
> *, tree
*, tree
*);
81 static gimple
vect_recog_over_widening_pattern (vec
<gimple
> *, tree
*,
83 static gimple
vect_recog_widen_shift_pattern (vec
<gimple
> *,
85 static gimple
vect_recog_rotate_pattern (vec
<gimple
> *, tree
*, tree
*);
86 static gimple
vect_recog_vector_vector_shift_pattern (vec
<gimple
> *,
88 static gimple
vect_recog_divmod_pattern (vec
<gimple
> *,
90 static gimple
vect_recog_mixed_size_cond_pattern (vec
<gimple
> *,
92 static gimple
vect_recog_bool_pattern (vec
<gimple
> *, tree
*, tree
*);
93 static vect_recog_func_ptr vect_vect_recog_func_ptrs
[NUM_PATTERNS
] = {
94 vect_recog_widen_mult_pattern
,
95 vect_recog_widen_sum_pattern
,
96 vect_recog_dot_prod_pattern
,
97 vect_recog_sad_pattern
,
98 vect_recog_pow_pattern
,
99 vect_recog_widen_shift_pattern
,
100 vect_recog_over_widening_pattern
,
101 vect_recog_rotate_pattern
,
102 vect_recog_vector_vector_shift_pattern
,
103 vect_recog_divmod_pattern
,
104 vect_recog_mixed_size_cond_pattern
,
105 vect_recog_bool_pattern
};
108 append_pattern_def_seq (stmt_vec_info stmt_info
, gimple stmt
)
110 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
),
115 new_pattern_def_seq (stmt_vec_info stmt_info
, gimple stmt
)
117 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
) = NULL
;
118 append_pattern_def_seq (stmt_info
, stmt
);
121 /* Check whether STMT2 is in the same loop or basic block as STMT1.
122 Which of the two applies depends on whether we're currently doing
123 loop-based or basic-block-based vectorization, as determined by
124 the vinfo_for_stmt for STMT1 (which must be defined).
126 If this returns true, vinfo_for_stmt for STMT2 is guaranteed
127 to be defined as well. */
130 vect_same_loop_or_bb_p (gimple stmt1
, gimple stmt2
)
132 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt1
);
133 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
134 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
136 if (!gimple_bb (stmt2
))
141 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
142 if (!flow_bb_inside_loop_p (loop
, gimple_bb (stmt2
)))
147 if (gimple_bb (stmt2
) != BB_VINFO_BB (bb_vinfo
)
148 || gimple_code (stmt2
) == GIMPLE_PHI
)
152 gcc_assert (vinfo_for_stmt (stmt2
));
156 /* If the LHS of DEF_STMT has a single use, and that statement is
157 in the same loop or basic block, return it. */
160 vect_single_imm_use (gimple def_stmt
)
162 tree lhs
= gimple_assign_lhs (def_stmt
);
166 if (!single_imm_use (lhs
, &use_p
, &use_stmt
))
169 if (!vect_same_loop_or_bb_p (def_stmt
, use_stmt
))
175 /* Check whether NAME, an ssa-name used in USE_STMT,
176 is a result of a type promotion, such that:
177 DEF_STMT: NAME = NOP (name0)
178 If CHECK_SIGN is TRUE, check that either both types are signed or both are
182 type_conversion_p (tree name
, gimple use_stmt
, bool check_sign
,
183 tree
*orig_type
, gimple
*def_stmt
, bool *promotion
)
187 loop_vec_info loop_vinfo
;
188 stmt_vec_info stmt_vinfo
;
189 tree type
= TREE_TYPE (name
);
191 enum vect_def_type dt
;
193 bb_vec_info bb_vinfo
;
195 stmt_vinfo
= vinfo_for_stmt (use_stmt
);
196 loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
197 bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
198 if (!vect_is_simple_use (name
, use_stmt
, loop_vinfo
, bb_vinfo
, def_stmt
,
202 if (dt
!= vect_internal_def
203 && dt
!= vect_external_def
&& dt
!= vect_constant_def
)
209 if (!is_gimple_assign (*def_stmt
))
212 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt
)))
215 oprnd0
= gimple_assign_rhs1 (*def_stmt
);
217 *orig_type
= TREE_TYPE (oprnd0
);
218 if (!INTEGRAL_TYPE_P (type
) || !INTEGRAL_TYPE_P (*orig_type
)
219 || ((TYPE_UNSIGNED (type
) != TYPE_UNSIGNED (*orig_type
)) && check_sign
))
222 if (TYPE_PRECISION (type
) >= (TYPE_PRECISION (*orig_type
) * 2))
227 if (!vect_is_simple_use (oprnd0
, *def_stmt
, loop_vinfo
,
228 bb_vinfo
, &dummy_gimple
, &dummy
, &dt
))
234 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
235 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
238 vect_recog_temp_ssa_var (tree type
, gimple stmt
)
240 return make_temp_ssa_name (type
, stmt
, "patt");
243 /* Function vect_recog_dot_prod_pattern
245 Try to find the following pattern:
251 sum_0 = phi <init, sum_1>
254 S3 x_T = (TYPE1) x_t;
255 S4 y_T = (TYPE1) y_t;
257 [S6 prod = (TYPE2) prod; #optional]
258 S7 sum_1 = prod + sum_0;
260 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
261 same size of 'TYPE1' or bigger. This is a special case of a reduction
266 * STMTS: Contains a stmt from which the pattern search begins. In the
267 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
272 * TYPE_IN: The type of the input arguments to the pattern.
274 * TYPE_OUT: The type of the output of this pattern.
276 * Return value: A new stmt that will be used to replace the sequence of
277 stmts that constitute the pattern. In this case it will be:
278 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
280 Note: The dot-prod idiom is a widening reduction pattern that is
281 vectorized without preserving all the intermediate results. It
282 produces only N/2 (widened) results (by summing up pairs of
283 intermediate results) rather than all N results. Therefore, we
284 cannot allow this pattern when we want to get all the results and in
285 the correct order (as is the case when this computation is in an
286 inner-loop nested in an outer-loop that us being vectorized). */
289 vect_recog_dot_prod_pattern (vec
<gimple
> *stmts
, tree
*type_in
,
292 gimple stmt
, last_stmt
= (*stmts
)[0];
294 tree oprnd00
, oprnd01
;
295 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
296 tree type
, half_type
;
299 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
307 loop
= LOOP_VINFO_LOOP (loop_info
);
309 /* We don't allow changing the order of the computation in the inner-loop
310 when doing outer-loop vectorization. */
311 if (loop
&& nested_in_vect_loop_p (loop
, last_stmt
))
314 if (!is_gimple_assign (last_stmt
))
317 type
= gimple_expr_type (last_stmt
);
319 /* Look for the following pattern
323 DDPROD = (TYPE2) DPROD;
324 sum_1 = DDPROD + sum_0;
326 - DX is double the size of X
327 - DY is double the size of Y
328 - DX, DY, DPROD all have the same type
329 - sum is the same size of DPROD or bigger
330 - sum has been recognized as a reduction variable.
332 This is equivalent to:
333 DPROD = X w* Y; #widen mult
334 sum_1 = DPROD w+ sum_0; #widen summation
336 DPROD = X w* Y; #widen mult
337 sum_1 = DPROD + sum_0; #summation
340 /* Starting from LAST_STMT, follow the defs of its uses in search
341 of the above pattern. */
343 if (gimple_assign_rhs_code (last_stmt
) != PLUS_EXPR
)
346 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
348 /* Has been detected as widening-summation? */
350 stmt
= STMT_VINFO_RELATED_STMT (stmt_vinfo
);
351 type
= gimple_expr_type (stmt
);
352 if (gimple_assign_rhs_code (stmt
) != WIDEN_SUM_EXPR
)
354 oprnd0
= gimple_assign_rhs1 (stmt
);
355 oprnd1
= gimple_assign_rhs2 (stmt
);
356 half_type
= TREE_TYPE (oprnd0
);
362 oprnd0
= gimple_assign_rhs1 (last_stmt
);
363 oprnd1
= gimple_assign_rhs2 (last_stmt
);
364 if (!types_compatible_p (TREE_TYPE (oprnd0
), type
)
365 || !types_compatible_p (TREE_TYPE (oprnd1
), type
))
369 if (type_conversion_p (oprnd0
, stmt
, true, &half_type
, &def_stmt
,
374 oprnd0
= gimple_assign_rhs1 (stmt
);
380 /* So far so good. Since last_stmt was detected as a (summation) reduction,
381 we know that oprnd1 is the reduction variable (defined by a loop-header
382 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
383 Left to check that oprnd0 is defined by a (widen_)mult_expr */
384 if (TREE_CODE (oprnd0
) != SSA_NAME
)
387 prod_type
= half_type
;
388 stmt
= SSA_NAME_DEF_STMT (oprnd0
);
390 /* It could not be the dot_prod pattern if the stmt is outside the loop. */
391 if (!gimple_bb (stmt
) || !flow_bb_inside_loop_p (loop
, gimple_bb (stmt
)))
394 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
395 inside the loop (in case we are analyzing an outer-loop). */
396 if (!is_gimple_assign (stmt
))
398 stmt_vinfo
= vinfo_for_stmt (stmt
);
399 gcc_assert (stmt_vinfo
);
400 if (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_internal_def
)
402 if (gimple_assign_rhs_code (stmt
) != MULT_EXPR
)
404 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
406 /* Has been detected as a widening multiplication? */
408 stmt
= STMT_VINFO_RELATED_STMT (stmt_vinfo
);
409 if (gimple_assign_rhs_code (stmt
) != WIDEN_MULT_EXPR
)
411 stmt_vinfo
= vinfo_for_stmt (stmt
);
412 gcc_assert (stmt_vinfo
);
413 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_internal_def
);
414 oprnd00
= gimple_assign_rhs1 (stmt
);
415 oprnd01
= gimple_assign_rhs2 (stmt
);
416 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (last_stmt
))
417 = STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
);
421 tree half_type0
, half_type1
;
425 oprnd0
= gimple_assign_rhs1 (stmt
);
426 oprnd1
= gimple_assign_rhs2 (stmt
);
427 if (!types_compatible_p (TREE_TYPE (oprnd0
), prod_type
)
428 || !types_compatible_p (TREE_TYPE (oprnd1
), prod_type
))
430 if (!type_conversion_p (oprnd0
, stmt
, true, &half_type0
, &def_stmt
,
434 oprnd00
= gimple_assign_rhs1 (def_stmt
);
435 if (!type_conversion_p (oprnd1
, stmt
, true, &half_type1
, &def_stmt
,
439 oprnd01
= gimple_assign_rhs1 (def_stmt
);
440 if (!types_compatible_p (half_type0
, half_type1
))
442 if (TYPE_PRECISION (prod_type
) != TYPE_PRECISION (half_type0
) * 2)
446 half_type
= TREE_TYPE (oprnd00
);
447 *type_in
= half_type
;
450 /* Pattern detected. Create a stmt to be used to replace the pattern: */
451 var
= vect_recog_temp_ssa_var (type
, NULL
);
452 pattern_stmt
= gimple_build_assign (var
, DOT_PROD_EXPR
,
453 oprnd00
, oprnd01
, oprnd1
);
455 if (dump_enabled_p ())
457 dump_printf_loc (MSG_NOTE
, vect_location
,
458 "vect_recog_dot_prod_pattern: detected: ");
459 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
460 dump_printf (MSG_NOTE
, "\n");
467 /* Function vect_recog_sad_pattern
469 Try to find the following Sum of Absolute Difference (SAD) pattern:
472 signed TYPE1 diff, abs_diff;
475 sum_0 = phi <init, sum_1>
478 S3 x_T = (TYPE1) x_t;
479 S4 y_T = (TYPE1) y_t;
481 S6 abs_diff = ABS_EXPR <diff>;
482 [S7 abs_diff = (TYPE2) abs_diff; #optional]
483 S8 sum_1 = abs_diff + sum_0;
485 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
486 same size of 'TYPE1' or bigger. This is a special case of a reduction
491 * STMTS: Contains a stmt from which the pattern search begins. In the
492 example, when this function is called with S8, the pattern
493 {S3,S4,S5,S6,S7,S8} will be detected.
497 * TYPE_IN: The type of the input arguments to the pattern.
499 * TYPE_OUT: The type of the output of this pattern.
501 * Return value: A new stmt that will be used to replace the sequence of
502 stmts that constitute the pattern. In this case it will be:
503 SAD_EXPR <x_t, y_t, sum_0>
507 vect_recog_sad_pattern (vec
<gimple
> *stmts
, tree
*type_in
,
510 gimple last_stmt
= (*stmts
)[0];
511 tree sad_oprnd0
, sad_oprnd1
;
512 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
514 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
521 loop
= LOOP_VINFO_LOOP (loop_info
);
523 /* We don't allow changing the order of the computation in the inner-loop
524 when doing outer-loop vectorization. */
525 if (loop
&& nested_in_vect_loop_p (loop
, last_stmt
))
528 if (!is_gimple_assign (last_stmt
))
531 tree sum_type
= gimple_expr_type (last_stmt
);
533 /* Look for the following pattern
537 DAD = ABS_EXPR <DDIFF>;
538 DDPROD = (TYPE2) DPROD;
541 - DX is at least double the size of X
542 - DY is at least double the size of Y
543 - DX, DY, DDIFF, DAD all have the same type
544 - sum is the same size of DAD or bigger
545 - sum has been recognized as a reduction variable.
547 This is equivalent to:
548 DDIFF = X w- Y; #widen sub
549 DAD = ABS_EXPR <DDIFF>;
550 sum_1 = DAD w+ sum_0; #widen summation
552 DDIFF = X w- Y; #widen sub
553 DAD = ABS_EXPR <DDIFF>;
554 sum_1 = DAD + sum_0; #summation
557 /* Starting from LAST_STMT, follow the defs of its uses in search
558 of the above pattern. */
560 if (gimple_assign_rhs_code (last_stmt
) != PLUS_EXPR
)
563 tree plus_oprnd0
, plus_oprnd1
;
565 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
567 /* Has been detected as widening-summation? */
569 gimple stmt
= STMT_VINFO_RELATED_STMT (stmt_vinfo
);
570 sum_type
= gimple_expr_type (stmt
);
571 if (gimple_assign_rhs_code (stmt
) != WIDEN_SUM_EXPR
)
573 plus_oprnd0
= gimple_assign_rhs1 (stmt
);
574 plus_oprnd1
= gimple_assign_rhs2 (stmt
);
575 half_type
= TREE_TYPE (plus_oprnd0
);
581 plus_oprnd0
= gimple_assign_rhs1 (last_stmt
);
582 plus_oprnd1
= gimple_assign_rhs2 (last_stmt
);
583 if (!types_compatible_p (TREE_TYPE (plus_oprnd0
), sum_type
)
584 || !types_compatible_p (TREE_TYPE (plus_oprnd1
), sum_type
))
587 /* The type conversion could be promotion, demotion,
588 or just signed -> unsigned. */
589 if (type_conversion_p (plus_oprnd0
, last_stmt
, false,
590 &half_type
, &def_stmt
, &promotion
))
591 plus_oprnd0
= gimple_assign_rhs1 (def_stmt
);
593 half_type
= sum_type
;
596 /* So far so good. Since last_stmt was detected as a (summation) reduction,
597 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
598 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
599 Then check that plus_oprnd0 is defined by an abs_expr. */
601 if (TREE_CODE (plus_oprnd0
) != SSA_NAME
)
604 tree abs_type
= half_type
;
605 gimple abs_stmt
= SSA_NAME_DEF_STMT (plus_oprnd0
);
607 /* It could not be the sad pattern if the abs_stmt is outside the loop. */
608 if (!gimple_bb (abs_stmt
) || !flow_bb_inside_loop_p (loop
, gimple_bb (abs_stmt
)))
611 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
612 inside the loop (in case we are analyzing an outer-loop). */
613 if (!is_gimple_assign (abs_stmt
))
616 stmt_vec_info abs_stmt_vinfo
= vinfo_for_stmt (abs_stmt
);
617 gcc_assert (abs_stmt_vinfo
);
618 if (STMT_VINFO_DEF_TYPE (abs_stmt_vinfo
) != vect_internal_def
)
620 if (gimple_assign_rhs_code (abs_stmt
) != ABS_EXPR
)
623 tree abs_oprnd
= gimple_assign_rhs1 (abs_stmt
);
624 if (!types_compatible_p (TREE_TYPE (abs_oprnd
), abs_type
))
626 if (TYPE_UNSIGNED (abs_type
))
629 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
631 if (TREE_CODE (abs_oprnd
) != SSA_NAME
)
634 gimple diff_stmt
= SSA_NAME_DEF_STMT (abs_oprnd
);
636 /* It could not be the sad pattern if the diff_stmt is outside the loop. */
637 if (!gimple_bb (diff_stmt
)
638 || !flow_bb_inside_loop_p (loop
, gimple_bb (diff_stmt
)))
641 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
642 inside the loop (in case we are analyzing an outer-loop). */
643 if (!is_gimple_assign (diff_stmt
))
646 stmt_vec_info diff_stmt_vinfo
= vinfo_for_stmt (diff_stmt
);
647 gcc_assert (diff_stmt_vinfo
);
648 if (STMT_VINFO_DEF_TYPE (diff_stmt_vinfo
) != vect_internal_def
)
650 if (gimple_assign_rhs_code (diff_stmt
) != MINUS_EXPR
)
653 tree half_type0
, half_type1
;
656 tree minus_oprnd0
= gimple_assign_rhs1 (diff_stmt
);
657 tree minus_oprnd1
= gimple_assign_rhs2 (diff_stmt
);
659 if (!types_compatible_p (TREE_TYPE (minus_oprnd0
), abs_type
)
660 || !types_compatible_p (TREE_TYPE (minus_oprnd1
), abs_type
))
662 if (!type_conversion_p (minus_oprnd0
, diff_stmt
, false,
663 &half_type0
, &def_stmt
, &promotion
)
666 sad_oprnd0
= gimple_assign_rhs1 (def_stmt
);
668 if (!type_conversion_p (minus_oprnd1
, diff_stmt
, false,
669 &half_type1
, &def_stmt
, &promotion
)
672 sad_oprnd1
= gimple_assign_rhs1 (def_stmt
);
674 if (!types_compatible_p (half_type0
, half_type1
))
676 if (TYPE_PRECISION (abs_type
) < TYPE_PRECISION (half_type0
) * 2
677 || TYPE_PRECISION (sum_type
) < TYPE_PRECISION (half_type0
) * 2)
680 *type_in
= TREE_TYPE (sad_oprnd0
);
681 *type_out
= sum_type
;
683 /* Pattern detected. Create a stmt to be used to replace the pattern: */
684 tree var
= vect_recog_temp_ssa_var (sum_type
, NULL
);
685 gimple pattern_stmt
= gimple_build_assign (var
, SAD_EXPR
, sad_oprnd0
,
686 sad_oprnd1
, plus_oprnd1
);
688 if (dump_enabled_p ())
690 dump_printf_loc (MSG_NOTE
, vect_location
,
691 "vect_recog_sad_pattern: detected: ");
692 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
693 dump_printf (MSG_NOTE
, "\n");
700 /* Handle widening operation by a constant. At the moment we support MULT_EXPR
703 For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
704 we check that CONST_OPRND is less or equal to the size of HALF_TYPE.
706 Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
707 HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
708 that satisfies the above restrictions, we can perform a widening opeartion
709 from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
710 with a_it = (interm_type) a_t; Store such operation in *WSTMT. */
713 vect_handle_widen_op_by_const (gimple stmt
, enum tree_code code
,
714 tree const_oprnd
, tree
*oprnd
,
715 gimple
*wstmt
, tree type
,
716 tree
*half_type
, gimple def_stmt
)
718 tree new_type
, new_oprnd
;
720 if (code
!= MULT_EXPR
&& code
!= LSHIFT_EXPR
)
723 if (((code
== MULT_EXPR
&& int_fits_type_p (const_oprnd
, *half_type
))
724 || (code
== LSHIFT_EXPR
725 && compare_tree_int (const_oprnd
, TYPE_PRECISION (*half_type
))
727 && TYPE_PRECISION (type
) == (TYPE_PRECISION (*half_type
) * 2))
729 /* CONST_OPRND is a constant of HALF_TYPE. */
730 *oprnd
= gimple_assign_rhs1 (def_stmt
);
734 if (TYPE_PRECISION (type
) < (TYPE_PRECISION (*half_type
) * 4))
737 if (!vect_same_loop_or_bb_p (stmt
, def_stmt
))
740 /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
741 a type 2 times bigger than HALF_TYPE. */
742 new_type
= build_nonstandard_integer_type (TYPE_PRECISION (type
) / 2,
743 TYPE_UNSIGNED (type
));
744 if ((code
== MULT_EXPR
&& !int_fits_type_p (const_oprnd
, new_type
))
745 || (code
== LSHIFT_EXPR
746 && compare_tree_int (const_oprnd
, TYPE_PRECISION (new_type
)) == 1))
749 /* Use NEW_TYPE for widening operation and create a_T = (NEW_TYPE) a_t; */
750 *oprnd
= gimple_assign_rhs1 (def_stmt
);
751 new_oprnd
= make_ssa_name (new_type
);
752 *wstmt
= gimple_build_assign (new_oprnd
, NOP_EXPR
, *oprnd
);
755 *half_type
= new_type
;
760 /* Function vect_recog_widen_mult_pattern
762 Try to find the following pattern:
766 TYPE a_T, b_T, prod_T;
772 S5 prod_T = a_T * b_T;
774 where type 'TYPE' is at least double the size of type 'type1' and 'type2'.
776 Also detect unsigned cases:
780 unsigned TYPE u_prod_T;
781 TYPE a_T, b_T, prod_T;
787 S5 prod_T = a_T * b_T;
788 S6 u_prod_T = (unsigned TYPE) prod_T;
790 and multiplication by constants:
797 S5 prod_T = a_T * CONST;
799 A special case of multiplication by constants is when 'TYPE' is 4 times
800 bigger than 'type', but CONST fits an intermediate type 2 times smaller
801 than 'TYPE'. In that case we create an additional pattern stmt for S3
802 to create a variable of the intermediate type, and perform widen-mult
803 on the intermediate type as well:
807 TYPE a_T, prod_T, prod_T';
811 '--> a_it = (interm_type) a_t;
812 S5 prod_T = a_T * CONST;
813 '--> prod_T' = a_it w* CONST;
817 * STMTS: Contains a stmt from which the pattern search begins. In the
818 example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
819 is detected. In case of unsigned widen-mult, the original stmt (S5) is
820 replaced with S6 in STMTS. In case of multiplication by a constant
821 of an intermediate type (the last case above), STMTS also contains S3
822 (inserted before S5).
826 * TYPE_IN: The type of the input arguments to the pattern.
828 * TYPE_OUT: The type of the output of this pattern.
830 * Return value: A new stmt that will be used to replace the sequence of
831 stmts that constitute the pattern. In this case it will be:
832 WIDEN_MULT <a_t, b_t>
833 If the result of WIDEN_MULT needs to be converted to a larger type, the
834 returned stmt will be this type conversion stmt.
838 vect_recog_widen_mult_pattern (vec
<gimple
> *stmts
,
839 tree
*type_in
, tree
*type_out
)
841 gimple last_stmt
= stmts
->pop ();
842 gimple def_stmt0
, def_stmt1
;
844 tree type
, half_type0
, half_type1
;
845 gimple new_stmt
= NULL
, pattern_stmt
= NULL
;
846 tree vectype
, vecitype
;
848 enum tree_code dummy_code
;
854 if (!is_gimple_assign (last_stmt
))
857 type
= gimple_expr_type (last_stmt
);
859 /* Starting from LAST_STMT, follow the defs of its uses in search
860 of the above pattern. */
862 if (gimple_assign_rhs_code (last_stmt
) != MULT_EXPR
)
865 oprnd0
= gimple_assign_rhs1 (last_stmt
);
866 oprnd1
= gimple_assign_rhs2 (last_stmt
);
867 if (!types_compatible_p (TREE_TYPE (oprnd0
), type
)
868 || !types_compatible_p (TREE_TYPE (oprnd1
), type
))
871 /* Check argument 0. */
872 if (!type_conversion_p (oprnd0
, last_stmt
, false, &half_type0
, &def_stmt0
,
876 /* Check argument 1. */
877 op1_ok
= type_conversion_p (oprnd1
, last_stmt
, false, &half_type1
,
878 &def_stmt1
, &promotion
);
880 if (op1_ok
&& promotion
)
882 oprnd0
= gimple_assign_rhs1 (def_stmt0
);
883 oprnd1
= gimple_assign_rhs1 (def_stmt1
);
887 if (TREE_CODE (oprnd1
) == INTEGER_CST
888 && TREE_CODE (half_type0
) == INTEGER_TYPE
889 && vect_handle_widen_op_by_const (last_stmt
, MULT_EXPR
, oprnd1
,
890 &oprnd0
, &new_stmt
, type
,
891 &half_type0
, def_stmt0
))
893 half_type1
= half_type0
;
894 oprnd1
= fold_convert (half_type1
, oprnd1
);
900 /* If the two arguments have different sizes, convert the one with
901 the smaller type into the larger type. */
902 if (TYPE_PRECISION (half_type0
) != TYPE_PRECISION (half_type1
))
904 /* If we already used up the single-stmt slot give up. */
909 gimple def_stmt
= NULL
;
911 if (TYPE_PRECISION (half_type0
) < TYPE_PRECISION (half_type1
))
913 def_stmt
= def_stmt0
;
914 half_type0
= half_type1
;
919 def_stmt
= def_stmt1
;
920 half_type1
= half_type0
;
924 tree old_oprnd
= gimple_assign_rhs1 (def_stmt
);
925 tree new_oprnd
= make_ssa_name (half_type0
);
926 new_stmt
= gimple_build_assign (new_oprnd
, NOP_EXPR
, old_oprnd
);
930 /* Handle unsigned case. Look for
931 S6 u_prod_T = (unsigned TYPE) prod_T;
932 Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */
933 if (TYPE_UNSIGNED (type
) != TYPE_UNSIGNED (half_type0
))
939 if (TYPE_UNSIGNED (type
) == TYPE_UNSIGNED (half_type1
))
942 use_stmt
= vect_single_imm_use (last_stmt
);
943 if (!use_stmt
|| !is_gimple_assign (use_stmt
)
944 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt
)))
947 use_lhs
= gimple_assign_lhs (use_stmt
);
948 use_type
= TREE_TYPE (use_lhs
);
949 if (!INTEGRAL_TYPE_P (use_type
)
950 || (TYPE_UNSIGNED (type
) == TYPE_UNSIGNED (use_type
))
951 || (TYPE_PRECISION (type
) != TYPE_PRECISION (use_type
)))
955 last_stmt
= use_stmt
;
958 if (!types_compatible_p (half_type0
, half_type1
))
961 /* If TYPE is more than twice larger than HALF_TYPE, we use WIDEN_MULT
962 to get an intermediate result of type ITYPE. In this case we need
963 to build a statement to convert this intermediate result to type TYPE. */
965 if (TYPE_PRECISION (type
) > TYPE_PRECISION (half_type0
) * 2)
966 itype
= build_nonstandard_integer_type
967 (GET_MODE_BITSIZE (TYPE_MODE (half_type0
)) * 2,
968 TYPE_UNSIGNED (type
));
970 /* Pattern detected. */
971 if (dump_enabled_p ())
972 dump_printf_loc (MSG_NOTE
, vect_location
,
973 "vect_recog_widen_mult_pattern: detected:\n");
975 /* Check target support */
976 vectype
= get_vectype_for_scalar_type (half_type0
);
977 vecitype
= get_vectype_for_scalar_type (itype
);
980 || !supportable_widening_operation (WIDEN_MULT_EXPR
, last_stmt
,
982 &dummy_code
, &dummy_code
,
983 &dummy_int
, &dummy_vec
))
987 *type_out
= get_vectype_for_scalar_type (type
);
989 /* Pattern supported. Create a stmt to be used to replace the pattern: */
990 var
= vect_recog_temp_ssa_var (itype
, NULL
);
991 pattern_stmt
= gimple_build_assign (var
, WIDEN_MULT_EXPR
, oprnd0
, oprnd1
);
993 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
994 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
995 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
996 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
) = NULL
;
998 /* If the original two operands have different sizes, we may need to convert
999 the smaller one into the larget type. If this is the case, at this point
1000 the new stmt is already built. */
1003 append_pattern_def_seq (stmt_vinfo
, new_stmt
);
1004 stmt_vec_info new_stmt_info
1005 = new_stmt_vec_info (new_stmt
, loop_vinfo
, bb_vinfo
);
1006 set_vinfo_for_stmt (new_stmt
, new_stmt_info
);
1007 STMT_VINFO_VECTYPE (new_stmt_info
) = vectype
;
1010 /* If ITYPE is not TYPE, we need to build a type convertion stmt to convert
1011 the result of the widen-mult operation into type TYPE. */
1014 append_pattern_def_seq (stmt_vinfo
, pattern_stmt
);
1015 stmt_vec_info pattern_stmt_info
1016 = new_stmt_vec_info (pattern_stmt
, loop_vinfo
, bb_vinfo
);
1017 set_vinfo_for_stmt (pattern_stmt
, pattern_stmt_info
);
1018 STMT_VINFO_VECTYPE (pattern_stmt_info
) = vecitype
;
1019 pattern_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (type
, NULL
),
1021 gimple_assign_lhs (pattern_stmt
));
1024 if (dump_enabled_p ())
1025 dump_gimple_stmt_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, pattern_stmt
, 0);
1027 stmts
->safe_push (last_stmt
);
1028 return pattern_stmt
;
1032 /* Function vect_recog_pow_pattern
1034 Try to find the following pattern:
1038 with POW being one of pow, powf, powi, powif and N being
1043 * LAST_STMT: A stmt from which the pattern search begins.
1047 * TYPE_IN: The type of the input arguments to the pattern.
1049 * TYPE_OUT: The type of the output of this pattern.
1051 * Return value: A new stmt that will be used to replace the sequence of
1052 stmts that constitute the pattern. In this case it will be:
1059 vect_recog_pow_pattern (vec
<gimple
> *stmts
, tree
*type_in
,
1062 gimple last_stmt
= (*stmts
)[0];
1063 tree fn
, base
, exp
= NULL
;
1067 if (!is_gimple_call (last_stmt
) || gimple_call_lhs (last_stmt
) == NULL
)
1070 fn
= gimple_call_fndecl (last_stmt
);
1071 if (fn
== NULL_TREE
|| DECL_BUILT_IN_CLASS (fn
) != BUILT_IN_NORMAL
)
1074 switch (DECL_FUNCTION_CODE (fn
))
1076 case BUILT_IN_POWIF
:
1080 base
= gimple_call_arg (last_stmt
, 0);
1081 exp
= gimple_call_arg (last_stmt
, 1);
1082 if (TREE_CODE (exp
) != REAL_CST
1083 && TREE_CODE (exp
) != INTEGER_CST
)
1091 /* We now have a pow or powi builtin function call with a constant
1094 *type_out
= NULL_TREE
;
1096 /* Catch squaring. */
1097 if ((tree_fits_shwi_p (exp
)
1098 && tree_to_shwi (exp
) == 2)
1099 || (TREE_CODE (exp
) == REAL_CST
1100 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp
), dconst2
)))
1102 *type_in
= TREE_TYPE (base
);
1104 var
= vect_recog_temp_ssa_var (TREE_TYPE (base
), NULL
);
1105 stmt
= gimple_build_assign (var
, MULT_EXPR
, base
, base
);
1109 /* Catch square root. */
1110 if (TREE_CODE (exp
) == REAL_CST
1111 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp
), dconsthalf
))
1113 tree newfn
= mathfn_built_in (TREE_TYPE (base
), BUILT_IN_SQRT
);
1114 *type_in
= get_vectype_for_scalar_type (TREE_TYPE (base
));
1117 gcall
*stmt
= gimple_build_call (newfn
, 1, base
);
1118 if (vectorizable_function (stmt
, *type_in
, *type_in
)
1121 var
= vect_recog_temp_ssa_var (TREE_TYPE (base
), stmt
);
1122 gimple_call_set_lhs (stmt
, var
);
1132 /* Function vect_recog_widen_sum_pattern
1134 Try to find the following pattern:
1137 TYPE x_T, sum = init;
1139 sum_0 = phi <init, sum_1>
1141 S2 x_T = (TYPE) x_t;
1142 S3 sum_1 = x_T + sum_0;
1144 where type 'TYPE' is at least double the size of type 'type', i.e - we're
1145 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1146 a special case of a reduction computation.
1150 * LAST_STMT: A stmt from which the pattern search begins. In the example,
1151 when this function is called with S3, the pattern {S2,S3} will be detected.
1155 * TYPE_IN: The type of the input arguments to the pattern.
1157 * TYPE_OUT: The type of the output of this pattern.
1159 * Return value: A new stmt that will be used to replace the sequence of
1160 stmts that constitute the pattern. In this case it will be:
1161 WIDEN_SUM <x_t, sum_0>
1163 Note: The widening-sum idiom is a widening reduction pattern that is
1164 vectorized without preserving all the intermediate results. It
1165 produces only N/2 (widened) results (by summing up pairs of
1166 intermediate results) rather than all N results. Therefore, we
1167 cannot allow this pattern when we want to get all the results and in
1168 the correct order (as is the case when this computation is in an
1169 inner-loop nested in an outer-loop that us being vectorized). */
1172 vect_recog_widen_sum_pattern (vec
<gimple
> *stmts
, tree
*type_in
,
1175 gimple stmt
, last_stmt
= (*stmts
)[0];
1176 tree oprnd0
, oprnd1
;
1177 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
1178 tree type
, half_type
;
1179 gimple pattern_stmt
;
1180 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1188 loop
= LOOP_VINFO_LOOP (loop_info
);
1190 /* We don't allow changing the order of the computation in the inner-loop
1191 when doing outer-loop vectorization. */
1192 if (loop
&& nested_in_vect_loop_p (loop
, last_stmt
))
1195 if (!is_gimple_assign (last_stmt
))
1198 type
= gimple_expr_type (last_stmt
);
1200 /* Look for the following pattern
1203 In which DX is at least double the size of X, and sum_1 has been
1204 recognized as a reduction variable.
1207 /* Starting from LAST_STMT, follow the defs of its uses in search
1208 of the above pattern. */
1210 if (gimple_assign_rhs_code (last_stmt
) != PLUS_EXPR
)
1213 oprnd0
= gimple_assign_rhs1 (last_stmt
);
1214 oprnd1
= gimple_assign_rhs2 (last_stmt
);
1215 if (!types_compatible_p (TREE_TYPE (oprnd0
), type
)
1216 || !types_compatible_p (TREE_TYPE (oprnd1
), type
))
1219 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1220 we know that oprnd1 is the reduction variable (defined by a loop-header
1221 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1222 Left to check that oprnd0 is defined by a cast from type 'type' to type
1225 if (!type_conversion_p (oprnd0
, last_stmt
, true, &half_type
, &stmt
,
1230 oprnd0
= gimple_assign_rhs1 (stmt
);
1231 *type_in
= half_type
;
1234 /* Pattern detected. Create a stmt to be used to replace the pattern: */
1235 var
= vect_recog_temp_ssa_var (type
, NULL
);
1236 pattern_stmt
= gimple_build_assign (var
, WIDEN_SUM_EXPR
, oprnd0
, oprnd1
);
1238 if (dump_enabled_p ())
1240 dump_printf_loc (MSG_NOTE
, vect_location
,
1241 "vect_recog_widen_sum_pattern: detected: ");
1242 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
1243 dump_printf (MSG_NOTE
, "\n");
1246 return pattern_stmt
;
1250 /* Return TRUE if the operation in STMT can be performed on a smaller type.
1253 STMT - a statement to check.
1254 DEF - we support operations with two operands, one of which is constant.
1255 The other operand can be defined by a demotion operation, or by a
1256 previous statement in a sequence of over-promoted operations. In the
1257 later case DEF is used to replace that operand. (It is defined by a
1258 pattern statement we created for the previous statement in the
1262 NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not
1263 NULL, it's the type of DEF.
1264 STMTS - additional pattern statements. If a pattern statement (type
1265 conversion) is created in this function, its original statement is
1269 OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
1270 operands to use in the new pattern statement for STMT (will be created
1271 in vect_recog_over_widening_pattern ()).
1272 NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
1273 statements for STMT: the first one is a type promotion and the second
1274 one is the operation itself. We return the type promotion statement
1275 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
1276 the second pattern statement. */
1279 vect_operation_fits_smaller_type (gimple stmt
, tree def
, tree
*new_type
,
1280 tree
*op0
, tree
*op1
, gimple
*new_def_stmt
,
1283 enum tree_code code
;
1284 tree const_oprnd
, oprnd
;
1285 tree interm_type
= NULL_TREE
, half_type
, new_oprnd
, type
;
1286 gimple def_stmt
, new_stmt
;
1292 *new_def_stmt
= NULL
;
1294 if (!is_gimple_assign (stmt
))
1297 code
= gimple_assign_rhs_code (stmt
);
1298 if (code
!= LSHIFT_EXPR
&& code
!= RSHIFT_EXPR
1299 && code
!= BIT_IOR_EXPR
&& code
!= BIT_XOR_EXPR
&& code
!= BIT_AND_EXPR
)
1302 oprnd
= gimple_assign_rhs1 (stmt
);
1303 const_oprnd
= gimple_assign_rhs2 (stmt
);
1304 type
= gimple_expr_type (stmt
);
1306 if (TREE_CODE (oprnd
) != SSA_NAME
1307 || TREE_CODE (const_oprnd
) != INTEGER_CST
)
1310 /* If oprnd has other uses besides that in stmt we cannot mark it
1311 as being part of a pattern only. */
1312 if (!has_single_use (oprnd
))
1315 /* If we are in the middle of a sequence, we use DEF from a previous
1316 statement. Otherwise, OPRND has to be a result of type promotion. */
1319 half_type
= *new_type
;
1325 if (!type_conversion_p (oprnd
, stmt
, false, &half_type
, &def_stmt
,
1328 || !vect_same_loop_or_bb_p (stmt
, def_stmt
))
1332 /* Can we perform the operation on a smaller type? */
1338 if (!int_fits_type_p (const_oprnd
, half_type
))
1340 /* HALF_TYPE is not enough. Try a bigger type if possible. */
1341 if (TYPE_PRECISION (type
) < (TYPE_PRECISION (half_type
) * 4))
1344 interm_type
= build_nonstandard_integer_type (
1345 TYPE_PRECISION (half_type
) * 2, TYPE_UNSIGNED (type
));
1346 if (!int_fits_type_p (const_oprnd
, interm_type
))
1353 /* Try intermediate type - HALF_TYPE is not enough for sure. */
1354 if (TYPE_PRECISION (type
) < (TYPE_PRECISION (half_type
) * 4))
1357 /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
1358 (e.g., if the original value was char, the shift amount is at most 8
1359 if we want to use short). */
1360 if (compare_tree_int (const_oprnd
, TYPE_PRECISION (half_type
)) == 1)
1363 interm_type
= build_nonstandard_integer_type (
1364 TYPE_PRECISION (half_type
) * 2, TYPE_UNSIGNED (type
));
1366 if (!vect_supportable_shift (code
, interm_type
))
1372 if (vect_supportable_shift (code
, half_type
))
1375 /* Try intermediate type - HALF_TYPE is not supported. */
1376 if (TYPE_PRECISION (type
) < (TYPE_PRECISION (half_type
) * 4))
1379 interm_type
= build_nonstandard_integer_type (
1380 TYPE_PRECISION (half_type
) * 2, TYPE_UNSIGNED (type
));
1382 if (!vect_supportable_shift (code
, interm_type
))
1391 /* There are four possible cases:
1392 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
1393 the first statement in the sequence)
1394 a. The original, HALF_TYPE, is not enough - we replace the promotion
1395 from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
1396 b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
1398 2. OPRND is defined by a pattern statement we created.
1399 a. Its type is not sufficient for the operation, we create a new stmt:
1400 a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store
1401 this statement in NEW_DEF_STMT, and it is later put in
1402 STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
1403 b. OPRND is good to use in the new statement. */
1408 /* Replace the original type conversion HALF_TYPE->TYPE with
1409 HALF_TYPE->INTERM_TYPE. */
1410 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
)))
1412 new_stmt
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
));
1413 /* Check if the already created pattern stmt is what we need. */
1414 if (!is_gimple_assign (new_stmt
)
1415 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt
))
1416 || TREE_TYPE (gimple_assign_lhs (new_stmt
)) != interm_type
)
1419 stmts
->safe_push (def_stmt
);
1420 oprnd
= gimple_assign_lhs (new_stmt
);
1424 /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */
1425 oprnd
= gimple_assign_rhs1 (def_stmt
);
1426 new_oprnd
= make_ssa_name (interm_type
);
1427 new_stmt
= gimple_build_assign (new_oprnd
, NOP_EXPR
, oprnd
);
1428 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
)) = new_stmt
;
1429 stmts
->safe_push (def_stmt
);
1435 /* Retrieve the operand before the type promotion. */
1436 oprnd
= gimple_assign_rhs1 (def_stmt
);
1443 /* Create a type conversion HALF_TYPE->INTERM_TYPE. */
1444 new_oprnd
= make_ssa_name (interm_type
);
1445 new_stmt
= gimple_build_assign (new_oprnd
, NOP_EXPR
, oprnd
);
1447 *new_def_stmt
= new_stmt
;
1450 /* Otherwise, OPRND is already set. */
1454 *new_type
= interm_type
;
1456 *new_type
= half_type
;
1459 *op1
= fold_convert (*new_type
, const_oprnd
);
1465 /* Try to find a statement or a sequence of statements that can be performed
1469 TYPE x_T, res0_T, res1_T;
1472 S2 x_T = (TYPE) x_t;
1473 S3 res0_T = op (x_T, C0);
1474 S4 res1_T = op (res0_T, C1);
1475 S5 ... = () res1_T; - type demotion
1477 where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
1479 Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
1480 be 'type' or some intermediate type. For now, we expect S5 to be a type
1481 demotion operation. We also check that S3 and S4 have only one use. */
1484 vect_recog_over_widening_pattern (vec
<gimple
> *stmts
,
1485 tree
*type_in
, tree
*type_out
)
1487 gimple stmt
= stmts
->pop ();
1488 gimple pattern_stmt
= NULL
, new_def_stmt
, prev_stmt
= NULL
, use_stmt
= NULL
;
1489 tree op0
, op1
, vectype
= NULL_TREE
, use_lhs
, use_type
;
1490 tree var
= NULL_TREE
, new_type
= NULL_TREE
, new_oprnd
;
1497 if (!vinfo_for_stmt (stmt
)
1498 || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt
)))
1501 new_def_stmt
= NULL
;
1502 if (!vect_operation_fits_smaller_type (stmt
, var
, &new_type
,
1503 &op0
, &op1
, &new_def_stmt
,
1512 /* STMT can be performed on a smaller type. Check its uses. */
1513 use_stmt
= vect_single_imm_use (stmt
);
1514 if (!use_stmt
|| !is_gimple_assign (use_stmt
))
1517 /* Create pattern statement for STMT. */
1518 vectype
= get_vectype_for_scalar_type (new_type
);
1522 /* We want to collect all the statements for which we create pattern
1523 statetments, except for the case when the last statement in the
1524 sequence doesn't have a corresponding pattern statement. In such
1525 case we associate the last pattern statement with the last statement
1526 in the sequence. Therefore, we only add the original statement to
1527 the list if we know that it is not the last. */
1529 stmts
->safe_push (prev_stmt
);
1531 var
= vect_recog_temp_ssa_var (new_type
, NULL
);
1533 = gimple_build_assign (var
, gimple_assign_rhs_code (stmt
), op0
, op1
);
1534 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt
)) = pattern_stmt
;
1535 new_pattern_def_seq (vinfo_for_stmt (stmt
), new_def_stmt
);
1537 if (dump_enabled_p ())
1539 dump_printf_loc (MSG_NOTE
, vect_location
,
1540 "created pattern stmt: ");
1541 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
1542 dump_printf (MSG_NOTE
, "\n");
1545 type
= gimple_expr_type (stmt
);
1552 /* We got a sequence. We expect it to end with a type demotion operation.
1553 Otherwise, we quit (for now). There are three possible cases: the
1554 conversion is to NEW_TYPE (we don't do anything), the conversion is to
1555 a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
1556 NEW_TYPE differs (we create a new conversion statement). */
1557 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt
)))
1559 use_lhs
= gimple_assign_lhs (use_stmt
);
1560 use_type
= TREE_TYPE (use_lhs
);
1561 /* Support only type demotion or signedess change. */
1562 if (!INTEGRAL_TYPE_P (use_type
)
1563 || TYPE_PRECISION (type
) <= TYPE_PRECISION (use_type
))
1566 /* Check that NEW_TYPE is not bigger than the conversion result. */
1567 if (TYPE_PRECISION (new_type
) > TYPE_PRECISION (use_type
))
1570 if (TYPE_UNSIGNED (new_type
) != TYPE_UNSIGNED (use_type
)
1571 || TYPE_PRECISION (new_type
) != TYPE_PRECISION (use_type
))
1573 /* Create NEW_TYPE->USE_TYPE conversion. */
1574 new_oprnd
= make_ssa_name (use_type
);
1575 pattern_stmt
= gimple_build_assign (new_oprnd
, NOP_EXPR
, var
);
1576 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt
)) = pattern_stmt
;
1578 *type_in
= get_vectype_for_scalar_type (new_type
);
1579 *type_out
= get_vectype_for_scalar_type (use_type
);
1581 /* We created a pattern statement for the last statement in the
1582 sequence, so we don't need to associate it with the pattern
1583 statement created for PREV_STMT. Therefore, we add PREV_STMT
1584 to the list in order to mark it later in vect_pattern_recog_1. */
1586 stmts
->safe_push (prev_stmt
);
1591 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt
))
1592 = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt
));
1595 *type_out
= NULL_TREE
;
1598 stmts
->safe_push (use_stmt
);
1601 /* TODO: support general case, create a conversion to the correct type. */
1604 /* Pattern detected. */
1605 if (dump_enabled_p ())
1607 dump_printf_loc (MSG_NOTE
, vect_location
,
1608 "vect_recog_over_widening_pattern: detected: ");
1609 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
1610 dump_printf (MSG_NOTE
, "\n");
1613 return pattern_stmt
;
1616 /* Detect widening shift pattern:
1622 S2 a_T = (TYPE) a_t;
1623 S3 res_T = a_T << CONST;
1625 where type 'TYPE' is at least double the size of type 'type'.
1627 Also detect cases where the shift result is immediately converted
1628 to another type 'result_type' that is no larger in size than 'TYPE'.
1629 In those cases we perform a widen-shift that directly results in
1630 'result_type', to avoid a possible over-widening situation:
1634 result_type res_result;
1637 S2 a_T = (TYPE) a_t;
1638 S3 res_T = a_T << CONST;
1639 S4 res_result = (result_type) res_T;
1640 '--> res_result' = a_t w<< CONST;
1642 And a case when 'TYPE' is 4 times bigger than 'type'. In that case we
1643 create an additional pattern stmt for S2 to create a variable of an
1644 intermediate type, and perform widen-shift on the intermediate type:
1648 TYPE a_T, res_T, res_T';
1651 S2 a_T = (TYPE) a_t;
1652 '--> a_it = (interm_type) a_t;
1653 S3 res_T = a_T << CONST;
1654 '--> res_T' = a_it <<* CONST;
1658 * STMTS: Contains a stmt from which the pattern search begins.
1659 In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
1660 in STMTS. When an intermediate type is used and a pattern statement is
1661 created for S2, we also put S2 here (before S3).
1665 * TYPE_IN: The type of the input arguments to the pattern.
1667 * TYPE_OUT: The type of the output of this pattern.
1669 * Return value: A new stmt that will be used to replace the sequence of
1670 stmts that constitute the pattern. In this case it will be:
1671 WIDEN_LSHIFT_EXPR <a_t, CONST>. */
1674 vect_recog_widen_shift_pattern (vec
<gimple
> *stmts
,
1675 tree
*type_in
, tree
*type_out
)
1677 gimple last_stmt
= stmts
->pop ();
1679 tree oprnd0
, oprnd1
;
1680 tree type
, half_type0
;
1681 gimple pattern_stmt
;
1682 tree vectype
, vectype_out
= NULL_TREE
;
1684 enum tree_code dummy_code
;
1686 vec
<tree
> dummy_vec
;
1690 if (!is_gimple_assign (last_stmt
) || !vinfo_for_stmt (last_stmt
))
1693 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt
)))
1696 if (gimple_assign_rhs_code (last_stmt
) != LSHIFT_EXPR
)
1699 oprnd0
= gimple_assign_rhs1 (last_stmt
);
1700 oprnd1
= gimple_assign_rhs2 (last_stmt
);
1701 if (TREE_CODE (oprnd0
) != SSA_NAME
|| TREE_CODE (oprnd1
) != INTEGER_CST
)
1704 /* Check operand 0: it has to be defined by a type promotion. */
1705 if (!type_conversion_p (oprnd0
, last_stmt
, false, &half_type0
, &def_stmt0
,
1710 /* Check operand 1: has to be positive. We check that it fits the type
1711 in vect_handle_widen_op_by_const (). */
1712 if (tree_int_cst_compare (oprnd1
, size_zero_node
) <= 0)
1715 oprnd0
= gimple_assign_rhs1 (def_stmt0
);
1716 type
= gimple_expr_type (last_stmt
);
1718 /* Check for subsequent conversion to another type. */
1719 use_stmt
= vect_single_imm_use (last_stmt
);
1720 if (use_stmt
&& is_gimple_assign (use_stmt
)
1721 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt
))
1722 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt
)))
1724 tree use_lhs
= gimple_assign_lhs (use_stmt
);
1725 tree use_type
= TREE_TYPE (use_lhs
);
1727 if (INTEGRAL_TYPE_P (use_type
)
1728 && TYPE_PRECISION (use_type
) <= TYPE_PRECISION (type
))
1730 last_stmt
= use_stmt
;
1735 /* Check if this a widening operation. */
1736 gimple wstmt
= NULL
;
1737 if (!vect_handle_widen_op_by_const (last_stmt
, LSHIFT_EXPR
, oprnd1
,
1739 type
, &half_type0
, def_stmt0
))
1742 /* Pattern detected. */
1743 if (dump_enabled_p ())
1744 dump_printf_loc (MSG_NOTE
, vect_location
,
1745 "vect_recog_widen_shift_pattern: detected:\n");
1747 /* Check target support. */
1748 vectype
= get_vectype_for_scalar_type (half_type0
);
1749 vectype_out
= get_vectype_for_scalar_type (type
);
1753 || !supportable_widening_operation (WIDEN_LSHIFT_EXPR
, last_stmt
,
1754 vectype_out
, vectype
,
1755 &dummy_code
, &dummy_code
,
1756 &dummy_int
, &dummy_vec
))
1760 *type_out
= vectype_out
;
1762 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1763 var
= vect_recog_temp_ssa_var (type
, NULL
);
1765 gimple_build_assign (var
, WIDEN_LSHIFT_EXPR
, oprnd0
, oprnd1
);
1768 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
1769 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1770 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
1771 new_pattern_def_seq (stmt_vinfo
, wstmt
);
1772 stmt_vec_info new_stmt_info
1773 = new_stmt_vec_info (wstmt
, loop_vinfo
, bb_vinfo
);
1774 set_vinfo_for_stmt (wstmt
, new_stmt_info
);
1775 STMT_VINFO_VECTYPE (new_stmt_info
) = vectype
;
1778 if (dump_enabled_p ())
1779 dump_gimple_stmt_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, pattern_stmt
, 0);
1781 stmts
->safe_push (last_stmt
);
1782 return pattern_stmt
;
1785 /* Detect a rotate pattern wouldn't be otherwise vectorized:
1789 S0 a_t = b_t r<< c_t;
1793 * STMTS: Contains a stmt from which the pattern search begins,
1794 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1798 S2 e_t = d_t & (B - 1);
1799 S3 f_t = b_t << c_t;
1800 S4 g_t = b_t >> e_t;
1803 where B is element bitsize of type.
1807 * TYPE_IN: The type of the input arguments to the pattern.
1809 * TYPE_OUT: The type of the output of this pattern.
1811 * Return value: A new stmt that will be used to replace the rotate
1815 vect_recog_rotate_pattern (vec
<gimple
> *stmts
, tree
*type_in
, tree
*type_out
)
1817 gimple last_stmt
= stmts
->pop ();
1818 tree oprnd0
, oprnd1
, lhs
, var
, var1
, var2
, vectype
, type
, stype
, def
, def2
;
1819 gimple pattern_stmt
, def_stmt
;
1820 enum tree_code rhs_code
;
1821 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
1822 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1823 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
1824 enum vect_def_type dt
;
1825 optab optab1
, optab2
;
1826 edge ext_def
= NULL
;
1828 if (!is_gimple_assign (last_stmt
))
1831 rhs_code
= gimple_assign_rhs_code (last_stmt
);
1841 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
1844 lhs
= gimple_assign_lhs (last_stmt
);
1845 oprnd0
= gimple_assign_rhs1 (last_stmt
);
1846 type
= TREE_TYPE (oprnd0
);
1847 oprnd1
= gimple_assign_rhs2 (last_stmt
);
1848 if (TREE_CODE (oprnd0
) != SSA_NAME
1849 || TYPE_PRECISION (TREE_TYPE (lhs
)) != TYPE_PRECISION (type
)
1850 || !INTEGRAL_TYPE_P (type
)
1851 || !TYPE_UNSIGNED (type
))
1854 if (!vect_is_simple_use (oprnd1
, last_stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
1858 if (dt
!= vect_internal_def
1859 && dt
!= vect_constant_def
1860 && dt
!= vect_external_def
)
1863 vectype
= get_vectype_for_scalar_type (type
);
1864 if (vectype
== NULL_TREE
)
1867 /* If vector/vector or vector/scalar rotate is supported by the target,
1868 don't do anything here. */
1869 optab1
= optab_for_tree_code (rhs_code
, vectype
, optab_vector
);
1871 && optab_handler (optab1
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
1874 if (bb_vinfo
!= NULL
|| dt
!= vect_internal_def
)
1876 optab2
= optab_for_tree_code (rhs_code
, vectype
, optab_scalar
);
1878 && optab_handler (optab2
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
1882 /* If vector/vector or vector/scalar shifts aren't supported by the target,
1883 don't do anything here either. */
1884 optab1
= optab_for_tree_code (LSHIFT_EXPR
, vectype
, optab_vector
);
1885 optab2
= optab_for_tree_code (RSHIFT_EXPR
, vectype
, optab_vector
);
1887 || optab_handler (optab1
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
1889 || optab_handler (optab2
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
1891 if (bb_vinfo
== NULL
&& dt
== vect_internal_def
)
1893 optab1
= optab_for_tree_code (LSHIFT_EXPR
, vectype
, optab_scalar
);
1894 optab2
= optab_for_tree_code (RSHIFT_EXPR
, vectype
, optab_scalar
);
1896 || optab_handler (optab1
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
1898 || optab_handler (optab2
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
1903 *type_out
= vectype
;
1904 if (*type_in
== NULL_TREE
)
1907 if (dt
== vect_external_def
1908 && TREE_CODE (oprnd1
) == SSA_NAME
1911 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1912 ext_def
= loop_preheader_edge (loop
);
1913 if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1
))
1915 basic_block bb
= gimple_bb (SSA_NAME_DEF_STMT (oprnd1
));
1917 || !dominated_by_p (CDI_DOMINATORS
, ext_def
->dest
, bb
))
1923 if (TREE_CODE (oprnd1
) == INTEGER_CST
1924 || TYPE_MODE (TREE_TYPE (oprnd1
)) == TYPE_MODE (type
))
1926 else if (def_stmt
&& gimple_assign_cast_p (def_stmt
))
1928 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
1929 if (TYPE_MODE (TREE_TYPE (rhs1
)) == TYPE_MODE (type
)
1930 && TYPE_PRECISION (TREE_TYPE (rhs1
))
1931 == TYPE_PRECISION (type
))
1935 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
) = NULL
;
1936 if (def
== NULL_TREE
)
1938 def
= vect_recog_temp_ssa_var (type
, NULL
);
1939 def_stmt
= gimple_build_assign (def
, NOP_EXPR
, oprnd1
);
1943 = gsi_insert_on_edge_immediate (ext_def
, def_stmt
);
1944 gcc_assert (!new_bb
);
1947 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
1949 stype
= TREE_TYPE (def
);
1951 if (TREE_CODE (def
) == INTEGER_CST
)
1953 if (!tree_fits_uhwi_p (def
)
1954 || tree_to_uhwi (def
) >= GET_MODE_PRECISION (TYPE_MODE (type
))
1955 || integer_zerop (def
))
1957 def2
= build_int_cst (stype
,
1958 GET_MODE_PRECISION (TYPE_MODE (type
))
1959 - tree_to_uhwi (def
));
1963 tree vecstype
= get_vectype_for_scalar_type (stype
);
1964 stmt_vec_info def_stmt_vinfo
;
1966 if (vecstype
== NULL_TREE
)
1968 def2
= vect_recog_temp_ssa_var (stype
, NULL
);
1969 def_stmt
= gimple_build_assign (def2
, NEGATE_EXPR
, def
);
1973 = gsi_insert_on_edge_immediate (ext_def
, def_stmt
);
1974 gcc_assert (!new_bb
);
1978 def_stmt_vinfo
= new_stmt_vec_info (def_stmt
, loop_vinfo
, bb_vinfo
);
1979 set_vinfo_for_stmt (def_stmt
, def_stmt_vinfo
);
1980 STMT_VINFO_VECTYPE (def_stmt_vinfo
) = vecstype
;
1981 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
1984 def2
= vect_recog_temp_ssa_var (stype
, NULL
);
1986 = build_int_cst (stype
, GET_MODE_PRECISION (TYPE_MODE (stype
)) - 1);
1987 def_stmt
= gimple_build_assign (def2
, BIT_AND_EXPR
,
1988 gimple_assign_lhs (def_stmt
), mask
);
1992 = gsi_insert_on_edge_immediate (ext_def
, def_stmt
);
1993 gcc_assert (!new_bb
);
1997 def_stmt_vinfo
= new_stmt_vec_info (def_stmt
, loop_vinfo
, bb_vinfo
);
1998 set_vinfo_for_stmt (def_stmt
, def_stmt_vinfo
);
1999 STMT_VINFO_VECTYPE (def_stmt_vinfo
) = vecstype
;
2000 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2004 var1
= vect_recog_temp_ssa_var (type
, NULL
);
2005 def_stmt
= gimple_build_assign (var1
, rhs_code
== LROTATE_EXPR
2006 ? LSHIFT_EXPR
: RSHIFT_EXPR
,
2008 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2010 var2
= vect_recog_temp_ssa_var (type
, NULL
);
2011 def_stmt
= gimple_build_assign (var2
, rhs_code
== LROTATE_EXPR
2012 ? RSHIFT_EXPR
: LSHIFT_EXPR
,
2014 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2016 /* Pattern detected. */
2017 if (dump_enabled_p ())
2018 dump_printf_loc (MSG_NOTE
, vect_location
,
2019 "vect_recog_rotate_pattern: detected:\n");
2021 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2022 var
= vect_recog_temp_ssa_var (type
, NULL
);
2023 pattern_stmt
= gimple_build_assign (var
, BIT_IOR_EXPR
, var1
, var2
);
2025 if (dump_enabled_p ())
2026 dump_gimple_stmt_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, pattern_stmt
, 0);
2028 stmts
->safe_push (last_stmt
);
2029 return pattern_stmt
;
2032 /* Detect a vector by vector shift pattern that wouldn't be otherwise
2040 S3 res_T = b_T op a_t;
2042 where type 'TYPE' is a type with different size than 'type',
2043 and op is <<, >> or rotate.
2048 TYPE b_T, c_T, res_T;
2051 S1 a_t = (type) c_T;
2053 S3 res_T = b_T op a_t;
2057 * STMTS: Contains a stmt from which the pattern search begins,
2058 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2059 with a shift/rotate which has same type on both operands, in the
2060 second case just b_T op c_T, in the first case with added cast
2061 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2065 * TYPE_IN: The type of the input arguments to the pattern.
2067 * TYPE_OUT: The type of the output of this pattern.
2069 * Return value: A new stmt that will be used to replace the shift/rotate
2073 vect_recog_vector_vector_shift_pattern (vec
<gimple
> *stmts
,
2074 tree
*type_in
, tree
*type_out
)
2076 gimple last_stmt
= stmts
->pop ();
2077 tree oprnd0
, oprnd1
, lhs
, var
;
2078 gimple pattern_stmt
, def_stmt
;
2079 enum tree_code rhs_code
;
2080 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
2081 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
2082 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
2083 enum vect_def_type dt
;
2086 if (!is_gimple_assign (last_stmt
))
2089 rhs_code
= gimple_assign_rhs_code (last_stmt
);
2101 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
2104 lhs
= gimple_assign_lhs (last_stmt
);
2105 oprnd0
= gimple_assign_rhs1 (last_stmt
);
2106 oprnd1
= gimple_assign_rhs2 (last_stmt
);
2107 if (TREE_CODE (oprnd0
) != SSA_NAME
2108 || TREE_CODE (oprnd1
) != SSA_NAME
2109 || TYPE_MODE (TREE_TYPE (oprnd0
)) == TYPE_MODE (TREE_TYPE (oprnd1
))
2110 || TYPE_PRECISION (TREE_TYPE (oprnd1
))
2111 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1
)))
2112 || TYPE_PRECISION (TREE_TYPE (lhs
))
2113 != TYPE_PRECISION (TREE_TYPE (oprnd0
)))
2116 if (!vect_is_simple_use (oprnd1
, last_stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
2120 if (dt
!= vect_internal_def
)
2123 *type_in
= get_vectype_for_scalar_type (TREE_TYPE (oprnd0
));
2124 *type_out
= *type_in
;
2125 if (*type_in
== NULL_TREE
)
2129 if (gimple_assign_cast_p (def_stmt
))
2131 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
2132 if (TYPE_MODE (TREE_TYPE (rhs1
)) == TYPE_MODE (TREE_TYPE (oprnd0
))
2133 && TYPE_PRECISION (TREE_TYPE (rhs1
))
2134 == TYPE_PRECISION (TREE_TYPE (oprnd0
)))
2138 if (def
== NULL_TREE
)
2140 def
= vect_recog_temp_ssa_var (TREE_TYPE (oprnd0
), NULL
);
2141 def_stmt
= gimple_build_assign (def
, NOP_EXPR
, oprnd1
);
2142 new_pattern_def_seq (stmt_vinfo
, def_stmt
);
2145 /* Pattern detected. */
2146 if (dump_enabled_p ())
2147 dump_printf_loc (MSG_NOTE
, vect_location
,
2148 "vect_recog_vector_vector_shift_pattern: detected:\n");
2150 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2151 var
= vect_recog_temp_ssa_var (TREE_TYPE (oprnd0
), NULL
);
2152 pattern_stmt
= gimple_build_assign (var
, rhs_code
, oprnd0
, def
);
2154 if (dump_enabled_p ())
2155 dump_gimple_stmt_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, pattern_stmt
, 0);
2157 stmts
->safe_push (last_stmt
);
2158 return pattern_stmt
;
2161 /* Detect a signed division by a constant that wouldn't be
2162 otherwise vectorized:
2168 where type 'type' is an integral type and N is a constant.
2170 Similarly handle modulo by a constant:
2176 * STMTS: Contains a stmt from which the pattern search begins,
2177 i.e. the division stmt. S1 is replaced by if N is a power
2178 of two constant and type is signed:
2179 S3 y_t = b_t < 0 ? N - 1 : 0;
2181 S1' a_t = x_t >> log2 (N);
2183 S4 is replaced if N is a power of two constant and
2184 type is signed by (where *_T temporaries have unsigned type):
2185 S9 y_T = b_t < 0 ? -1U : 0U;
2186 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2187 S7 z_t = (type) z_T;
2189 S5 x_t = w_t & (N - 1);
2190 S4' a_t = x_t - z_t;
2194 * TYPE_IN: The type of the input arguments to the pattern.
2196 * TYPE_OUT: The type of the output of this pattern.
2198 * Return value: A new stmt that will be used to replace the division
2199 S1 or modulo S4 stmt. */
2202 vect_recog_divmod_pattern (vec
<gimple
> *stmts
,
2203 tree
*type_in
, tree
*type_out
)
2205 gimple last_stmt
= stmts
->pop ();
2206 tree oprnd0
, oprnd1
, vectype
, itype
, cond
;
2207 gimple pattern_stmt
, def_stmt
;
2208 enum tree_code rhs_code
;
2209 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
2210 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
2211 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
2214 int dummy_int
, prec
;
2215 stmt_vec_info def_stmt_vinfo
;
2217 if (!is_gimple_assign (last_stmt
))
2220 rhs_code
= gimple_assign_rhs_code (last_stmt
);
2223 case TRUNC_DIV_EXPR
:
2224 case TRUNC_MOD_EXPR
:
2230 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
2233 oprnd0
= gimple_assign_rhs1 (last_stmt
);
2234 oprnd1
= gimple_assign_rhs2 (last_stmt
);
2235 itype
= TREE_TYPE (oprnd0
);
2236 if (TREE_CODE (oprnd0
) != SSA_NAME
2237 || TREE_CODE (oprnd1
) != INTEGER_CST
2238 || TREE_CODE (itype
) != INTEGER_TYPE
2239 || TYPE_PRECISION (itype
) != GET_MODE_PRECISION (TYPE_MODE (itype
)))
2242 vectype
= get_vectype_for_scalar_type (itype
);
2243 if (vectype
== NULL_TREE
)
2246 /* If the target can handle vectorized division or modulo natively,
2247 don't attempt to optimize this. */
2248 optab
= optab_for_tree_code (rhs_code
, vectype
, optab_default
);
2249 if (optab
!= unknown_optab
)
2251 machine_mode vec_mode
= TYPE_MODE (vectype
);
2252 int icode
= (int) optab_handler (optab
, vec_mode
);
2253 if (icode
!= CODE_FOR_nothing
)
2257 prec
= TYPE_PRECISION (itype
);
2258 if (integer_pow2p (oprnd1
))
2260 if (TYPE_UNSIGNED (itype
) || tree_int_cst_sgn (oprnd1
) != 1)
2263 /* Pattern detected. */
2264 if (dump_enabled_p ())
2265 dump_printf_loc (MSG_NOTE
, vect_location
,
2266 "vect_recog_divmod_pattern: detected:\n");
2268 cond
= build2 (LT_EXPR
, boolean_type_node
, oprnd0
,
2269 build_int_cst (itype
, 0));
2270 if (rhs_code
== TRUNC_DIV_EXPR
)
2272 tree var
= vect_recog_temp_ssa_var (itype
, NULL
);
2275 = gimple_build_assign (var
, COND_EXPR
, cond
,
2276 fold_build2 (MINUS_EXPR
, itype
, oprnd1
,
2277 build_int_cst (itype
, 1)),
2278 build_int_cst (itype
, 0));
2279 new_pattern_def_seq (stmt_vinfo
, def_stmt
);
2280 var
= vect_recog_temp_ssa_var (itype
, NULL
);
2282 = gimple_build_assign (var
, PLUS_EXPR
, oprnd0
,
2283 gimple_assign_lhs (def_stmt
));
2284 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2286 shift
= build_int_cst (itype
, tree_log2 (oprnd1
));
2288 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2289 RSHIFT_EXPR
, var
, shift
);
2294 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
) = NULL
;
2295 if (compare_tree_int (oprnd1
, 2) == 0)
2297 signmask
= vect_recog_temp_ssa_var (itype
, NULL
);
2298 def_stmt
= gimple_build_assign (signmask
, COND_EXPR
, cond
,
2299 build_int_cst (itype
, 1),
2300 build_int_cst (itype
, 0));
2301 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2306 = build_nonstandard_integer_type (prec
, 1);
2307 tree vecutype
= get_vectype_for_scalar_type (utype
);
2309 = build_int_cst (utype
, GET_MODE_BITSIZE (TYPE_MODE (itype
))
2310 - tree_log2 (oprnd1
));
2311 tree var
= vect_recog_temp_ssa_var (utype
, NULL
);
2313 def_stmt
= gimple_build_assign (var
, COND_EXPR
, cond
,
2314 build_int_cst (utype
, -1),
2315 build_int_cst (utype
, 0));
2317 = new_stmt_vec_info (def_stmt
, loop_vinfo
, bb_vinfo
);
2318 set_vinfo_for_stmt (def_stmt
, def_stmt_vinfo
);
2319 STMT_VINFO_VECTYPE (def_stmt_vinfo
) = vecutype
;
2320 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2321 var
= vect_recog_temp_ssa_var (utype
, NULL
);
2322 def_stmt
= gimple_build_assign (var
, RSHIFT_EXPR
,
2323 gimple_assign_lhs (def_stmt
),
2326 = new_stmt_vec_info (def_stmt
, loop_vinfo
, bb_vinfo
);
2327 set_vinfo_for_stmt (def_stmt
, def_stmt_vinfo
);
2328 STMT_VINFO_VECTYPE (def_stmt_vinfo
) = vecutype
;
2329 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2330 signmask
= vect_recog_temp_ssa_var (itype
, NULL
);
2332 = gimple_build_assign (signmask
, NOP_EXPR
, var
);
2333 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2336 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2337 PLUS_EXPR
, oprnd0
, signmask
);
2338 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2340 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2341 BIT_AND_EXPR
, gimple_assign_lhs (def_stmt
),
2342 fold_build2 (MINUS_EXPR
, itype
, oprnd1
,
2343 build_int_cst (itype
, 1)));
2344 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2347 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2348 MINUS_EXPR
, gimple_assign_lhs (def_stmt
),
2352 if (dump_enabled_p ())
2353 dump_gimple_stmt_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, pattern_stmt
,
2356 stmts
->safe_push (last_stmt
);
2359 *type_out
= vectype
;
2360 return pattern_stmt
;
2363 if (prec
> HOST_BITS_PER_WIDE_INT
2364 || integer_zerop (oprnd1
))
2367 if (!can_mult_highpart_p (TYPE_MODE (vectype
), TYPE_UNSIGNED (itype
)))
2370 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
) = NULL
;
2372 if (TYPE_UNSIGNED (itype
))
2374 unsigned HOST_WIDE_INT mh
, ml
;
2375 int pre_shift
, post_shift
;
2376 unsigned HOST_WIDE_INT d
= (TREE_INT_CST_LOW (oprnd1
)
2377 & GET_MODE_MASK (TYPE_MODE (itype
)));
2378 tree t1
, t2
, t3
, t4
;
2380 if (d
>= ((unsigned HOST_WIDE_INT
) 1 << (prec
- 1)))
2381 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2384 /* Find a suitable multiplier and right shift count
2385 instead of multiplying with D. */
2386 mh
= choose_multiplier (d
, prec
, prec
, &ml
, &post_shift
, &dummy_int
);
2388 /* If the suggested multiplier is more than SIZE bits, we can do better
2389 for even divisors, using an initial right shift. */
2390 if (mh
!= 0 && (d
& 1) == 0)
2392 pre_shift
= floor_log2 (d
& -d
);
2393 mh
= choose_multiplier (d
>> pre_shift
, prec
, prec
- pre_shift
,
2394 &ml
, &post_shift
, &dummy_int
);
2402 if (post_shift
- 1 >= prec
)
2405 /* t1 = oprnd0 h* ml;
2409 q = t4 >> (post_shift - 1); */
2410 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
2411 def_stmt
= gimple_build_assign (t1
, MULT_HIGHPART_EXPR
, oprnd0
,
2412 build_int_cst (itype
, ml
));
2413 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2415 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
2417 = gimple_build_assign (t2
, MINUS_EXPR
, oprnd0
, t1
);
2418 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2420 t3
= vect_recog_temp_ssa_var (itype
, NULL
);
2422 = gimple_build_assign (t3
, RSHIFT_EXPR
, t2
, integer_one_node
);
2423 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2425 t4
= vect_recog_temp_ssa_var (itype
, NULL
);
2427 = gimple_build_assign (t4
, PLUS_EXPR
, t1
, t3
);
2429 if (post_shift
!= 1)
2431 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2433 q
= vect_recog_temp_ssa_var (itype
, NULL
);
2435 = gimple_build_assign (q
, RSHIFT_EXPR
, t4
,
2436 build_int_cst (itype
, post_shift
- 1));
2441 pattern_stmt
= def_stmt
;
2446 if (pre_shift
>= prec
|| post_shift
>= prec
)
2449 /* t1 = oprnd0 >> pre_shift;
2451 q = t2 >> post_shift; */
2454 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
2456 = gimple_build_assign (t1
, RSHIFT_EXPR
, oprnd0
,
2457 build_int_cst (NULL
, pre_shift
));
2458 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2463 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
2464 def_stmt
= gimple_build_assign (t2
, MULT_HIGHPART_EXPR
, t1
,
2465 build_int_cst (itype
, ml
));
2469 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2471 q
= vect_recog_temp_ssa_var (itype
, NULL
);
2473 = gimple_build_assign (q
, RSHIFT_EXPR
, t2
,
2474 build_int_cst (itype
, post_shift
));
2479 pattern_stmt
= def_stmt
;
2484 unsigned HOST_WIDE_INT ml
;
2486 HOST_WIDE_INT d
= TREE_INT_CST_LOW (oprnd1
);
2487 unsigned HOST_WIDE_INT abs_d
;
2489 tree t1
, t2
, t3
, t4
;
2491 /* Give up for -1. */
2495 /* Since d might be INT_MIN, we have to cast to
2496 unsigned HOST_WIDE_INT before negating to avoid
2497 undefined signed overflow. */
2499 ? (unsigned HOST_WIDE_INT
) d
2500 : - (unsigned HOST_WIDE_INT
) d
);
2502 /* n rem d = n rem -d */
2503 if (rhs_code
== TRUNC_MOD_EXPR
&& d
< 0)
2506 oprnd1
= build_int_cst (itype
, abs_d
);
2508 else if (HOST_BITS_PER_WIDE_INT
>= prec
2509 && abs_d
== (unsigned HOST_WIDE_INT
) 1 << (prec
- 1))
2510 /* This case is not handled correctly below. */
2513 choose_multiplier (abs_d
, prec
, prec
- 1, &ml
, &post_shift
, &dummy_int
);
2514 if (ml
>= (unsigned HOST_WIDE_INT
) 1 << (prec
- 1))
2517 ml
|= (~(unsigned HOST_WIDE_INT
) 0) << (prec
- 1);
2519 if (post_shift
>= prec
)
2522 /* t1 = oprnd0 h* ml; */
2523 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
2524 def_stmt
= gimple_build_assign (t1
, MULT_HIGHPART_EXPR
, oprnd0
,
2525 build_int_cst (itype
, ml
));
2529 /* t2 = t1 + oprnd0; */
2530 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2531 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
2532 def_stmt
= gimple_build_assign (t2
, PLUS_EXPR
, t1
, oprnd0
);
2539 /* t3 = t2 >> post_shift; */
2540 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2541 t3
= vect_recog_temp_ssa_var (itype
, NULL
);
2542 def_stmt
= gimple_build_assign (t3
, RSHIFT_EXPR
, t2
,
2543 build_int_cst (itype
, post_shift
));
2548 wide_int oprnd0_min
, oprnd0_max
;
2550 if (get_range_info (oprnd0
, &oprnd0_min
, &oprnd0_max
) == VR_RANGE
)
2552 if (!wi::neg_p (oprnd0_min
, TYPE_SIGN (itype
)))
2554 else if (wi::neg_p (oprnd0_max
, TYPE_SIGN (itype
)))
2558 if (msb
== 0 && d
>= 0)
2562 pattern_stmt
= def_stmt
;
2566 /* t4 = oprnd0 >> (prec - 1);
2567 or if we know from VRP that oprnd0 >= 0
2569 or if we know from VRP that oprnd0 < 0
2571 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2572 t4
= vect_recog_temp_ssa_var (itype
, NULL
);
2574 def_stmt
= gimple_build_assign (t4
, INTEGER_CST
,
2575 build_int_cst (itype
, msb
));
2577 def_stmt
= gimple_build_assign (t4
, RSHIFT_EXPR
, oprnd0
,
2578 build_int_cst (itype
, prec
- 1));
2579 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2581 /* q = t3 - t4; or q = t4 - t3; */
2582 q
= vect_recog_temp_ssa_var (itype
, NULL
);
2583 pattern_stmt
= gimple_build_assign (q
, MINUS_EXPR
, d
< 0 ? t4
: t3
,
2588 if (rhs_code
== TRUNC_MOD_EXPR
)
2592 /* We divided. Now finish by:
2595 append_pattern_def_seq (stmt_vinfo
, pattern_stmt
);
2597 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
2598 def_stmt
= gimple_build_assign (t1
, MULT_EXPR
, q
, oprnd1
);
2599 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2601 r
= vect_recog_temp_ssa_var (itype
, NULL
);
2602 pattern_stmt
= gimple_build_assign (r
, MINUS_EXPR
, oprnd0
, t1
);
2605 /* Pattern detected. */
2606 if (dump_enabled_p ())
2608 dump_printf_loc (MSG_NOTE
, vect_location
,
2609 "vect_recog_divmod_pattern: detected: ");
2610 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
2611 dump_printf (MSG_NOTE
, "\n");
2614 stmts
->safe_push (last_stmt
);
2617 *type_out
= vectype
;
2618 return pattern_stmt
;
2621 /* Function vect_recog_mixed_size_cond_pattern
2623 Try to find the following pattern:
2628 S1 a_T = x_t CMP y_t ? b_T : c_T;
2630 where type 'TYPE' is an integral type which has different size
2631 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
2632 than 'type', the constants need to fit into an integer type
2633 with the same width as 'type') or results of conversion from 'type'.
2637 * LAST_STMT: A stmt from which the pattern search begins.
2641 * TYPE_IN: The type of the input arguments to the pattern.
2643 * TYPE_OUT: The type of the output of this pattern.
2645 * Return value: A new stmt that will be used to replace the pattern.
2646 Additionally a def_stmt is added.
2648 a_it = x_t CMP y_t ? b_it : c_it;
2649 a_T = (TYPE) a_it; */
2652 vect_recog_mixed_size_cond_pattern (vec
<gimple
> *stmts
, tree
*type_in
,
2655 gimple last_stmt
= (*stmts
)[0];
2656 tree cond_expr
, then_clause
, else_clause
;
2657 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
), def_stmt_info
;
2658 tree type
, vectype
, comp_vectype
, itype
= NULL_TREE
, vecitype
;
2659 machine_mode cmpmode
;
2660 gimple pattern_stmt
, def_stmt
;
2661 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
2662 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
2663 tree orig_type0
= NULL_TREE
, orig_type1
= NULL_TREE
;
2664 gimple def_stmt0
= NULL
, def_stmt1
= NULL
;
2666 tree comp_scalar_type
;
2668 if (!is_gimple_assign (last_stmt
)
2669 || gimple_assign_rhs_code (last_stmt
) != COND_EXPR
2670 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_internal_def
)
2673 cond_expr
= gimple_assign_rhs1 (last_stmt
);
2674 then_clause
= gimple_assign_rhs2 (last_stmt
);
2675 else_clause
= gimple_assign_rhs3 (last_stmt
);
2677 if (!COMPARISON_CLASS_P (cond_expr
))
2680 comp_scalar_type
= TREE_TYPE (TREE_OPERAND (cond_expr
, 0));
2681 comp_vectype
= get_vectype_for_scalar_type (comp_scalar_type
);
2682 if (comp_vectype
== NULL_TREE
)
2685 type
= gimple_expr_type (last_stmt
);
2686 if (types_compatible_p (type
, comp_scalar_type
)
2687 || ((TREE_CODE (then_clause
) != INTEGER_CST
2688 || TREE_CODE (else_clause
) != INTEGER_CST
)
2689 && !INTEGRAL_TYPE_P (comp_scalar_type
))
2690 || !INTEGRAL_TYPE_P (type
))
2693 if ((TREE_CODE (then_clause
) != INTEGER_CST
2694 && !type_conversion_p (then_clause
, last_stmt
, false, &orig_type0
,
2695 &def_stmt0
, &promotion
))
2696 || (TREE_CODE (else_clause
) != INTEGER_CST
2697 && !type_conversion_p (else_clause
, last_stmt
, false, &orig_type1
,
2698 &def_stmt1
, &promotion
)))
2701 if (orig_type0
&& orig_type1
2702 && !types_compatible_p (orig_type0
, orig_type1
))
2707 if (!types_compatible_p (orig_type0
, comp_scalar_type
))
2709 then_clause
= gimple_assign_rhs1 (def_stmt0
);
2715 if (!types_compatible_p (orig_type1
, comp_scalar_type
))
2717 else_clause
= gimple_assign_rhs1 (def_stmt1
);
2721 cmpmode
= GET_MODE_INNER (TYPE_MODE (comp_vectype
));
2723 if (GET_MODE_BITSIZE (TYPE_MODE (type
)) == GET_MODE_BITSIZE (cmpmode
))
2726 vectype
= get_vectype_for_scalar_type (type
);
2727 if (vectype
== NULL_TREE
)
2730 if (expand_vec_cond_expr_p (vectype
, comp_vectype
))
2733 if (itype
== NULL_TREE
)
2734 itype
= build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode
),
2735 TYPE_UNSIGNED (type
));
2737 if (itype
== NULL_TREE
2738 || GET_MODE_BITSIZE (TYPE_MODE (itype
)) != GET_MODE_BITSIZE (cmpmode
))
2741 vecitype
= get_vectype_for_scalar_type (itype
);
2742 if (vecitype
== NULL_TREE
)
2745 if (!expand_vec_cond_expr_p (vecitype
, comp_vectype
))
2748 if (GET_MODE_BITSIZE (TYPE_MODE (type
)) > GET_MODE_BITSIZE (cmpmode
))
2750 if ((TREE_CODE (then_clause
) == INTEGER_CST
2751 && !int_fits_type_p (then_clause
, itype
))
2752 || (TREE_CODE (else_clause
) == INTEGER_CST
2753 && !int_fits_type_p (else_clause
, itype
)))
2757 def_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2758 COND_EXPR
, unshare_expr (cond_expr
),
2759 fold_convert (itype
, then_clause
),
2760 fold_convert (itype
, else_clause
));
2761 pattern_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (type
, NULL
),
2762 NOP_EXPR
, gimple_assign_lhs (def_stmt
));
2764 new_pattern_def_seq (stmt_vinfo
, def_stmt
);
2765 def_stmt_info
= new_stmt_vec_info (def_stmt
, loop_vinfo
, bb_vinfo
);
2766 set_vinfo_for_stmt (def_stmt
, def_stmt_info
);
2767 STMT_VINFO_VECTYPE (def_stmt_info
) = vecitype
;
2768 *type_in
= vecitype
;
2769 *type_out
= vectype
;
2771 if (dump_enabled_p ())
2772 dump_printf_loc (MSG_NOTE
, vect_location
,
2773 "vect_recog_mixed_size_cond_pattern: detected:\n");
2775 return pattern_stmt
;
2779 /* Helper function of vect_recog_bool_pattern. Called recursively, return
2780 true if bool VAR can be optimized that way. */
2783 check_bool_pattern (tree var
, loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
)
2786 enum vect_def_type dt
;
2788 enum tree_code rhs_code
;
2790 if (!vect_is_simple_use (var
, NULL
, loop_vinfo
, bb_vinfo
, &def_stmt
, &def
,
2794 if (dt
!= vect_internal_def
)
2797 if (!is_gimple_assign (def_stmt
))
2800 if (!has_single_use (def
))
2803 rhs1
= gimple_assign_rhs1 (def_stmt
);
2804 rhs_code
= gimple_assign_rhs_code (def_stmt
);
2808 return check_bool_pattern (rhs1
, loop_vinfo
, bb_vinfo
);
2811 if ((TYPE_PRECISION (TREE_TYPE (rhs1
)) != 1
2812 || !TYPE_UNSIGNED (TREE_TYPE (rhs1
)))
2813 && TREE_CODE (TREE_TYPE (rhs1
)) != BOOLEAN_TYPE
)
2815 return check_bool_pattern (rhs1
, loop_vinfo
, bb_vinfo
);
2818 return check_bool_pattern (rhs1
, loop_vinfo
, bb_vinfo
);
2823 if (!check_bool_pattern (rhs1
, loop_vinfo
, bb_vinfo
))
2825 return check_bool_pattern (gimple_assign_rhs2 (def_stmt
), loop_vinfo
,
2829 if (TREE_CODE_CLASS (rhs_code
) == tcc_comparison
)
2831 tree vecitype
, comp_vectype
;
2833 /* If the comparison can throw, then is_gimple_condexpr will be
2834 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
2835 if (stmt_could_throw_p (def_stmt
))
2838 comp_vectype
= get_vectype_for_scalar_type (TREE_TYPE (rhs1
));
2839 if (comp_vectype
== NULL_TREE
)
2842 if (TREE_CODE (TREE_TYPE (rhs1
)) != INTEGER_TYPE
)
2844 machine_mode mode
= TYPE_MODE (TREE_TYPE (rhs1
));
2846 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode
), 1);
2847 vecitype
= get_vectype_for_scalar_type (itype
);
2848 if (vecitype
== NULL_TREE
)
2852 vecitype
= comp_vectype
;
2853 return expand_vec_cond_expr_p (vecitype
, comp_vectype
);
2860 /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
2861 stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
2862 to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT. */
2865 adjust_bool_pattern_cast (tree type
, tree var
)
2867 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (SSA_NAME_DEF_STMT (var
));
2868 gimple cast_stmt
, pattern_stmt
;
2870 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
));
2871 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_vinfo
);
2872 new_pattern_def_seq (stmt_vinfo
, pattern_stmt
);
2873 cast_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (type
, NULL
),
2874 NOP_EXPR
, gimple_assign_lhs (pattern_stmt
));
2875 STMT_VINFO_RELATED_STMT (stmt_vinfo
) = cast_stmt
;
2876 return gimple_assign_lhs (cast_stmt
);
2880 /* Helper function of vect_recog_bool_pattern. Do the actual transformations,
2881 recursively. VAR is an SSA_NAME that should be transformed from bool
2882 to a wider integer type, OUT_TYPE is the desired final integer type of
2883 the whole pattern, TRUEVAL should be NULL unless optimizing
2884 BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
2885 in the then_clause, STMTS is where statements with added pattern stmts
2886 should be pushed to. */
2889 adjust_bool_pattern (tree var
, tree out_type
, tree trueval
,
2892 gimple stmt
= SSA_NAME_DEF_STMT (var
);
2893 enum tree_code rhs_code
, def_rhs_code
;
2894 tree itype
, cond_expr
, rhs1
, rhs2
, irhs1
, irhs2
;
2896 gimple pattern_stmt
, def_stmt
;
2898 rhs1
= gimple_assign_rhs1 (stmt
);
2899 rhs2
= gimple_assign_rhs2 (stmt
);
2900 rhs_code
= gimple_assign_rhs_code (stmt
);
2901 loc
= gimple_location (stmt
);
2906 irhs1
= adjust_bool_pattern (rhs1
, out_type
, NULL_TREE
, stmts
);
2907 itype
= TREE_TYPE (irhs1
);
2909 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2914 irhs1
= adjust_bool_pattern (rhs1
, out_type
, NULL_TREE
, stmts
);
2915 itype
= TREE_TYPE (irhs1
);
2917 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2918 BIT_XOR_EXPR
, irhs1
, build_int_cst (itype
, 1));
2922 /* Try to optimize x = y & (a < b ? 1 : 0); into
2923 x = (a < b ? y : 0);
2929 S1 a_b = x1 CMP1 y1;
2930 S2 b_b = x2 CMP2 y2;
2932 S4 d_T = (TYPE) c_b;
2934 we would normally emit:
2936 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2937 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2938 S3' c_T = a_T & b_T;
2941 but we can save one stmt by using the
2942 result of one of the COND_EXPRs in the other COND_EXPR and leave
2943 BIT_AND_EXPR stmt out:
2945 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2946 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2949 At least when VEC_COND_EXPR is implemented using masks
2950 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
2951 computes the comparison masks and ands it, in one case with
2952 all ones vector, in the other case with a vector register.
2953 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
2954 often more expensive. */
2955 def_stmt
= SSA_NAME_DEF_STMT (rhs2
);
2956 def_rhs_code
= gimple_assign_rhs_code (def_stmt
);
2957 if (TREE_CODE_CLASS (def_rhs_code
) == tcc_comparison
)
2959 tree def_rhs1
= gimple_assign_rhs1 (def_stmt
);
2960 irhs1
= adjust_bool_pattern (rhs1
, out_type
, NULL_TREE
, stmts
);
2961 if (TYPE_PRECISION (TREE_TYPE (irhs1
))
2962 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1
))))
2965 stmt_vec_info stmt_def_vinfo
= vinfo_for_stmt (def_stmt
);
2966 irhs2
= adjust_bool_pattern (rhs2
, out_type
, irhs1
, stmts
);
2967 tstmt
= stmts
->pop ();
2968 gcc_assert (tstmt
== def_stmt
);
2969 stmts
->quick_push (stmt
);
2970 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt
))
2971 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo
);
2972 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo
));
2973 STMT_VINFO_RELATED_STMT (stmt_def_vinfo
) = NULL
;
2977 irhs2
= adjust_bool_pattern (rhs2
, out_type
, NULL_TREE
, stmts
);
2980 def_stmt
= SSA_NAME_DEF_STMT (rhs1
);
2981 def_rhs_code
= gimple_assign_rhs_code (def_stmt
);
2982 if (TREE_CODE_CLASS (def_rhs_code
) == tcc_comparison
)
2984 tree def_rhs1
= gimple_assign_rhs1 (def_stmt
);
2985 irhs2
= adjust_bool_pattern (rhs2
, out_type
, NULL_TREE
, stmts
);
2986 if (TYPE_PRECISION (TREE_TYPE (irhs2
))
2987 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1
))))
2990 stmt_vec_info stmt_def_vinfo
= vinfo_for_stmt (def_stmt
);
2991 irhs1
= adjust_bool_pattern (rhs1
, out_type
, irhs2
, stmts
);
2992 tstmt
= stmts
->pop ();
2993 gcc_assert (tstmt
== def_stmt
);
2994 stmts
->quick_push (stmt
);
2995 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt
))
2996 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo
);
2997 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo
));
2998 STMT_VINFO_RELATED_STMT (stmt_def_vinfo
) = NULL
;
3002 irhs1
= adjust_bool_pattern (rhs1
, out_type
, NULL_TREE
, stmts
);
3008 irhs1
= adjust_bool_pattern (rhs1
, out_type
, NULL_TREE
, stmts
);
3009 irhs2
= adjust_bool_pattern (rhs2
, out_type
, NULL_TREE
, stmts
);
3011 if (TYPE_PRECISION (TREE_TYPE (irhs1
))
3012 != TYPE_PRECISION (TREE_TYPE (irhs2
)))
3014 int prec1
= TYPE_PRECISION (TREE_TYPE (irhs1
));
3015 int prec2
= TYPE_PRECISION (TREE_TYPE (irhs2
));
3016 int out_prec
= TYPE_PRECISION (out_type
);
3017 if (absu_hwi (out_prec
- prec1
) < absu_hwi (out_prec
- prec2
))
3018 irhs2
= adjust_bool_pattern_cast (TREE_TYPE (irhs1
), rhs2
);
3019 else if (absu_hwi (out_prec
- prec1
) > absu_hwi (out_prec
- prec2
))
3020 irhs1
= adjust_bool_pattern_cast (TREE_TYPE (irhs2
), rhs1
);
3023 irhs1
= adjust_bool_pattern_cast (out_type
, rhs1
);
3024 irhs2
= adjust_bool_pattern_cast (out_type
, rhs2
);
3027 itype
= TREE_TYPE (irhs1
);
3029 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
3030 rhs_code
, irhs1
, irhs2
);
3034 gcc_assert (TREE_CODE_CLASS (rhs_code
) == tcc_comparison
);
3035 if (TREE_CODE (TREE_TYPE (rhs1
)) != INTEGER_TYPE
3036 || !TYPE_UNSIGNED (TREE_TYPE (rhs1
))
3037 || (TYPE_PRECISION (TREE_TYPE (rhs1
))
3038 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1
)))))
3040 machine_mode mode
= TYPE_MODE (TREE_TYPE (rhs1
));
3042 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode
), 1);
3045 itype
= TREE_TYPE (rhs1
);
3046 cond_expr
= build2_loc (loc
, rhs_code
, itype
, rhs1
, rhs2
);
3047 if (trueval
== NULL_TREE
)
3048 trueval
= build_int_cst (itype
, 1);
3050 gcc_checking_assert (useless_type_conversion_p (itype
,
3051 TREE_TYPE (trueval
)));
3053 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
3054 COND_EXPR
, cond_expr
, trueval
,
3055 build_int_cst (itype
, 0));
3059 stmts
->safe_push (stmt
);
3060 gimple_set_location (pattern_stmt
, loc
);
3061 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt
)) = pattern_stmt
;
3062 return gimple_assign_lhs (pattern_stmt
);
3066 /* Function vect_recog_bool_pattern
3068 Try to find pattern like following:
3070 bool a_b, b_b, c_b, d_b, e_b;
3073 S1 a_b = x1 CMP1 y1;
3074 S2 b_b = x2 CMP2 y2;
3076 S4 d_b = x3 CMP3 y3;
3078 S6 f_T = (TYPE) e_b;
3080 where type 'TYPE' is an integral type. Or a similar pattern
3083 S6 f_Y = e_b ? r_Y : s_Y;
3085 as results from if-conversion of a complex condition.
3089 * LAST_STMT: A stmt at the end from which the pattern
3090 search begins, i.e. cast of a bool to
3095 * TYPE_IN: The type of the input arguments to the pattern.
3097 * TYPE_OUT: The type of the output of this pattern.
3099 * Return value: A new stmt that will be used to replace the pattern.
3101 Assuming size of TYPE is the same as size of all comparisons
3102 (otherwise some casts would be added where needed), the above
3103 sequence we create related pattern stmts:
3104 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3105 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3106 S4' d_T = x3 CMP3 y3 ? 1 : 0;
3107 S5' e_T = c_T | d_T;
3110 Instead of the above S3' we could emit:
3111 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3112 S3' c_T = a_T | b_T;
3113 but the above is more efficient. */
3116 vect_recog_bool_pattern (vec
<gimple
> *stmts
, tree
*type_in
,
3119 gimple last_stmt
= stmts
->pop ();
3120 enum tree_code rhs_code
;
3121 tree var
, lhs
, rhs
, vectype
;
3122 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
3123 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
3124 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
3125 gimple pattern_stmt
;
3127 if (!is_gimple_assign (last_stmt
))
3130 var
= gimple_assign_rhs1 (last_stmt
);
3131 lhs
= gimple_assign_lhs (last_stmt
);
3133 if ((TYPE_PRECISION (TREE_TYPE (var
)) != 1
3134 || !TYPE_UNSIGNED (TREE_TYPE (var
)))
3135 && TREE_CODE (TREE_TYPE (var
)) != BOOLEAN_TYPE
)
3138 rhs_code
= gimple_assign_rhs_code (last_stmt
);
3139 if (CONVERT_EXPR_CODE_P (rhs_code
))
3141 if (TREE_CODE (TREE_TYPE (lhs
)) != INTEGER_TYPE
3142 || TYPE_PRECISION (TREE_TYPE (lhs
)) == 1)
3144 vectype
= get_vectype_for_scalar_type (TREE_TYPE (lhs
));
3145 if (vectype
== NULL_TREE
)
3148 if (!check_bool_pattern (var
, loop_vinfo
, bb_vinfo
))
3151 rhs
= adjust_bool_pattern (var
, TREE_TYPE (lhs
), NULL_TREE
, stmts
);
3152 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
3153 if (useless_type_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (rhs
)))
3154 pattern_stmt
= gimple_build_assign (lhs
, SSA_NAME
, rhs
);
3157 = gimple_build_assign (lhs
, NOP_EXPR
, rhs
);
3158 *type_out
= vectype
;
3160 stmts
->safe_push (last_stmt
);
3161 if (dump_enabled_p ())
3162 dump_printf_loc (MSG_NOTE
, vect_location
,
3163 "vect_recog_bool_pattern: detected:\n");
3165 return pattern_stmt
;
3167 else if (rhs_code
== COND_EXPR
3168 && TREE_CODE (var
) == SSA_NAME
)
3170 vectype
= get_vectype_for_scalar_type (TREE_TYPE (lhs
));
3171 if (vectype
== NULL_TREE
)
3174 /* Build a scalar type for the boolean result that when
3175 vectorized matches the vector type of the result in
3176 size and number of elements. */
3178 = wi::udiv_trunc (TYPE_SIZE (vectype
),
3179 TYPE_VECTOR_SUBPARTS (vectype
)).to_uhwi ();
3181 = build_nonstandard_integer_type (prec
,
3182 TYPE_UNSIGNED (TREE_TYPE (var
)));
3183 if (get_vectype_for_scalar_type (type
) == NULL_TREE
)
3186 if (!check_bool_pattern (var
, loop_vinfo
, bb_vinfo
))
3189 rhs
= adjust_bool_pattern (var
, type
, NULL_TREE
, stmts
);
3190 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
3192 = gimple_build_assign (lhs
, COND_EXPR
,
3193 build2 (NE_EXPR
, boolean_type_node
,
3194 rhs
, build_int_cst (type
, 0)),
3195 gimple_assign_rhs2 (last_stmt
),
3196 gimple_assign_rhs3 (last_stmt
));
3197 *type_out
= vectype
;
3199 stmts
->safe_push (last_stmt
);
3200 if (dump_enabled_p ())
3201 dump_printf_loc (MSG_NOTE
, vect_location
,
3202 "vect_recog_bool_pattern: detected:\n");
3204 return pattern_stmt
;
3206 else if (rhs_code
== SSA_NAME
3207 && STMT_VINFO_DATA_REF (stmt_vinfo
))
3209 stmt_vec_info pattern_stmt_info
;
3210 vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
3211 gcc_assert (vectype
!= NULL_TREE
);
3212 if (!VECTOR_MODE_P (TYPE_MODE (vectype
)))
3214 if (!check_bool_pattern (var
, loop_vinfo
, bb_vinfo
))
3217 rhs
= adjust_bool_pattern (var
, TREE_TYPE (vectype
), NULL_TREE
, stmts
);
3218 lhs
= build1 (VIEW_CONVERT_EXPR
, TREE_TYPE (vectype
), lhs
);
3219 if (!useless_type_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (rhs
)))
3221 tree rhs2
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
3222 gimple cast_stmt
= gimple_build_assign (rhs2
, NOP_EXPR
, rhs
);
3223 new_pattern_def_seq (stmt_vinfo
, cast_stmt
);
3226 pattern_stmt
= gimple_build_assign (lhs
, SSA_NAME
, rhs
);
3227 pattern_stmt_info
= new_stmt_vec_info (pattern_stmt
, loop_vinfo
,
3229 set_vinfo_for_stmt (pattern_stmt
, pattern_stmt_info
);
3230 STMT_VINFO_DATA_REF (pattern_stmt_info
)
3231 = STMT_VINFO_DATA_REF (stmt_vinfo
);
3232 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info
)
3233 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo
);
3234 STMT_VINFO_DR_INIT (pattern_stmt_info
) = STMT_VINFO_DR_INIT (stmt_vinfo
);
3235 STMT_VINFO_DR_OFFSET (pattern_stmt_info
)
3236 = STMT_VINFO_DR_OFFSET (stmt_vinfo
);
3237 STMT_VINFO_DR_STEP (pattern_stmt_info
) = STMT_VINFO_DR_STEP (stmt_vinfo
);
3238 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info
)
3239 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo
);
3240 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo
)) = pattern_stmt
;
3241 *type_out
= vectype
;
3243 stmts
->safe_push (last_stmt
);
3244 if (dump_enabled_p ())
3245 dump_printf_loc (MSG_NOTE
, vect_location
,
3246 "vect_recog_bool_pattern: detected:\n");
3247 return pattern_stmt
;
3254 /* Mark statements that are involved in a pattern. */
3257 vect_mark_pattern_stmts (gimple orig_stmt
, gimple pattern_stmt
,
3258 tree pattern_vectype
)
3260 stmt_vec_info pattern_stmt_info
, def_stmt_info
;
3261 stmt_vec_info orig_stmt_info
= vinfo_for_stmt (orig_stmt
);
3262 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (orig_stmt_info
);
3263 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (orig_stmt_info
);
3266 pattern_stmt_info
= vinfo_for_stmt (pattern_stmt
);
3267 if (pattern_stmt_info
== NULL
)
3269 pattern_stmt_info
= new_stmt_vec_info (pattern_stmt
, loop_vinfo
,
3271 set_vinfo_for_stmt (pattern_stmt
, pattern_stmt_info
);
3273 gimple_set_bb (pattern_stmt
, gimple_bb (orig_stmt
));
3275 STMT_VINFO_RELATED_STMT (pattern_stmt_info
) = orig_stmt
;
3276 STMT_VINFO_DEF_TYPE (pattern_stmt_info
)
3277 = STMT_VINFO_DEF_TYPE (orig_stmt_info
);
3278 STMT_VINFO_VECTYPE (pattern_stmt_info
) = pattern_vectype
;
3279 STMT_VINFO_IN_PATTERN_P (orig_stmt_info
) = true;
3280 STMT_VINFO_RELATED_STMT (orig_stmt_info
) = pattern_stmt
;
3281 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info
)
3282 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info
);
3283 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info
))
3285 gimple_stmt_iterator si
;
3286 for (si
= gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info
));
3287 !gsi_end_p (si
); gsi_next (&si
))
3289 def_stmt
= gsi_stmt (si
);
3290 def_stmt_info
= vinfo_for_stmt (def_stmt
);
3291 if (def_stmt_info
== NULL
)
3293 def_stmt_info
= new_stmt_vec_info (def_stmt
, loop_vinfo
,
3295 set_vinfo_for_stmt (def_stmt
, def_stmt_info
);
3297 gimple_set_bb (def_stmt
, gimple_bb (orig_stmt
));
3298 STMT_VINFO_RELATED_STMT (def_stmt_info
) = orig_stmt
;
3299 STMT_VINFO_DEF_TYPE (def_stmt_info
) = vect_internal_def
;
3300 if (STMT_VINFO_VECTYPE (def_stmt_info
) == NULL_TREE
)
3301 STMT_VINFO_VECTYPE (def_stmt_info
) = pattern_vectype
;
3306 /* Function vect_pattern_recog_1
3309 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
3310 computation pattern.
3311 STMT: A stmt from which the pattern search should start.
3313 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
3314 expression that computes the same functionality and can be used to
3315 replace the sequence of stmts that are involved in the pattern.
3318 This function checks if the expression returned by PATTERN_RECOG_FUNC is
3319 supported in vector form by the target. We use 'TYPE_IN' to obtain the
3320 relevant vector type. If 'TYPE_IN' is already a vector type, then this
3321 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
3322 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
3323 to the available target pattern.
3325 This function also does some bookkeeping, as explained in the documentation
3326 for vect_recog_pattern. */
3329 vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func
,
3330 gimple_stmt_iterator si
,
3331 vec
<gimple
> *stmts_to_replace
)
3333 gimple stmt
= gsi_stmt (si
), pattern_stmt
;
3334 stmt_vec_info stmt_info
;
3335 loop_vec_info loop_vinfo
;
3336 tree pattern_vectype
;
3337 tree type_in
, type_out
;
3338 enum tree_code code
;
3342 stmts_to_replace
->truncate (0);
3343 stmts_to_replace
->quick_push (stmt
);
3344 pattern_stmt
= (* vect_recog_func
) (stmts_to_replace
, &type_in
, &type_out
);
3348 stmt
= stmts_to_replace
->last ();
3349 stmt_info
= vinfo_for_stmt (stmt
);
3350 loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3352 if (VECTOR_MODE_P (TYPE_MODE (type_in
)))
3354 /* No need to check target support (already checked by the pattern
3355 recognition function). */
3356 pattern_vectype
= type_out
? type_out
: type_in
;
3360 machine_mode vec_mode
;
3361 enum insn_code icode
;
3364 /* Check target support */
3365 type_in
= get_vectype_for_scalar_type (type_in
);
3369 type_out
= get_vectype_for_scalar_type (type_out
);
3374 pattern_vectype
= type_out
;
3376 if (is_gimple_assign (pattern_stmt
))
3377 code
= gimple_assign_rhs_code (pattern_stmt
);
3380 gcc_assert (is_gimple_call (pattern_stmt
));
3384 optab
= optab_for_tree_code (code
, type_in
, optab_default
);
3385 vec_mode
= TYPE_MODE (type_in
);
3387 || (icode
= optab_handler (optab
, vec_mode
)) == CODE_FOR_nothing
3388 || (insn_data
[icode
].operand
[0].mode
!= TYPE_MODE (type_out
)))
3392 /* Found a vectorizable pattern. */
3393 if (dump_enabled_p ())
3395 dump_printf_loc (MSG_NOTE
, vect_location
,
3396 "pattern recognized: ");
3397 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
3400 /* Mark the stmts that are involved in the pattern. */
3401 vect_mark_pattern_stmts (stmt
, pattern_stmt
, pattern_vectype
);
3403 /* Patterns cannot be vectorized using SLP, because they change the order of
3406 FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo
), i
, next
)
3408 LOOP_VINFO_REDUCTIONS (loop_vinfo
).ordered_remove (i
);
3410 /* It is possible that additional pattern stmts are created and inserted in
3411 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
3412 relevant statements. */
3413 for (i
= 0; stmts_to_replace
->iterate (i
, &stmt
)
3414 && (unsigned) i
< (stmts_to_replace
->length () - 1);
3417 stmt_info
= vinfo_for_stmt (stmt
);
3418 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
3419 if (dump_enabled_p ())
3421 dump_printf_loc (MSG_NOTE
, vect_location
,
3422 "additional pattern stmt: ");
3423 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
3426 vect_mark_pattern_stmts (stmt
, pattern_stmt
, NULL_TREE
);
3431 /* Function vect_pattern_recog
3434 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
3437 Output - for each computation idiom that is detected we create a new stmt
3438 that provides the same functionality and that can be vectorized. We
3439 also record some information in the struct_stmt_info of the relevant
3440 stmts, as explained below:
3442 At the entry to this function we have the following stmts, with the
3443 following initial value in the STMT_VINFO fields:
3445 stmt in_pattern_p related_stmt vec_stmt
3446 S1: a_i = .... - - -
3447 S2: a_2 = ..use(a_i).. - - -
3448 S3: a_1 = ..use(a_2).. - - -
3449 S4: a_0 = ..use(a_1).. - - -
3450 S5: ... = ..use(a_0).. - - -
3452 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
3453 represented by a single stmt. We then:
3454 - create a new stmt S6 equivalent to the pattern (the stmt is not
3455 inserted into the code)
3456 - fill in the STMT_VINFO fields as follows:
3458 in_pattern_p related_stmt vec_stmt
3459 S1: a_i = .... - - -
3460 S2: a_2 = ..use(a_i).. - - -
3461 S3: a_1 = ..use(a_2).. - - -
3462 S4: a_0 = ..use(a_1).. true S6 -
3463 '---> S6: a_new = .... - S4 -
3464 S5: ... = ..use(a_0).. - - -
3466 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
3467 to each other through the RELATED_STMT field).
3469 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
3470 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
3471 remain irrelevant unless used by stmts other than S4.
3473 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
3474 (because they are marked as irrelevant). It will vectorize S6, and record
3475 a pointer to the new vector stmt VS6 from S6 (as usual).
3476 S4 will be skipped, and S5 will be vectorized as usual:
3478 in_pattern_p related_stmt vec_stmt
3479 S1: a_i = .... - - -
3480 S2: a_2 = ..use(a_i).. - - -
3481 S3: a_1 = ..use(a_2).. - - -
3482 > VS6: va_new = .... - - -
3483 S4: a_0 = ..use(a_1).. true S6 VS6
3484 '---> S6: a_new = .... - S4 VS6
3485 > VS5: ... = ..vuse(va_new).. - - -
3486 S5: ... = ..use(a_0).. - - -
3488 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
3489 elsewhere), and we'll end up with:
3492 VS5: ... = ..vuse(va_new)..
3494 In case of more than one pattern statements, e.g., widen-mult with
3498 S2 a_T = (TYPE) a_t;
3499 '--> S3: a_it = (interm_type) a_t;
3500 S4 prod_T = a_T * CONST;
3501 '--> S5: prod_T' = a_it w* CONST;
3503 there may be other users of a_T outside the pattern. In that case S2 will
3504 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
3505 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
3506 be recorded in S3. */
3509 vect_pattern_recog (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
)
3514 gimple_stmt_iterator si
;
3516 vect_recog_func_ptr vect_recog_func
;
3517 auto_vec
<gimple
, 1> stmts_to_replace
;
3520 if (dump_enabled_p ())
3521 dump_printf_loc (MSG_NOTE
, vect_location
,
3522 "=== vect_pattern_recog ===\n");
3526 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3527 bbs
= LOOP_VINFO_BBS (loop_vinfo
);
3528 nbbs
= loop
->num_nodes
;
3532 bbs
= &BB_VINFO_BB (bb_vinfo
);
3536 /* Scan through the loop stmts, applying the pattern recognition
3537 functions starting at each stmt visited: */
3538 for (i
= 0; i
< nbbs
; i
++)
3540 basic_block bb
= bbs
[i
];
3541 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
3543 if (bb_vinfo
&& (stmt
= gsi_stmt (si
))
3544 && vinfo_for_stmt (stmt
)
3545 && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt
)))
3548 /* Scan over all generic vect_recog_xxx_pattern functions. */
3549 for (j
= 0; j
< NUM_PATTERNS
; j
++)
3551 vect_recog_func
= vect_vect_recog_func_ptrs
[j
];
3552 vect_pattern_recog_1 (vect_recog_func
, si
,