1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2015 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
28 #include "double-int.h"
35 #include "fold-const.h"
36 #include "stor-layout.h"
39 #include "hard-reg-set.h"
41 #include "dominance.h"
42 #include "basic-block.h"
43 #include "gimple-pretty-print.h"
44 #include "tree-ssa-alias.h"
45 #include "internal-fn.h"
47 #include "gimple-expr.h"
51 #include "gimple-iterator.h"
52 #include "gimple-ssa.h"
53 #include "tree-phinodes.h"
54 #include "ssa-iterators.h"
55 #include "stringpool.h"
56 #include "tree-ssanames.h"
61 #include "statistics.h"
63 #include "fixed-value.h"
64 #include "insn-config.h"
73 #include "insn-codes.h"
76 #include "tree-data-ref.h"
77 #include "tree-vectorizer.h"
78 #include "recog.h" /* FIXME: for insn_data */
79 #include "diagnostic-core.h"
83 /* Pattern recognition functions */
84 static gimple
vect_recog_widen_sum_pattern (vec
<gimple
> *, tree
*,
86 static gimple
vect_recog_widen_mult_pattern (vec
<gimple
> *, tree
*,
88 static gimple
vect_recog_dot_prod_pattern (vec
<gimple
> *, tree
*,
90 static gimple
vect_recog_sad_pattern (vec
<gimple
> *, tree
*,
92 static gimple
vect_recog_pow_pattern (vec
<gimple
> *, tree
*, tree
*);
93 static gimple
vect_recog_over_widening_pattern (vec
<gimple
> *, tree
*,
95 static gimple
vect_recog_widen_shift_pattern (vec
<gimple
> *,
97 static gimple
vect_recog_rotate_pattern (vec
<gimple
> *, tree
*, tree
*);
98 static gimple
vect_recog_vector_vector_shift_pattern (vec
<gimple
> *,
100 static gimple
vect_recog_divmod_pattern (vec
<gimple
> *,
102 static gimple
vect_recog_mixed_size_cond_pattern (vec
<gimple
> *,
104 static gimple
vect_recog_bool_pattern (vec
<gimple
> *, tree
*, tree
*);
105 static vect_recog_func_ptr vect_vect_recog_func_ptrs
[NUM_PATTERNS
] = {
106 vect_recog_widen_mult_pattern
,
107 vect_recog_widen_sum_pattern
,
108 vect_recog_dot_prod_pattern
,
109 vect_recog_sad_pattern
,
110 vect_recog_pow_pattern
,
111 vect_recog_widen_shift_pattern
,
112 vect_recog_over_widening_pattern
,
113 vect_recog_rotate_pattern
,
114 vect_recog_vector_vector_shift_pattern
,
115 vect_recog_divmod_pattern
,
116 vect_recog_mixed_size_cond_pattern
,
117 vect_recog_bool_pattern
};
120 append_pattern_def_seq (stmt_vec_info stmt_info
, gimple stmt
)
122 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
),
127 new_pattern_def_seq (stmt_vec_info stmt_info
, gimple stmt
)
129 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
) = NULL
;
130 append_pattern_def_seq (stmt_info
, stmt
);
133 /* Check whether STMT2 is in the same loop or basic block as STMT1.
134 Which of the two applies depends on whether we're currently doing
135 loop-based or basic-block-based vectorization, as determined by
136 the vinfo_for_stmt for STMT1 (which must be defined).
138 If this returns true, vinfo_for_stmt for STMT2 is guaranteed
139 to be defined as well. */
142 vect_same_loop_or_bb_p (gimple stmt1
, gimple stmt2
)
144 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt1
);
145 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
146 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
148 if (!gimple_bb (stmt2
))
153 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
154 if (!flow_bb_inside_loop_p (loop
, gimple_bb (stmt2
)))
159 if (gimple_bb (stmt2
) != BB_VINFO_BB (bb_vinfo
)
160 || gimple_code (stmt2
) == GIMPLE_PHI
)
164 gcc_assert (vinfo_for_stmt (stmt2
));
168 /* If the LHS of DEF_STMT has a single use, and that statement is
169 in the same loop or basic block, return it. */
172 vect_single_imm_use (gimple def_stmt
)
174 tree lhs
= gimple_assign_lhs (def_stmt
);
178 if (!single_imm_use (lhs
, &use_p
, &use_stmt
))
181 if (!vect_same_loop_or_bb_p (def_stmt
, use_stmt
))
187 /* Check whether NAME, an ssa-name used in USE_STMT,
188 is a result of a type promotion, such that:
189 DEF_STMT: NAME = NOP (name0)
190 If CHECK_SIGN is TRUE, check that either both types are signed or both are
194 type_conversion_p (tree name
, gimple use_stmt
, bool check_sign
,
195 tree
*orig_type
, gimple
*def_stmt
, bool *promotion
)
199 loop_vec_info loop_vinfo
;
200 stmt_vec_info stmt_vinfo
;
201 tree type
= TREE_TYPE (name
);
203 enum vect_def_type dt
;
205 bb_vec_info bb_vinfo
;
207 stmt_vinfo
= vinfo_for_stmt (use_stmt
);
208 loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
209 bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
210 if (!vect_is_simple_use (name
, use_stmt
, loop_vinfo
, bb_vinfo
, def_stmt
,
214 if (dt
!= vect_internal_def
215 && dt
!= vect_external_def
&& dt
!= vect_constant_def
)
221 if (!is_gimple_assign (*def_stmt
))
224 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt
)))
227 oprnd0
= gimple_assign_rhs1 (*def_stmt
);
229 *orig_type
= TREE_TYPE (oprnd0
);
230 if (!INTEGRAL_TYPE_P (type
) || !INTEGRAL_TYPE_P (*orig_type
)
231 || ((TYPE_UNSIGNED (type
) != TYPE_UNSIGNED (*orig_type
)) && check_sign
))
234 if (TYPE_PRECISION (type
) >= (TYPE_PRECISION (*orig_type
) * 2))
239 if (!vect_is_simple_use (oprnd0
, *def_stmt
, loop_vinfo
,
240 bb_vinfo
, &dummy_gimple
, &dummy
, &dt
))
246 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
247 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
250 vect_recog_temp_ssa_var (tree type
, gimple stmt
)
252 return make_temp_ssa_name (type
, stmt
, "patt");
255 /* Function vect_recog_dot_prod_pattern
257 Try to find the following pattern:
263 sum_0 = phi <init, sum_1>
266 S3 x_T = (TYPE1) x_t;
267 S4 y_T = (TYPE1) y_t;
269 [S6 prod = (TYPE2) prod; #optional]
270 S7 sum_1 = prod + sum_0;
272 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
273 same size of 'TYPE1' or bigger. This is a special case of a reduction
278 * STMTS: Contains a stmt from which the pattern search begins. In the
279 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
284 * TYPE_IN: The type of the input arguments to the pattern.
286 * TYPE_OUT: The type of the output of this pattern.
288 * Return value: A new stmt that will be used to replace the sequence of
289 stmts that constitute the pattern. In this case it will be:
290 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
292 Note: The dot-prod idiom is a widening reduction pattern that is
293 vectorized without preserving all the intermediate results. It
294 produces only N/2 (widened) results (by summing up pairs of
295 intermediate results) rather than all N results. Therefore, we
296 cannot allow this pattern when we want to get all the results and in
297 the correct order (as is the case when this computation is in an
298 inner-loop nested in an outer-loop that us being vectorized). */
301 vect_recog_dot_prod_pattern (vec
<gimple
> *stmts
, tree
*type_in
,
304 gimple stmt
, last_stmt
= (*stmts
)[0];
306 tree oprnd00
, oprnd01
;
307 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
308 tree type
, half_type
;
311 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
319 loop
= LOOP_VINFO_LOOP (loop_info
);
321 if (!is_gimple_assign (last_stmt
))
324 type
= gimple_expr_type (last_stmt
);
326 /* Look for the following pattern
330 DDPROD = (TYPE2) DPROD;
331 sum_1 = DDPROD + sum_0;
333 - DX is double the size of X
334 - DY is double the size of Y
335 - DX, DY, DPROD all have the same type
336 - sum is the same size of DPROD or bigger
337 - sum has been recognized as a reduction variable.
339 This is equivalent to:
340 DPROD = X w* Y; #widen mult
341 sum_1 = DPROD w+ sum_0; #widen summation
343 DPROD = X w* Y; #widen mult
344 sum_1 = DPROD + sum_0; #summation
347 /* Starting from LAST_STMT, follow the defs of its uses in search
348 of the above pattern. */
350 if (gimple_assign_rhs_code (last_stmt
) != PLUS_EXPR
)
353 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
355 /* Has been detected as widening-summation? */
357 stmt
= STMT_VINFO_RELATED_STMT (stmt_vinfo
);
358 type
= gimple_expr_type (stmt
);
359 if (gimple_assign_rhs_code (stmt
) != WIDEN_SUM_EXPR
)
361 oprnd0
= gimple_assign_rhs1 (stmt
);
362 oprnd1
= gimple_assign_rhs2 (stmt
);
363 half_type
= TREE_TYPE (oprnd0
);
369 if (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
)
371 oprnd0
= gimple_assign_rhs1 (last_stmt
);
372 oprnd1
= gimple_assign_rhs2 (last_stmt
);
373 if (!types_compatible_p (TREE_TYPE (oprnd0
), type
)
374 || !types_compatible_p (TREE_TYPE (oprnd1
), type
))
378 if (type_conversion_p (oprnd0
, stmt
, true, &half_type
, &def_stmt
,
383 oprnd0
= gimple_assign_rhs1 (stmt
);
389 /* So far so good. Since last_stmt was detected as a (summation) reduction,
390 we know that oprnd1 is the reduction variable (defined by a loop-header
391 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
392 Left to check that oprnd0 is defined by a (widen_)mult_expr */
393 if (TREE_CODE (oprnd0
) != SSA_NAME
)
396 prod_type
= half_type
;
397 stmt
= SSA_NAME_DEF_STMT (oprnd0
);
399 /* It could not be the dot_prod pattern if the stmt is outside the loop. */
400 if (!gimple_bb (stmt
) || !flow_bb_inside_loop_p (loop
, gimple_bb (stmt
)))
403 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
404 inside the loop (in case we are analyzing an outer-loop). */
405 if (!is_gimple_assign (stmt
))
407 stmt_vinfo
= vinfo_for_stmt (stmt
);
408 gcc_assert (stmt_vinfo
);
409 if (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_internal_def
)
411 if (gimple_assign_rhs_code (stmt
) != MULT_EXPR
)
413 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
415 /* Has been detected as a widening multiplication? */
417 stmt
= STMT_VINFO_RELATED_STMT (stmt_vinfo
);
418 if (gimple_assign_rhs_code (stmt
) != WIDEN_MULT_EXPR
)
420 stmt_vinfo
= vinfo_for_stmt (stmt
);
421 gcc_assert (stmt_vinfo
);
422 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_internal_def
);
423 oprnd00
= gimple_assign_rhs1 (stmt
);
424 oprnd01
= gimple_assign_rhs2 (stmt
);
425 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (last_stmt
))
426 = STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
);
430 tree half_type0
, half_type1
;
434 oprnd0
= gimple_assign_rhs1 (stmt
);
435 oprnd1
= gimple_assign_rhs2 (stmt
);
436 if (!types_compatible_p (TREE_TYPE (oprnd0
), prod_type
)
437 || !types_compatible_p (TREE_TYPE (oprnd1
), prod_type
))
439 if (!type_conversion_p (oprnd0
, stmt
, true, &half_type0
, &def_stmt
,
443 oprnd00
= gimple_assign_rhs1 (def_stmt
);
444 if (!type_conversion_p (oprnd1
, stmt
, true, &half_type1
, &def_stmt
,
448 oprnd01
= gimple_assign_rhs1 (def_stmt
);
449 if (!types_compatible_p (half_type0
, half_type1
))
451 if (TYPE_PRECISION (prod_type
) != TYPE_PRECISION (half_type0
) * 2)
455 half_type
= TREE_TYPE (oprnd00
);
456 *type_in
= half_type
;
459 /* Pattern detected. Create a stmt to be used to replace the pattern: */
460 var
= vect_recog_temp_ssa_var (type
, NULL
);
461 pattern_stmt
= gimple_build_assign (var
, DOT_PROD_EXPR
,
462 oprnd00
, oprnd01
, oprnd1
);
464 if (dump_enabled_p ())
466 dump_printf_loc (MSG_NOTE
, vect_location
,
467 "vect_recog_dot_prod_pattern: detected: ");
468 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
469 dump_printf (MSG_NOTE
, "\n");
472 /* We don't allow changing the order of the computation in the inner-loop
473 when doing outer-loop vectorization. */
474 gcc_assert (!nested_in_vect_loop_p (loop
, last_stmt
));
480 /* Function vect_recog_sad_pattern
482 Try to find the following Sum of Absolute Difference (SAD) pattern:
485 signed TYPE1 diff, abs_diff;
488 sum_0 = phi <init, sum_1>
491 S3 x_T = (TYPE1) x_t;
492 S4 y_T = (TYPE1) y_t;
494 S6 abs_diff = ABS_EXPR <diff>;
495 [S7 abs_diff = (TYPE2) abs_diff; #optional]
496 S8 sum_1 = abs_diff + sum_0;
498 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
499 same size of 'TYPE1' or bigger. This is a special case of a reduction
504 * STMTS: Contains a stmt from which the pattern search begins. In the
505 example, when this function is called with S8, the pattern
506 {S3,S4,S5,S6,S7,S8} will be detected.
510 * TYPE_IN: The type of the input arguments to the pattern.
512 * TYPE_OUT: The type of the output of this pattern.
514 * Return value: A new stmt that will be used to replace the sequence of
515 stmts that constitute the pattern. In this case it will be:
516 SAD_EXPR <x_t, y_t, sum_0>
520 vect_recog_sad_pattern (vec
<gimple
> *stmts
, tree
*type_in
,
523 gimple last_stmt
= (*stmts
)[0];
524 tree sad_oprnd0
, sad_oprnd1
;
525 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
527 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
534 loop
= LOOP_VINFO_LOOP (loop_info
);
536 if (!is_gimple_assign (last_stmt
))
539 tree sum_type
= gimple_expr_type (last_stmt
);
541 /* Look for the following pattern
545 DAD = ABS_EXPR <DDIFF>;
546 DDPROD = (TYPE2) DPROD;
549 - DX is at least double the size of X
550 - DY is at least double the size of Y
551 - DX, DY, DDIFF, DAD all have the same type
552 - sum is the same size of DAD or bigger
553 - sum has been recognized as a reduction variable.
555 This is equivalent to:
556 DDIFF = X w- Y; #widen sub
557 DAD = ABS_EXPR <DDIFF>;
558 sum_1 = DAD w+ sum_0; #widen summation
560 DDIFF = X w- Y; #widen sub
561 DAD = ABS_EXPR <DDIFF>;
562 sum_1 = DAD + sum_0; #summation
565 /* Starting from LAST_STMT, follow the defs of its uses in search
566 of the above pattern. */
568 if (gimple_assign_rhs_code (last_stmt
) != PLUS_EXPR
)
571 tree plus_oprnd0
, plus_oprnd1
;
573 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
575 /* Has been detected as widening-summation? */
577 gimple stmt
= STMT_VINFO_RELATED_STMT (stmt_vinfo
);
578 sum_type
= gimple_expr_type (stmt
);
579 if (gimple_assign_rhs_code (stmt
) != WIDEN_SUM_EXPR
)
581 plus_oprnd0
= gimple_assign_rhs1 (stmt
);
582 plus_oprnd1
= gimple_assign_rhs2 (stmt
);
583 half_type
= TREE_TYPE (plus_oprnd0
);
589 if (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
)
591 plus_oprnd0
= gimple_assign_rhs1 (last_stmt
);
592 plus_oprnd1
= gimple_assign_rhs2 (last_stmt
);
593 if (!types_compatible_p (TREE_TYPE (plus_oprnd0
), sum_type
)
594 || !types_compatible_p (TREE_TYPE (plus_oprnd1
), sum_type
))
597 /* The type conversion could be promotion, demotion,
598 or just signed -> unsigned. */
599 if (type_conversion_p (plus_oprnd0
, last_stmt
, false,
600 &half_type
, &def_stmt
, &promotion
))
601 plus_oprnd0
= gimple_assign_rhs1 (def_stmt
);
603 half_type
= sum_type
;
606 /* So far so good. Since last_stmt was detected as a (summation) reduction,
607 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
608 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
609 Then check that plus_oprnd0 is defined by an abs_expr. */
611 if (TREE_CODE (plus_oprnd0
) != SSA_NAME
)
614 tree abs_type
= half_type
;
615 gimple abs_stmt
= SSA_NAME_DEF_STMT (plus_oprnd0
);
617 /* It could not be the sad pattern if the abs_stmt is outside the loop. */
618 if (!gimple_bb (abs_stmt
) || !flow_bb_inside_loop_p (loop
, gimple_bb (abs_stmt
)))
621 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
622 inside the loop (in case we are analyzing an outer-loop). */
623 if (!is_gimple_assign (abs_stmt
))
626 stmt_vec_info abs_stmt_vinfo
= vinfo_for_stmt (abs_stmt
);
627 gcc_assert (abs_stmt_vinfo
);
628 if (STMT_VINFO_DEF_TYPE (abs_stmt_vinfo
) != vect_internal_def
)
630 if (gimple_assign_rhs_code (abs_stmt
) != ABS_EXPR
)
633 tree abs_oprnd
= gimple_assign_rhs1 (abs_stmt
);
634 if (!types_compatible_p (TREE_TYPE (abs_oprnd
), abs_type
))
636 if (TYPE_UNSIGNED (abs_type
))
639 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
641 if (TREE_CODE (abs_oprnd
) != SSA_NAME
)
644 gimple diff_stmt
= SSA_NAME_DEF_STMT (abs_oprnd
);
646 /* It could not be the sad pattern if the diff_stmt is outside the loop. */
647 if (!gimple_bb (diff_stmt
)
648 || !flow_bb_inside_loop_p (loop
, gimple_bb (diff_stmt
)))
651 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
652 inside the loop (in case we are analyzing an outer-loop). */
653 if (!is_gimple_assign (diff_stmt
))
656 stmt_vec_info diff_stmt_vinfo
= vinfo_for_stmt (diff_stmt
);
657 gcc_assert (diff_stmt_vinfo
);
658 if (STMT_VINFO_DEF_TYPE (diff_stmt_vinfo
) != vect_internal_def
)
660 if (gimple_assign_rhs_code (diff_stmt
) != MINUS_EXPR
)
663 tree half_type0
, half_type1
;
666 tree minus_oprnd0
= gimple_assign_rhs1 (diff_stmt
);
667 tree minus_oprnd1
= gimple_assign_rhs2 (diff_stmt
);
669 if (!types_compatible_p (TREE_TYPE (minus_oprnd0
), abs_type
)
670 || !types_compatible_p (TREE_TYPE (minus_oprnd1
), abs_type
))
672 if (!type_conversion_p (minus_oprnd0
, diff_stmt
, false,
673 &half_type0
, &def_stmt
, &promotion
)
676 sad_oprnd0
= gimple_assign_rhs1 (def_stmt
);
678 if (!type_conversion_p (minus_oprnd1
, diff_stmt
, false,
679 &half_type1
, &def_stmt
, &promotion
)
682 sad_oprnd1
= gimple_assign_rhs1 (def_stmt
);
684 if (!types_compatible_p (half_type0
, half_type1
))
686 if (TYPE_PRECISION (abs_type
) < TYPE_PRECISION (half_type0
) * 2
687 || TYPE_PRECISION (sum_type
) < TYPE_PRECISION (half_type0
) * 2)
690 *type_in
= TREE_TYPE (sad_oprnd0
);
691 *type_out
= sum_type
;
693 /* Pattern detected. Create a stmt to be used to replace the pattern: */
694 tree var
= vect_recog_temp_ssa_var (sum_type
, NULL
);
695 gimple pattern_stmt
= gimple_build_assign (var
, SAD_EXPR
, sad_oprnd0
,
696 sad_oprnd1
, plus_oprnd1
);
698 if (dump_enabled_p ())
700 dump_printf_loc (MSG_NOTE
, vect_location
,
701 "vect_recog_sad_pattern: detected: ");
702 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
703 dump_printf (MSG_NOTE
, "\n");
706 /* We don't allow changing the order of the computation in the inner-loop
707 when doing outer-loop vectorization. */
708 gcc_assert (!nested_in_vect_loop_p (loop
, last_stmt
));
714 /* Handle widening operation by a constant. At the moment we support MULT_EXPR
717 For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
718 we check that CONST_OPRND is less or equal to the size of HALF_TYPE.
720 Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
721 HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
722 that satisfies the above restrictions, we can perform a widening opeartion
723 from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
724 with a_it = (interm_type) a_t; */
727 vect_handle_widen_op_by_const (gimple stmt
, enum tree_code code
,
728 tree const_oprnd
, tree
*oprnd
,
729 vec
<gimple
> *stmts
, tree type
,
730 tree
*half_type
, gimple def_stmt
)
732 tree new_type
, new_oprnd
;
735 if (code
!= MULT_EXPR
&& code
!= LSHIFT_EXPR
)
738 if (((code
== MULT_EXPR
&& int_fits_type_p (const_oprnd
, *half_type
))
739 || (code
== LSHIFT_EXPR
740 && compare_tree_int (const_oprnd
, TYPE_PRECISION (*half_type
))
742 && TYPE_PRECISION (type
) == (TYPE_PRECISION (*half_type
) * 2))
744 /* CONST_OPRND is a constant of HALF_TYPE. */
745 *oprnd
= gimple_assign_rhs1 (def_stmt
);
749 if (TYPE_PRECISION (type
) < (TYPE_PRECISION (*half_type
) * 4))
752 if (!vect_same_loop_or_bb_p (stmt
, def_stmt
))
755 /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
756 a type 2 times bigger than HALF_TYPE. */
757 new_type
= build_nonstandard_integer_type (TYPE_PRECISION (type
) / 2,
758 TYPE_UNSIGNED (type
));
759 if ((code
== MULT_EXPR
&& !int_fits_type_p (const_oprnd
, new_type
))
760 || (code
== LSHIFT_EXPR
761 && compare_tree_int (const_oprnd
, TYPE_PRECISION (new_type
)) == 1))
764 /* Use NEW_TYPE for widening operation. */
765 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
)))
767 new_stmt
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
));
768 /* Check if the already created pattern stmt is what we need. */
769 if (!is_gimple_assign (new_stmt
)
770 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt
))
771 || TREE_TYPE (gimple_assign_lhs (new_stmt
)) != new_type
)
774 stmts
->safe_push (def_stmt
);
775 *oprnd
= gimple_assign_lhs (new_stmt
);
779 /* Create a_T = (NEW_TYPE) a_t; */
780 *oprnd
= gimple_assign_rhs1 (def_stmt
);
781 new_oprnd
= make_ssa_name (new_type
);
782 new_stmt
= gimple_build_assign (new_oprnd
, NOP_EXPR
, *oprnd
);
783 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
)) = new_stmt
;
784 stmts
->safe_push (def_stmt
);
788 *half_type
= new_type
;
793 /* Function vect_recog_widen_mult_pattern
795 Try to find the following pattern:
799 TYPE a_T, b_T, prod_T;
805 S5 prod_T = a_T * b_T;
807 where type 'TYPE' is at least double the size of type 'type1' and 'type2'.
809 Also detect unsigned cases:
813 unsigned TYPE u_prod_T;
814 TYPE a_T, b_T, prod_T;
820 S5 prod_T = a_T * b_T;
821 S6 u_prod_T = (unsigned TYPE) prod_T;
823 and multiplication by constants:
830 S5 prod_T = a_T * CONST;
832 A special case of multiplication by constants is when 'TYPE' is 4 times
833 bigger than 'type', but CONST fits an intermediate type 2 times smaller
834 than 'TYPE'. In that case we create an additional pattern stmt for S3
835 to create a variable of the intermediate type, and perform widen-mult
836 on the intermediate type as well:
840 TYPE a_T, prod_T, prod_T';
844 '--> a_it = (interm_type) a_t;
845 S5 prod_T = a_T * CONST;
846 '--> prod_T' = a_it w* CONST;
850 * STMTS: Contains a stmt from which the pattern search begins. In the
851 example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
852 is detected. In case of unsigned widen-mult, the original stmt (S5) is
853 replaced with S6 in STMTS. In case of multiplication by a constant
854 of an intermediate type (the last case above), STMTS also contains S3
855 (inserted before S5).
859 * TYPE_IN: The type of the input arguments to the pattern.
861 * TYPE_OUT: The type of the output of this pattern.
863 * Return value: A new stmt that will be used to replace the sequence of
864 stmts that constitute the pattern. In this case it will be:
865 WIDEN_MULT <a_t, b_t>
866 If the result of WIDEN_MULT needs to be converted to a larger type, the
867 returned stmt will be this type conversion stmt.
871 vect_recog_widen_mult_pattern (vec
<gimple
> *stmts
,
872 tree
*type_in
, tree
*type_out
)
874 gimple last_stmt
= stmts
->pop ();
875 gimple def_stmt0
, def_stmt1
;
877 tree type
, half_type0
, half_type1
;
878 gimple new_stmt
= NULL
, pattern_stmt
= NULL
;
879 tree vectype
, vecitype
;
881 enum tree_code dummy_code
;
887 if (!is_gimple_assign (last_stmt
))
890 type
= gimple_expr_type (last_stmt
);
892 /* Starting from LAST_STMT, follow the defs of its uses in search
893 of the above pattern. */
895 if (gimple_assign_rhs_code (last_stmt
) != MULT_EXPR
)
898 oprnd0
= gimple_assign_rhs1 (last_stmt
);
899 oprnd1
= gimple_assign_rhs2 (last_stmt
);
900 if (!types_compatible_p (TREE_TYPE (oprnd0
), type
)
901 || !types_compatible_p (TREE_TYPE (oprnd1
), type
))
904 /* Check argument 0. */
905 if (!type_conversion_p (oprnd0
, last_stmt
, false, &half_type0
, &def_stmt0
,
909 /* Check argument 1. */
910 op1_ok
= type_conversion_p (oprnd1
, last_stmt
, false, &half_type1
,
911 &def_stmt1
, &promotion
);
913 if (op1_ok
&& promotion
)
915 oprnd0
= gimple_assign_rhs1 (def_stmt0
);
916 oprnd1
= gimple_assign_rhs1 (def_stmt1
);
920 if (TREE_CODE (oprnd1
) == INTEGER_CST
921 && TREE_CODE (half_type0
) == INTEGER_TYPE
922 && vect_handle_widen_op_by_const (last_stmt
, MULT_EXPR
, oprnd1
,
923 &oprnd0
, stmts
, type
,
924 &half_type0
, def_stmt0
))
926 half_type1
= half_type0
;
927 oprnd1
= fold_convert (half_type1
, oprnd1
);
933 /* If the two arguments have different sizes, convert the one with
934 the smaller type into the larger type. */
935 if (TYPE_PRECISION (half_type0
) != TYPE_PRECISION (half_type1
))
938 gimple def_stmt
= NULL
;
940 if (TYPE_PRECISION (half_type0
) < TYPE_PRECISION (half_type1
))
942 def_stmt
= def_stmt0
;
943 half_type0
= half_type1
;
948 def_stmt
= def_stmt1
;
949 half_type1
= half_type0
;
953 tree old_oprnd
= gimple_assign_rhs1 (def_stmt
);
954 tree new_oprnd
= make_ssa_name (half_type0
);
955 new_stmt
= gimple_build_assign (new_oprnd
, NOP_EXPR
, old_oprnd
);
959 /* Handle unsigned case. Look for
960 S6 u_prod_T = (unsigned TYPE) prod_T;
961 Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */
962 if (TYPE_UNSIGNED (type
) != TYPE_UNSIGNED (half_type0
))
968 if (TYPE_UNSIGNED (type
) == TYPE_UNSIGNED (half_type1
))
971 use_stmt
= vect_single_imm_use (last_stmt
);
972 if (!use_stmt
|| !is_gimple_assign (use_stmt
)
973 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt
)))
976 use_lhs
= gimple_assign_lhs (use_stmt
);
977 use_type
= TREE_TYPE (use_lhs
);
978 if (!INTEGRAL_TYPE_P (use_type
)
979 || (TYPE_UNSIGNED (type
) == TYPE_UNSIGNED (use_type
))
980 || (TYPE_PRECISION (type
) != TYPE_PRECISION (use_type
)))
984 last_stmt
= use_stmt
;
987 if (!types_compatible_p (half_type0
, half_type1
))
990 /* If TYPE is more than twice larger than HALF_TYPE, we use WIDEN_MULT
991 to get an intermediate result of type ITYPE. In this case we need
992 to build a statement to convert this intermediate result to type TYPE. */
994 if (TYPE_PRECISION (type
) > TYPE_PRECISION (half_type0
) * 2)
995 itype
= build_nonstandard_integer_type
996 (GET_MODE_BITSIZE (TYPE_MODE (half_type0
)) * 2,
997 TYPE_UNSIGNED (type
));
999 /* Pattern detected. */
1000 if (dump_enabled_p ())
1001 dump_printf_loc (MSG_NOTE
, vect_location
,
1002 "vect_recog_widen_mult_pattern: detected:\n");
1004 /* Check target support */
1005 vectype
= get_vectype_for_scalar_type (half_type0
);
1006 vecitype
= get_vectype_for_scalar_type (itype
);
1009 || !supportable_widening_operation (WIDEN_MULT_EXPR
, last_stmt
,
1011 &dummy_code
, &dummy_code
,
1012 &dummy_int
, &dummy_vec
))
1016 *type_out
= get_vectype_for_scalar_type (type
);
1018 /* Pattern supported. Create a stmt to be used to replace the pattern: */
1019 var
= vect_recog_temp_ssa_var (itype
, NULL
);
1020 pattern_stmt
= gimple_build_assign (var
, WIDEN_MULT_EXPR
, oprnd0
, oprnd1
);
1022 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
1023 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1024 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
1025 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
) = NULL
;
1027 /* If the original two operands have different sizes, we may need to convert
1028 the smaller one into the larget type. If this is the case, at this point
1029 the new stmt is already built. */
1032 append_pattern_def_seq (stmt_vinfo
, new_stmt
);
1033 stmt_vec_info new_stmt_info
1034 = new_stmt_vec_info (new_stmt
, loop_vinfo
, bb_vinfo
);
1035 set_vinfo_for_stmt (new_stmt
, new_stmt_info
);
1036 STMT_VINFO_VECTYPE (new_stmt_info
) = vectype
;
1039 /* If ITYPE is not TYPE, we need to build a type convertion stmt to convert
1040 the result of the widen-mult operation into type TYPE. */
1043 append_pattern_def_seq (stmt_vinfo
, pattern_stmt
);
1044 stmt_vec_info pattern_stmt_info
1045 = new_stmt_vec_info (pattern_stmt
, loop_vinfo
, bb_vinfo
);
1046 set_vinfo_for_stmt (pattern_stmt
, pattern_stmt_info
);
1047 STMT_VINFO_VECTYPE (pattern_stmt_info
) = vecitype
;
1048 pattern_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (type
, NULL
),
1050 gimple_assign_lhs (pattern_stmt
));
1053 if (dump_enabled_p ())
1054 dump_gimple_stmt_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, pattern_stmt
, 0);
1056 stmts
->safe_push (last_stmt
);
1057 return pattern_stmt
;
1061 /* Function vect_recog_pow_pattern
1063 Try to find the following pattern:
1067 with POW being one of pow, powf, powi, powif and N being
1072 * LAST_STMT: A stmt from which the pattern search begins.
1076 * TYPE_IN: The type of the input arguments to the pattern.
1078 * TYPE_OUT: The type of the output of this pattern.
1080 * Return value: A new stmt that will be used to replace the sequence of
1081 stmts that constitute the pattern. In this case it will be:
1088 vect_recog_pow_pattern (vec
<gimple
> *stmts
, tree
*type_in
,
1091 gimple last_stmt
= (*stmts
)[0];
1092 tree fn
, base
, exp
= NULL
;
1096 if (!is_gimple_call (last_stmt
) || gimple_call_lhs (last_stmt
) == NULL
)
1099 fn
= gimple_call_fndecl (last_stmt
);
1100 if (fn
== NULL_TREE
|| DECL_BUILT_IN_CLASS (fn
) != BUILT_IN_NORMAL
)
1103 switch (DECL_FUNCTION_CODE (fn
))
1105 case BUILT_IN_POWIF
:
1109 base
= gimple_call_arg (last_stmt
, 0);
1110 exp
= gimple_call_arg (last_stmt
, 1);
1111 if (TREE_CODE (exp
) != REAL_CST
1112 && TREE_CODE (exp
) != INTEGER_CST
)
1120 /* We now have a pow or powi builtin function call with a constant
1123 *type_out
= NULL_TREE
;
1125 /* Catch squaring. */
1126 if ((tree_fits_shwi_p (exp
)
1127 && tree_to_shwi (exp
) == 2)
1128 || (TREE_CODE (exp
) == REAL_CST
1129 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp
), dconst2
)))
1131 *type_in
= TREE_TYPE (base
);
1133 var
= vect_recog_temp_ssa_var (TREE_TYPE (base
), NULL
);
1134 stmt
= gimple_build_assign (var
, MULT_EXPR
, base
, base
);
1138 /* Catch square root. */
1139 if (TREE_CODE (exp
) == REAL_CST
1140 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp
), dconsthalf
))
1142 tree newfn
= mathfn_built_in (TREE_TYPE (base
), BUILT_IN_SQRT
);
1143 *type_in
= get_vectype_for_scalar_type (TREE_TYPE (base
));
1146 gcall
*stmt
= gimple_build_call (newfn
, 1, base
);
1147 if (vectorizable_function (stmt
, *type_in
, *type_in
)
1150 var
= vect_recog_temp_ssa_var (TREE_TYPE (base
), stmt
);
1151 gimple_call_set_lhs (stmt
, var
);
1161 /* Function vect_recog_widen_sum_pattern
1163 Try to find the following pattern:
1166 TYPE x_T, sum = init;
1168 sum_0 = phi <init, sum_1>
1170 S2 x_T = (TYPE) x_t;
1171 S3 sum_1 = x_T + sum_0;
1173 where type 'TYPE' is at least double the size of type 'type', i.e - we're
1174 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1175 a special case of a reduction computation.
1179 * LAST_STMT: A stmt from which the pattern search begins. In the example,
1180 when this function is called with S3, the pattern {S2,S3} will be detected.
1184 * TYPE_IN: The type of the input arguments to the pattern.
1186 * TYPE_OUT: The type of the output of this pattern.
1188 * Return value: A new stmt that will be used to replace the sequence of
1189 stmts that constitute the pattern. In this case it will be:
1190 WIDEN_SUM <x_t, sum_0>
1192 Note: The widening-sum idiom is a widening reduction pattern that is
1193 vectorized without preserving all the intermediate results. It
1194 produces only N/2 (widened) results (by summing up pairs of
1195 intermediate results) rather than all N results. Therefore, we
1196 cannot allow this pattern when we want to get all the results and in
1197 the correct order (as is the case when this computation is in an
1198 inner-loop nested in an outer-loop that us being vectorized). */
1201 vect_recog_widen_sum_pattern (vec
<gimple
> *stmts
, tree
*type_in
,
1204 gimple stmt
, last_stmt
= (*stmts
)[0];
1205 tree oprnd0
, oprnd1
;
1206 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
1207 tree type
, half_type
;
1208 gimple pattern_stmt
;
1209 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1217 loop
= LOOP_VINFO_LOOP (loop_info
);
1219 if (!is_gimple_assign (last_stmt
))
1222 type
= gimple_expr_type (last_stmt
);
1224 /* Look for the following pattern
1227 In which DX is at least double the size of X, and sum_1 has been
1228 recognized as a reduction variable.
1231 /* Starting from LAST_STMT, follow the defs of its uses in search
1232 of the above pattern. */
1234 if (gimple_assign_rhs_code (last_stmt
) != PLUS_EXPR
)
1237 if (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
)
1240 oprnd0
= gimple_assign_rhs1 (last_stmt
);
1241 oprnd1
= gimple_assign_rhs2 (last_stmt
);
1242 if (!types_compatible_p (TREE_TYPE (oprnd0
), type
)
1243 || !types_compatible_p (TREE_TYPE (oprnd1
), type
))
1246 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1247 we know that oprnd1 is the reduction variable (defined by a loop-header
1248 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1249 Left to check that oprnd0 is defined by a cast from type 'type' to type
1252 if (!type_conversion_p (oprnd0
, last_stmt
, true, &half_type
, &stmt
,
1257 oprnd0
= gimple_assign_rhs1 (stmt
);
1258 *type_in
= half_type
;
1261 /* Pattern detected. Create a stmt to be used to replace the pattern: */
1262 var
= vect_recog_temp_ssa_var (type
, NULL
);
1263 pattern_stmt
= gimple_build_assign (var
, WIDEN_SUM_EXPR
, oprnd0
, oprnd1
);
1265 if (dump_enabled_p ())
1267 dump_printf_loc (MSG_NOTE
, vect_location
,
1268 "vect_recog_widen_sum_pattern: detected: ");
1269 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
1270 dump_printf (MSG_NOTE
, "\n");
1273 /* We don't allow changing the order of the computation in the inner-loop
1274 when doing outer-loop vectorization. */
1275 gcc_assert (!nested_in_vect_loop_p (loop
, last_stmt
));
1277 return pattern_stmt
;
1281 /* Return TRUE if the operation in STMT can be performed on a smaller type.
1284 STMT - a statement to check.
1285 DEF - we support operations with two operands, one of which is constant.
1286 The other operand can be defined by a demotion operation, or by a
1287 previous statement in a sequence of over-promoted operations. In the
1288 later case DEF is used to replace that operand. (It is defined by a
1289 pattern statement we created for the previous statement in the
1293 NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not
1294 NULL, it's the type of DEF.
1295 STMTS - additional pattern statements. If a pattern statement (type
1296 conversion) is created in this function, its original statement is
1300 OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
1301 operands to use in the new pattern statement for STMT (will be created
1302 in vect_recog_over_widening_pattern ()).
1303 NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
1304 statements for STMT: the first one is a type promotion and the second
1305 one is the operation itself. We return the type promotion statement
1306 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
1307 the second pattern statement. */
1310 vect_operation_fits_smaller_type (gimple stmt
, tree def
, tree
*new_type
,
1311 tree
*op0
, tree
*op1
, gimple
*new_def_stmt
,
1314 enum tree_code code
;
1315 tree const_oprnd
, oprnd
;
1316 tree interm_type
= NULL_TREE
, half_type
, new_oprnd
, type
;
1317 gimple def_stmt
, new_stmt
;
1323 *new_def_stmt
= NULL
;
1325 if (!is_gimple_assign (stmt
))
1328 code
= gimple_assign_rhs_code (stmt
);
1329 if (code
!= LSHIFT_EXPR
&& code
!= RSHIFT_EXPR
1330 && code
!= BIT_IOR_EXPR
&& code
!= BIT_XOR_EXPR
&& code
!= BIT_AND_EXPR
)
1333 oprnd
= gimple_assign_rhs1 (stmt
);
1334 const_oprnd
= gimple_assign_rhs2 (stmt
);
1335 type
= gimple_expr_type (stmt
);
1337 if (TREE_CODE (oprnd
) != SSA_NAME
1338 || TREE_CODE (const_oprnd
) != INTEGER_CST
)
1341 /* If oprnd has other uses besides that in stmt we cannot mark it
1342 as being part of a pattern only. */
1343 if (!has_single_use (oprnd
))
1346 /* If we are in the middle of a sequence, we use DEF from a previous
1347 statement. Otherwise, OPRND has to be a result of type promotion. */
1350 half_type
= *new_type
;
1356 if (!type_conversion_p (oprnd
, stmt
, false, &half_type
, &def_stmt
,
1359 || !vect_same_loop_or_bb_p (stmt
, def_stmt
))
1363 /* Can we perform the operation on a smaller type? */
1369 if (!int_fits_type_p (const_oprnd
, half_type
))
1371 /* HALF_TYPE is not enough. Try a bigger type if possible. */
1372 if (TYPE_PRECISION (type
) < (TYPE_PRECISION (half_type
) * 4))
1375 interm_type
= build_nonstandard_integer_type (
1376 TYPE_PRECISION (half_type
) * 2, TYPE_UNSIGNED (type
));
1377 if (!int_fits_type_p (const_oprnd
, interm_type
))
1384 /* Try intermediate type - HALF_TYPE is not enough for sure. */
1385 if (TYPE_PRECISION (type
) < (TYPE_PRECISION (half_type
) * 4))
1388 /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
1389 (e.g., if the original value was char, the shift amount is at most 8
1390 if we want to use short). */
1391 if (compare_tree_int (const_oprnd
, TYPE_PRECISION (half_type
)) == 1)
1394 interm_type
= build_nonstandard_integer_type (
1395 TYPE_PRECISION (half_type
) * 2, TYPE_UNSIGNED (type
));
1397 if (!vect_supportable_shift (code
, interm_type
))
1403 if (vect_supportable_shift (code
, half_type
))
1406 /* Try intermediate type - HALF_TYPE is not supported. */
1407 if (TYPE_PRECISION (type
) < (TYPE_PRECISION (half_type
) * 4))
1410 interm_type
= build_nonstandard_integer_type (
1411 TYPE_PRECISION (half_type
) * 2, TYPE_UNSIGNED (type
));
1413 if (!vect_supportable_shift (code
, interm_type
))
1422 /* There are four possible cases:
1423 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
1424 the first statement in the sequence)
1425 a. The original, HALF_TYPE, is not enough - we replace the promotion
1426 from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
1427 b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
1429 2. OPRND is defined by a pattern statement we created.
1430 a. Its type is not sufficient for the operation, we create a new stmt:
1431 a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store
1432 this statement in NEW_DEF_STMT, and it is later put in
1433 STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
1434 b. OPRND is good to use in the new statement. */
1439 /* Replace the original type conversion HALF_TYPE->TYPE with
1440 HALF_TYPE->INTERM_TYPE. */
1441 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
)))
1443 new_stmt
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
));
1444 /* Check if the already created pattern stmt is what we need. */
1445 if (!is_gimple_assign (new_stmt
)
1446 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt
))
1447 || TREE_TYPE (gimple_assign_lhs (new_stmt
)) != interm_type
)
1450 stmts
->safe_push (def_stmt
);
1451 oprnd
= gimple_assign_lhs (new_stmt
);
1455 /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */
1456 oprnd
= gimple_assign_rhs1 (def_stmt
);
1457 new_oprnd
= make_ssa_name (interm_type
);
1458 new_stmt
= gimple_build_assign (new_oprnd
, NOP_EXPR
, oprnd
);
1459 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
)) = new_stmt
;
1460 stmts
->safe_push (def_stmt
);
1466 /* Retrieve the operand before the type promotion. */
1467 oprnd
= gimple_assign_rhs1 (def_stmt
);
1474 /* Create a type conversion HALF_TYPE->INTERM_TYPE. */
1475 new_oprnd
= make_ssa_name (interm_type
);
1476 new_stmt
= gimple_build_assign (new_oprnd
, NOP_EXPR
, oprnd
);
1478 *new_def_stmt
= new_stmt
;
1481 /* Otherwise, OPRND is already set. */
1485 *new_type
= interm_type
;
1487 *new_type
= half_type
;
1490 *op1
= fold_convert (*new_type
, const_oprnd
);
1496 /* Try to find a statement or a sequence of statements that can be performed
1500 TYPE x_T, res0_T, res1_T;
1503 S2 x_T = (TYPE) x_t;
1504 S3 res0_T = op (x_T, C0);
1505 S4 res1_T = op (res0_T, C1);
1506 S5 ... = () res1_T; - type demotion
1508 where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
1510 Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
1511 be 'type' or some intermediate type. For now, we expect S5 to be a type
1512 demotion operation. We also check that S3 and S4 have only one use. */
1515 vect_recog_over_widening_pattern (vec
<gimple
> *stmts
,
1516 tree
*type_in
, tree
*type_out
)
1518 gimple stmt
= stmts
->pop ();
1519 gimple pattern_stmt
= NULL
, new_def_stmt
, prev_stmt
= NULL
, use_stmt
= NULL
;
1520 tree op0
, op1
, vectype
= NULL_TREE
, use_lhs
, use_type
;
1521 tree var
= NULL_TREE
, new_type
= NULL_TREE
, new_oprnd
;
1528 if (!vinfo_for_stmt (stmt
)
1529 || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt
)))
1532 new_def_stmt
= NULL
;
1533 if (!vect_operation_fits_smaller_type (stmt
, var
, &new_type
,
1534 &op0
, &op1
, &new_def_stmt
,
1543 /* STMT can be performed on a smaller type. Check its uses. */
1544 use_stmt
= vect_single_imm_use (stmt
);
1545 if (!use_stmt
|| !is_gimple_assign (use_stmt
))
1548 /* Create pattern statement for STMT. */
1549 vectype
= get_vectype_for_scalar_type (new_type
);
1553 /* We want to collect all the statements for which we create pattern
1554 statetments, except for the case when the last statement in the
1555 sequence doesn't have a corresponding pattern statement. In such
1556 case we associate the last pattern statement with the last statement
1557 in the sequence. Therefore, we only add the original statement to
1558 the list if we know that it is not the last. */
1560 stmts
->safe_push (prev_stmt
);
1562 var
= vect_recog_temp_ssa_var (new_type
, NULL
);
1564 = gimple_build_assign (var
, gimple_assign_rhs_code (stmt
), op0
, op1
);
1565 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt
)) = pattern_stmt
;
1566 new_pattern_def_seq (vinfo_for_stmt (stmt
), new_def_stmt
);
1568 if (dump_enabled_p ())
1570 dump_printf_loc (MSG_NOTE
, vect_location
,
1571 "created pattern stmt: ");
1572 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
1573 dump_printf (MSG_NOTE
, "\n");
1576 type
= gimple_expr_type (stmt
);
1583 /* We got a sequence. We expect it to end with a type demotion operation.
1584 Otherwise, we quit (for now). There are three possible cases: the
1585 conversion is to NEW_TYPE (we don't do anything), the conversion is to
1586 a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
1587 NEW_TYPE differs (we create a new conversion statement). */
1588 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt
)))
1590 use_lhs
= gimple_assign_lhs (use_stmt
);
1591 use_type
= TREE_TYPE (use_lhs
);
1592 /* Support only type demotion or signedess change. */
1593 if (!INTEGRAL_TYPE_P (use_type
)
1594 || TYPE_PRECISION (type
) <= TYPE_PRECISION (use_type
))
1597 /* Check that NEW_TYPE is not bigger than the conversion result. */
1598 if (TYPE_PRECISION (new_type
) > TYPE_PRECISION (use_type
))
1601 if (TYPE_UNSIGNED (new_type
) != TYPE_UNSIGNED (use_type
)
1602 || TYPE_PRECISION (new_type
) != TYPE_PRECISION (use_type
))
1604 /* Create NEW_TYPE->USE_TYPE conversion. */
1605 new_oprnd
= make_ssa_name (use_type
);
1606 pattern_stmt
= gimple_build_assign (new_oprnd
, NOP_EXPR
, var
);
1607 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt
)) = pattern_stmt
;
1609 *type_in
= get_vectype_for_scalar_type (new_type
);
1610 *type_out
= get_vectype_for_scalar_type (use_type
);
1612 /* We created a pattern statement for the last statement in the
1613 sequence, so we don't need to associate it with the pattern
1614 statement created for PREV_STMT. Therefore, we add PREV_STMT
1615 to the list in order to mark it later in vect_pattern_recog_1. */
1617 stmts
->safe_push (prev_stmt
);
1622 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt
))
1623 = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt
));
1626 *type_out
= NULL_TREE
;
1629 stmts
->safe_push (use_stmt
);
1632 /* TODO: support general case, create a conversion to the correct type. */
1635 /* Pattern detected. */
1636 if (dump_enabled_p ())
1638 dump_printf_loc (MSG_NOTE
, vect_location
,
1639 "vect_recog_over_widening_pattern: detected: ");
1640 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
1641 dump_printf (MSG_NOTE
, "\n");
1644 return pattern_stmt
;
1647 /* Detect widening shift pattern:
1653 S2 a_T = (TYPE) a_t;
1654 S3 res_T = a_T << CONST;
1656 where type 'TYPE' is at least double the size of type 'type'.
1658 Also detect cases where the shift result is immediately converted
1659 to another type 'result_type' that is no larger in size than 'TYPE'.
1660 In those cases we perform a widen-shift that directly results in
1661 'result_type', to avoid a possible over-widening situation:
1665 result_type res_result;
1668 S2 a_T = (TYPE) a_t;
1669 S3 res_T = a_T << CONST;
1670 S4 res_result = (result_type) res_T;
1671 '--> res_result' = a_t w<< CONST;
1673 And a case when 'TYPE' is 4 times bigger than 'type'. In that case we
1674 create an additional pattern stmt for S2 to create a variable of an
1675 intermediate type, and perform widen-shift on the intermediate type:
1679 TYPE a_T, res_T, res_T';
1682 S2 a_T = (TYPE) a_t;
1683 '--> a_it = (interm_type) a_t;
1684 S3 res_T = a_T << CONST;
1685 '--> res_T' = a_it <<* CONST;
1689 * STMTS: Contains a stmt from which the pattern search begins.
1690 In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
1691 in STMTS. When an intermediate type is used and a pattern statement is
1692 created for S2, we also put S2 here (before S3).
1696 * TYPE_IN: The type of the input arguments to the pattern.
1698 * TYPE_OUT: The type of the output of this pattern.
1700 * Return value: A new stmt that will be used to replace the sequence of
1701 stmts that constitute the pattern. In this case it will be:
1702 WIDEN_LSHIFT_EXPR <a_t, CONST>. */
1705 vect_recog_widen_shift_pattern (vec
<gimple
> *stmts
,
1706 tree
*type_in
, tree
*type_out
)
1708 gimple last_stmt
= stmts
->pop ();
1710 tree oprnd0
, oprnd1
;
1711 tree type
, half_type0
;
1712 gimple pattern_stmt
;
1713 tree vectype
, vectype_out
= NULL_TREE
;
1715 enum tree_code dummy_code
;
1717 vec
<tree
> dummy_vec
;
1721 if (!is_gimple_assign (last_stmt
) || !vinfo_for_stmt (last_stmt
))
1724 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt
)))
1727 if (gimple_assign_rhs_code (last_stmt
) != LSHIFT_EXPR
)
1730 oprnd0
= gimple_assign_rhs1 (last_stmt
);
1731 oprnd1
= gimple_assign_rhs2 (last_stmt
);
1732 if (TREE_CODE (oprnd0
) != SSA_NAME
|| TREE_CODE (oprnd1
) != INTEGER_CST
)
1735 /* Check operand 0: it has to be defined by a type promotion. */
1736 if (!type_conversion_p (oprnd0
, last_stmt
, false, &half_type0
, &def_stmt0
,
1741 /* Check operand 1: has to be positive. We check that it fits the type
1742 in vect_handle_widen_op_by_const (). */
1743 if (tree_int_cst_compare (oprnd1
, size_zero_node
) <= 0)
1746 oprnd0
= gimple_assign_rhs1 (def_stmt0
);
1747 type
= gimple_expr_type (last_stmt
);
1749 /* Check for subsequent conversion to another type. */
1750 use_stmt
= vect_single_imm_use (last_stmt
);
1751 if (use_stmt
&& is_gimple_assign (use_stmt
)
1752 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt
))
1753 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt
)))
1755 tree use_lhs
= gimple_assign_lhs (use_stmt
);
1756 tree use_type
= TREE_TYPE (use_lhs
);
1758 if (INTEGRAL_TYPE_P (use_type
)
1759 && TYPE_PRECISION (use_type
) <= TYPE_PRECISION (type
))
1761 last_stmt
= use_stmt
;
1766 /* Check if this a widening operation. */
1767 if (!vect_handle_widen_op_by_const (last_stmt
, LSHIFT_EXPR
, oprnd1
,
1769 type
, &half_type0
, def_stmt0
))
1772 /* Pattern detected. */
1773 if (dump_enabled_p ())
1774 dump_printf_loc (MSG_NOTE
, vect_location
,
1775 "vect_recog_widen_shift_pattern: detected:\n");
1777 /* Check target support. */
1778 vectype
= get_vectype_for_scalar_type (half_type0
);
1779 vectype_out
= get_vectype_for_scalar_type (type
);
1783 || !supportable_widening_operation (WIDEN_LSHIFT_EXPR
, last_stmt
,
1784 vectype_out
, vectype
,
1785 &dummy_code
, &dummy_code
,
1786 &dummy_int
, &dummy_vec
))
1790 *type_out
= vectype_out
;
1792 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1793 var
= vect_recog_temp_ssa_var (type
, NULL
);
1795 gimple_build_assign (var
, WIDEN_LSHIFT_EXPR
, oprnd0
, oprnd1
);
1797 if (dump_enabled_p ())
1798 dump_gimple_stmt_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, pattern_stmt
, 0);
1800 stmts
->safe_push (last_stmt
);
1801 return pattern_stmt
;
1804 /* Detect a rotate pattern wouldn't be otherwise vectorized:
1808 S0 a_t = b_t r<< c_t;
1812 * STMTS: Contains a stmt from which the pattern search begins,
1813 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1817 S2 e_t = d_t & (B - 1);
1818 S3 f_t = b_t << c_t;
1819 S4 g_t = b_t >> e_t;
1822 where B is element bitsize of type.
1826 * TYPE_IN: The type of the input arguments to the pattern.
1828 * TYPE_OUT: The type of the output of this pattern.
1830 * Return value: A new stmt that will be used to replace the rotate
1834 vect_recog_rotate_pattern (vec
<gimple
> *stmts
, tree
*type_in
, tree
*type_out
)
1836 gimple last_stmt
= stmts
->pop ();
1837 tree oprnd0
, oprnd1
, lhs
, var
, var1
, var2
, vectype
, type
, stype
, def
, def2
;
1838 gimple pattern_stmt
, def_stmt
;
1839 enum tree_code rhs_code
;
1840 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
1841 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1842 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
1843 enum vect_def_type dt
;
1844 optab optab1
, optab2
;
1845 edge ext_def
= NULL
;
1847 if (!is_gimple_assign (last_stmt
))
1850 rhs_code
= gimple_assign_rhs_code (last_stmt
);
1860 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
1863 lhs
= gimple_assign_lhs (last_stmt
);
1864 oprnd0
= gimple_assign_rhs1 (last_stmt
);
1865 type
= TREE_TYPE (oprnd0
);
1866 oprnd1
= gimple_assign_rhs2 (last_stmt
);
1867 if (TREE_CODE (oprnd0
) != SSA_NAME
1868 || TYPE_PRECISION (TREE_TYPE (lhs
)) != TYPE_PRECISION (type
)
1869 || !INTEGRAL_TYPE_P (type
)
1870 || !TYPE_UNSIGNED (type
))
1873 if (!vect_is_simple_use (oprnd1
, last_stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
1877 if (dt
!= vect_internal_def
1878 && dt
!= vect_constant_def
1879 && dt
!= vect_external_def
)
1882 vectype
= get_vectype_for_scalar_type (type
);
1883 if (vectype
== NULL_TREE
)
1886 /* If vector/vector or vector/scalar rotate is supported by the target,
1887 don't do anything here. */
1888 optab1
= optab_for_tree_code (rhs_code
, vectype
, optab_vector
);
1890 && optab_handler (optab1
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
1893 if (bb_vinfo
!= NULL
|| dt
!= vect_internal_def
)
1895 optab2
= optab_for_tree_code (rhs_code
, vectype
, optab_scalar
);
1897 && optab_handler (optab2
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
1901 /* If vector/vector or vector/scalar shifts aren't supported by the target,
1902 don't do anything here either. */
1903 optab1
= optab_for_tree_code (LSHIFT_EXPR
, vectype
, optab_vector
);
1904 optab2
= optab_for_tree_code (RSHIFT_EXPR
, vectype
, optab_vector
);
1906 || optab_handler (optab1
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
1908 || optab_handler (optab2
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
1910 if (bb_vinfo
== NULL
&& dt
== vect_internal_def
)
1912 optab1
= optab_for_tree_code (LSHIFT_EXPR
, vectype
, optab_scalar
);
1913 optab2
= optab_for_tree_code (RSHIFT_EXPR
, vectype
, optab_scalar
);
1915 || optab_handler (optab1
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
1917 || optab_handler (optab2
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
1922 *type_out
= vectype
;
1923 if (*type_in
== NULL_TREE
)
1926 if (dt
== vect_external_def
1927 && TREE_CODE (oprnd1
) == SSA_NAME
1930 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1931 ext_def
= loop_preheader_edge (loop
);
1932 if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1
))
1934 basic_block bb
= gimple_bb (SSA_NAME_DEF_STMT (oprnd1
));
1936 || !dominated_by_p (CDI_DOMINATORS
, ext_def
->dest
, bb
))
1942 if (TREE_CODE (oprnd1
) == INTEGER_CST
1943 || TYPE_MODE (TREE_TYPE (oprnd1
)) == TYPE_MODE (type
))
1945 else if (def_stmt
&& gimple_assign_cast_p (def_stmt
))
1947 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
1948 if (TYPE_MODE (TREE_TYPE (rhs1
)) == TYPE_MODE (type
)
1949 && TYPE_PRECISION (TREE_TYPE (rhs1
))
1950 == TYPE_PRECISION (type
))
1954 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
) = NULL
;
1955 if (def
== NULL_TREE
)
1957 def
= vect_recog_temp_ssa_var (type
, NULL
);
1958 def_stmt
= gimple_build_assign (def
, NOP_EXPR
, oprnd1
);
1962 = gsi_insert_on_edge_immediate (ext_def
, def_stmt
);
1963 gcc_assert (!new_bb
);
1966 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
1968 stype
= TREE_TYPE (def
);
1970 if (TREE_CODE (def
) == INTEGER_CST
)
1972 if (!tree_fits_uhwi_p (def
)
1973 || tree_to_uhwi (def
) >= GET_MODE_PRECISION (TYPE_MODE (type
))
1974 || integer_zerop (def
))
1976 def2
= build_int_cst (stype
,
1977 GET_MODE_PRECISION (TYPE_MODE (type
))
1978 - tree_to_uhwi (def
));
1982 tree vecstype
= get_vectype_for_scalar_type (stype
);
1983 stmt_vec_info def_stmt_vinfo
;
1985 if (vecstype
== NULL_TREE
)
1987 def2
= vect_recog_temp_ssa_var (stype
, NULL
);
1988 def_stmt
= gimple_build_assign (def2
, NEGATE_EXPR
, def
);
1992 = gsi_insert_on_edge_immediate (ext_def
, def_stmt
);
1993 gcc_assert (!new_bb
);
1997 def_stmt_vinfo
= new_stmt_vec_info (def_stmt
, loop_vinfo
, bb_vinfo
);
1998 set_vinfo_for_stmt (def_stmt
, def_stmt_vinfo
);
1999 STMT_VINFO_VECTYPE (def_stmt_vinfo
) = vecstype
;
2000 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2003 def2
= vect_recog_temp_ssa_var (stype
, NULL
);
2005 = build_int_cst (stype
, GET_MODE_PRECISION (TYPE_MODE (stype
)) - 1);
2006 def_stmt
= gimple_build_assign (def2
, BIT_AND_EXPR
,
2007 gimple_assign_lhs (def_stmt
), mask
);
2011 = gsi_insert_on_edge_immediate (ext_def
, def_stmt
);
2012 gcc_assert (!new_bb
);
2016 def_stmt_vinfo
= new_stmt_vec_info (def_stmt
, loop_vinfo
, bb_vinfo
);
2017 set_vinfo_for_stmt (def_stmt
, def_stmt_vinfo
);
2018 STMT_VINFO_VECTYPE (def_stmt_vinfo
) = vecstype
;
2019 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2023 var1
= vect_recog_temp_ssa_var (type
, NULL
);
2024 def_stmt
= gimple_build_assign (var1
, rhs_code
== LROTATE_EXPR
2025 ? LSHIFT_EXPR
: RSHIFT_EXPR
,
2027 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2029 var2
= vect_recog_temp_ssa_var (type
, NULL
);
2030 def_stmt
= gimple_build_assign (var2
, rhs_code
== LROTATE_EXPR
2031 ? RSHIFT_EXPR
: LSHIFT_EXPR
,
2033 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2035 /* Pattern detected. */
2036 if (dump_enabled_p ())
2037 dump_printf_loc (MSG_NOTE
, vect_location
,
2038 "vect_recog_rotate_pattern: detected:\n");
2040 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2041 var
= vect_recog_temp_ssa_var (type
, NULL
);
2042 pattern_stmt
= gimple_build_assign (var
, BIT_IOR_EXPR
, var1
, var2
);
2044 if (dump_enabled_p ())
2045 dump_gimple_stmt_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, pattern_stmt
, 0);
2047 stmts
->safe_push (last_stmt
);
2048 return pattern_stmt
;
2051 /* Detect a vector by vector shift pattern that wouldn't be otherwise
2059 S3 res_T = b_T op a_t;
2061 where type 'TYPE' is a type with different size than 'type',
2062 and op is <<, >> or rotate.
2067 TYPE b_T, c_T, res_T;
2070 S1 a_t = (type) c_T;
2072 S3 res_T = b_T op a_t;
2076 * STMTS: Contains a stmt from which the pattern search begins,
2077 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2078 with a shift/rotate which has same type on both operands, in the
2079 second case just b_T op c_T, in the first case with added cast
2080 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2084 * TYPE_IN: The type of the input arguments to the pattern.
2086 * TYPE_OUT: The type of the output of this pattern.
2088 * Return value: A new stmt that will be used to replace the shift/rotate
2092 vect_recog_vector_vector_shift_pattern (vec
<gimple
> *stmts
,
2093 tree
*type_in
, tree
*type_out
)
2095 gimple last_stmt
= stmts
->pop ();
2096 tree oprnd0
, oprnd1
, lhs
, var
;
2097 gimple pattern_stmt
, def_stmt
;
2098 enum tree_code rhs_code
;
2099 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
2100 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
2101 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
2102 enum vect_def_type dt
;
2105 if (!is_gimple_assign (last_stmt
))
2108 rhs_code
= gimple_assign_rhs_code (last_stmt
);
2120 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
2123 lhs
= gimple_assign_lhs (last_stmt
);
2124 oprnd0
= gimple_assign_rhs1 (last_stmt
);
2125 oprnd1
= gimple_assign_rhs2 (last_stmt
);
2126 if (TREE_CODE (oprnd0
) != SSA_NAME
2127 || TREE_CODE (oprnd1
) != SSA_NAME
2128 || TYPE_MODE (TREE_TYPE (oprnd0
)) == TYPE_MODE (TREE_TYPE (oprnd1
))
2129 || TYPE_PRECISION (TREE_TYPE (oprnd1
))
2130 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1
)))
2131 || TYPE_PRECISION (TREE_TYPE (lhs
))
2132 != TYPE_PRECISION (TREE_TYPE (oprnd0
)))
2135 if (!vect_is_simple_use (oprnd1
, last_stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
2139 if (dt
!= vect_internal_def
)
2142 *type_in
= get_vectype_for_scalar_type (TREE_TYPE (oprnd0
));
2143 *type_out
= *type_in
;
2144 if (*type_in
== NULL_TREE
)
2148 if (gimple_assign_cast_p (def_stmt
))
2150 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
2151 if (TYPE_MODE (TREE_TYPE (rhs1
)) == TYPE_MODE (TREE_TYPE (oprnd0
))
2152 && TYPE_PRECISION (TREE_TYPE (rhs1
))
2153 == TYPE_PRECISION (TREE_TYPE (oprnd0
)))
2157 if (def
== NULL_TREE
)
2159 def
= vect_recog_temp_ssa_var (TREE_TYPE (oprnd0
), NULL
);
2160 def_stmt
= gimple_build_assign (def
, NOP_EXPR
, oprnd1
);
2161 new_pattern_def_seq (stmt_vinfo
, def_stmt
);
2164 /* Pattern detected. */
2165 if (dump_enabled_p ())
2166 dump_printf_loc (MSG_NOTE
, vect_location
,
2167 "vect_recog_vector_vector_shift_pattern: detected:\n");
2169 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2170 var
= vect_recog_temp_ssa_var (TREE_TYPE (oprnd0
), NULL
);
2171 pattern_stmt
= gimple_build_assign (var
, rhs_code
, oprnd0
, def
);
2173 if (dump_enabled_p ())
2174 dump_gimple_stmt_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, pattern_stmt
, 0);
2176 stmts
->safe_push (last_stmt
);
2177 return pattern_stmt
;
2180 /* Detect a signed division by a constant that wouldn't be
2181 otherwise vectorized:
2187 where type 'type' is an integral type and N is a constant.
2189 Similarly handle modulo by a constant:
2195 * STMTS: Contains a stmt from which the pattern search begins,
2196 i.e. the division stmt. S1 is replaced by if N is a power
2197 of two constant and type is signed:
2198 S3 y_t = b_t < 0 ? N - 1 : 0;
2200 S1' a_t = x_t >> log2 (N);
2202 S4 is replaced if N is a power of two constant and
2203 type is signed by (where *_T temporaries have unsigned type):
2204 S9 y_T = b_t < 0 ? -1U : 0U;
2205 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2206 S7 z_t = (type) z_T;
2208 S5 x_t = w_t & (N - 1);
2209 S4' a_t = x_t - z_t;
2213 * TYPE_IN: The type of the input arguments to the pattern.
2215 * TYPE_OUT: The type of the output of this pattern.
2217 * Return value: A new stmt that will be used to replace the division
2218 S1 or modulo S4 stmt. */
2221 vect_recog_divmod_pattern (vec
<gimple
> *stmts
,
2222 tree
*type_in
, tree
*type_out
)
2224 gimple last_stmt
= stmts
->pop ();
2225 tree oprnd0
, oprnd1
, vectype
, itype
, cond
;
2226 gimple pattern_stmt
, def_stmt
;
2227 enum tree_code rhs_code
;
2228 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
2229 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
2230 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
2233 int dummy_int
, prec
;
2234 stmt_vec_info def_stmt_vinfo
;
2236 if (!is_gimple_assign (last_stmt
))
2239 rhs_code
= gimple_assign_rhs_code (last_stmt
);
2242 case TRUNC_DIV_EXPR
:
2243 case TRUNC_MOD_EXPR
:
2249 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
2252 oprnd0
= gimple_assign_rhs1 (last_stmt
);
2253 oprnd1
= gimple_assign_rhs2 (last_stmt
);
2254 itype
= TREE_TYPE (oprnd0
);
2255 if (TREE_CODE (oprnd0
) != SSA_NAME
2256 || TREE_CODE (oprnd1
) != INTEGER_CST
2257 || TREE_CODE (itype
) != INTEGER_TYPE
2258 || TYPE_PRECISION (itype
) != GET_MODE_PRECISION (TYPE_MODE (itype
)))
2261 vectype
= get_vectype_for_scalar_type (itype
);
2262 if (vectype
== NULL_TREE
)
2265 /* If the target can handle vectorized division or modulo natively,
2266 don't attempt to optimize this. */
2267 optab
= optab_for_tree_code (rhs_code
, vectype
, optab_default
);
2268 if (optab
!= unknown_optab
)
2270 machine_mode vec_mode
= TYPE_MODE (vectype
);
2271 int icode
= (int) optab_handler (optab
, vec_mode
);
2272 if (icode
!= CODE_FOR_nothing
)
2276 prec
= TYPE_PRECISION (itype
);
2277 if (integer_pow2p (oprnd1
))
2279 if (TYPE_UNSIGNED (itype
) || tree_int_cst_sgn (oprnd1
) != 1)
2282 /* Pattern detected. */
2283 if (dump_enabled_p ())
2284 dump_printf_loc (MSG_NOTE
, vect_location
,
2285 "vect_recog_divmod_pattern: detected:\n");
2287 cond
= build2 (LT_EXPR
, boolean_type_node
, oprnd0
,
2288 build_int_cst (itype
, 0));
2289 if (rhs_code
== TRUNC_DIV_EXPR
)
2291 tree var
= vect_recog_temp_ssa_var (itype
, NULL
);
2294 = gimple_build_assign (var
, COND_EXPR
, cond
,
2295 fold_build2 (MINUS_EXPR
, itype
, oprnd1
,
2296 build_int_cst (itype
, 1)),
2297 build_int_cst (itype
, 0));
2298 new_pattern_def_seq (stmt_vinfo
, def_stmt
);
2299 var
= vect_recog_temp_ssa_var (itype
, NULL
);
2301 = gimple_build_assign (var
, PLUS_EXPR
, oprnd0
,
2302 gimple_assign_lhs (def_stmt
));
2303 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2305 shift
= build_int_cst (itype
, tree_log2 (oprnd1
));
2307 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2308 RSHIFT_EXPR
, var
, shift
);
2313 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
) = NULL
;
2314 if (compare_tree_int (oprnd1
, 2) == 0)
2316 signmask
= vect_recog_temp_ssa_var (itype
, NULL
);
2317 def_stmt
= gimple_build_assign (signmask
, COND_EXPR
, cond
,
2318 build_int_cst (itype
, 1),
2319 build_int_cst (itype
, 0));
2320 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2325 = build_nonstandard_integer_type (prec
, 1);
2326 tree vecutype
= get_vectype_for_scalar_type (utype
);
2328 = build_int_cst (utype
, GET_MODE_BITSIZE (TYPE_MODE (itype
))
2329 - tree_log2 (oprnd1
));
2330 tree var
= vect_recog_temp_ssa_var (utype
, NULL
);
2332 def_stmt
= gimple_build_assign (var
, COND_EXPR
, cond
,
2333 build_int_cst (utype
, -1),
2334 build_int_cst (utype
, 0));
2336 = new_stmt_vec_info (def_stmt
, loop_vinfo
, bb_vinfo
);
2337 set_vinfo_for_stmt (def_stmt
, def_stmt_vinfo
);
2338 STMT_VINFO_VECTYPE (def_stmt_vinfo
) = vecutype
;
2339 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2340 var
= vect_recog_temp_ssa_var (utype
, NULL
);
2341 def_stmt
= gimple_build_assign (var
, RSHIFT_EXPR
,
2342 gimple_assign_lhs (def_stmt
),
2345 = new_stmt_vec_info (def_stmt
, loop_vinfo
, bb_vinfo
);
2346 set_vinfo_for_stmt (def_stmt
, def_stmt_vinfo
);
2347 STMT_VINFO_VECTYPE (def_stmt_vinfo
) = vecutype
;
2348 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2349 signmask
= vect_recog_temp_ssa_var (itype
, NULL
);
2351 = gimple_build_assign (signmask
, NOP_EXPR
, var
);
2352 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2355 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2356 PLUS_EXPR
, oprnd0
, signmask
);
2357 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2359 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2360 BIT_AND_EXPR
, gimple_assign_lhs (def_stmt
),
2361 fold_build2 (MINUS_EXPR
, itype
, oprnd1
,
2362 build_int_cst (itype
, 1)));
2363 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2366 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2367 MINUS_EXPR
, gimple_assign_lhs (def_stmt
),
2371 if (dump_enabled_p ())
2372 dump_gimple_stmt_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, pattern_stmt
,
2375 stmts
->safe_push (last_stmt
);
2378 *type_out
= vectype
;
2379 return pattern_stmt
;
2382 if (prec
> HOST_BITS_PER_WIDE_INT
2383 || integer_zerop (oprnd1
))
2386 if (!can_mult_highpart_p (TYPE_MODE (vectype
), TYPE_UNSIGNED (itype
)))
2389 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
) = NULL
;
2391 if (TYPE_UNSIGNED (itype
))
2393 unsigned HOST_WIDE_INT mh
, ml
;
2394 int pre_shift
, post_shift
;
2395 unsigned HOST_WIDE_INT d
= (TREE_INT_CST_LOW (oprnd1
)
2396 & GET_MODE_MASK (TYPE_MODE (itype
)));
2397 tree t1
, t2
, t3
, t4
;
2399 if (d
>= ((unsigned HOST_WIDE_INT
) 1 << (prec
- 1)))
2400 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2403 /* Find a suitable multiplier and right shift count
2404 instead of multiplying with D. */
2405 mh
= choose_multiplier (d
, prec
, prec
, &ml
, &post_shift
, &dummy_int
);
2407 /* If the suggested multiplier is more than SIZE bits, we can do better
2408 for even divisors, using an initial right shift. */
2409 if (mh
!= 0 && (d
& 1) == 0)
2411 pre_shift
= floor_log2 (d
& -d
);
2412 mh
= choose_multiplier (d
>> pre_shift
, prec
, prec
- pre_shift
,
2413 &ml
, &post_shift
, &dummy_int
);
2421 if (post_shift
- 1 >= prec
)
2424 /* t1 = oprnd0 h* ml;
2428 q = t4 >> (post_shift - 1); */
2429 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
2430 def_stmt
= gimple_build_assign (t1
, MULT_HIGHPART_EXPR
, oprnd0
,
2431 build_int_cst (itype
, ml
));
2432 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2434 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
2436 = gimple_build_assign (t2
, MINUS_EXPR
, oprnd0
, t1
);
2437 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2439 t3
= vect_recog_temp_ssa_var (itype
, NULL
);
2441 = gimple_build_assign (t3
, RSHIFT_EXPR
, t2
, integer_one_node
);
2442 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2444 t4
= vect_recog_temp_ssa_var (itype
, NULL
);
2446 = gimple_build_assign (t4
, PLUS_EXPR
, t1
, t3
);
2448 if (post_shift
!= 1)
2450 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2452 q
= vect_recog_temp_ssa_var (itype
, NULL
);
2454 = gimple_build_assign (q
, RSHIFT_EXPR
, t4
,
2455 build_int_cst (itype
, post_shift
- 1));
2460 pattern_stmt
= def_stmt
;
2465 if (pre_shift
>= prec
|| post_shift
>= prec
)
2468 /* t1 = oprnd0 >> pre_shift;
2470 q = t2 >> post_shift; */
2473 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
2475 = gimple_build_assign (t1
, RSHIFT_EXPR
, oprnd0
,
2476 build_int_cst (NULL
, pre_shift
));
2477 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2482 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
2483 def_stmt
= gimple_build_assign (t2
, MULT_HIGHPART_EXPR
, t1
,
2484 build_int_cst (itype
, ml
));
2488 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2490 q
= vect_recog_temp_ssa_var (itype
, NULL
);
2492 = gimple_build_assign (q
, RSHIFT_EXPR
, t2
,
2493 build_int_cst (itype
, post_shift
));
2498 pattern_stmt
= def_stmt
;
2503 unsigned HOST_WIDE_INT ml
;
2505 HOST_WIDE_INT d
= TREE_INT_CST_LOW (oprnd1
);
2506 unsigned HOST_WIDE_INT abs_d
;
2508 tree t1
, t2
, t3
, t4
;
2510 /* Give up for -1. */
2514 /* Since d might be INT_MIN, we have to cast to
2515 unsigned HOST_WIDE_INT before negating to avoid
2516 undefined signed overflow. */
2518 ? (unsigned HOST_WIDE_INT
) d
2519 : - (unsigned HOST_WIDE_INT
) d
);
2521 /* n rem d = n rem -d */
2522 if (rhs_code
== TRUNC_MOD_EXPR
&& d
< 0)
2525 oprnd1
= build_int_cst (itype
, abs_d
);
2527 else if (HOST_BITS_PER_WIDE_INT
>= prec
2528 && abs_d
== (unsigned HOST_WIDE_INT
) 1 << (prec
- 1))
2529 /* This case is not handled correctly below. */
2532 choose_multiplier (abs_d
, prec
, prec
- 1, &ml
, &post_shift
, &dummy_int
);
2533 if (ml
>= (unsigned HOST_WIDE_INT
) 1 << (prec
- 1))
2536 ml
|= (~(unsigned HOST_WIDE_INT
) 0) << (prec
- 1);
2538 if (post_shift
>= prec
)
2541 /* t1 = oprnd0 h* ml; */
2542 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
2543 def_stmt
= gimple_build_assign (t1
, MULT_HIGHPART_EXPR
, oprnd0
,
2544 build_int_cst (itype
, ml
));
2548 /* t2 = t1 + oprnd0; */
2549 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2550 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
2551 def_stmt
= gimple_build_assign (t2
, PLUS_EXPR
, t1
, oprnd0
);
2558 /* t3 = t2 >> post_shift; */
2559 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2560 t3
= vect_recog_temp_ssa_var (itype
, NULL
);
2561 def_stmt
= gimple_build_assign (t3
, RSHIFT_EXPR
, t2
,
2562 build_int_cst (itype
, post_shift
));
2567 wide_int oprnd0_min
, oprnd0_max
;
2569 if (get_range_info (oprnd0
, &oprnd0_min
, &oprnd0_max
) == VR_RANGE
)
2571 if (!wi::neg_p (oprnd0_min
, TYPE_SIGN (itype
)))
2573 else if (wi::neg_p (oprnd0_max
, TYPE_SIGN (itype
)))
2577 if (msb
== 0 && d
>= 0)
2581 pattern_stmt
= def_stmt
;
2585 /* t4 = oprnd0 >> (prec - 1);
2586 or if we know from VRP that oprnd0 >= 0
2588 or if we know from VRP that oprnd0 < 0
2590 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2591 t4
= vect_recog_temp_ssa_var (itype
, NULL
);
2593 def_stmt
= gimple_build_assign (t4
, INTEGER_CST
,
2594 build_int_cst (itype
, msb
));
2596 def_stmt
= gimple_build_assign (t4
, RSHIFT_EXPR
, oprnd0
,
2597 build_int_cst (itype
, prec
- 1));
2598 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2600 /* q = t3 - t4; or q = t4 - t3; */
2601 q
= vect_recog_temp_ssa_var (itype
, NULL
);
2602 pattern_stmt
= gimple_build_assign (q
, MINUS_EXPR
, d
< 0 ? t4
: t3
,
2607 if (rhs_code
== TRUNC_MOD_EXPR
)
2611 /* We divided. Now finish by:
2614 append_pattern_def_seq (stmt_vinfo
, pattern_stmt
);
2616 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
2617 def_stmt
= gimple_build_assign (t1
, MULT_EXPR
, q
, oprnd1
);
2618 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2620 r
= vect_recog_temp_ssa_var (itype
, NULL
);
2621 pattern_stmt
= gimple_build_assign (r
, MINUS_EXPR
, oprnd0
, t1
);
2624 /* Pattern detected. */
2625 if (dump_enabled_p ())
2627 dump_printf_loc (MSG_NOTE
, vect_location
,
2628 "vect_recog_divmod_pattern: detected: ");
2629 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
2630 dump_printf (MSG_NOTE
, "\n");
2633 stmts
->safe_push (last_stmt
);
2636 *type_out
= vectype
;
2637 return pattern_stmt
;
2640 /* Function vect_recog_mixed_size_cond_pattern
2642 Try to find the following pattern:
2647 S1 a_T = x_t CMP y_t ? b_T : c_T;
2649 where type 'TYPE' is an integral type which has different size
2650 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
2651 than 'type', the constants need to fit into an integer type
2652 with the same width as 'type') or results of conversion from 'type'.
2656 * LAST_STMT: A stmt from which the pattern search begins.
2660 * TYPE_IN: The type of the input arguments to the pattern.
2662 * TYPE_OUT: The type of the output of this pattern.
2664 * Return value: A new stmt that will be used to replace the pattern.
2665 Additionally a def_stmt is added.
2667 a_it = x_t CMP y_t ? b_it : c_it;
2668 a_T = (TYPE) a_it; */
2671 vect_recog_mixed_size_cond_pattern (vec
<gimple
> *stmts
, tree
*type_in
,
2674 gimple last_stmt
= (*stmts
)[0];
2675 tree cond_expr
, then_clause
, else_clause
;
2676 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
), def_stmt_info
;
2677 tree type
, vectype
, comp_vectype
, itype
= NULL_TREE
, vecitype
;
2678 machine_mode cmpmode
;
2679 gimple pattern_stmt
, def_stmt
;
2680 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
2681 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
2682 tree orig_type0
= NULL_TREE
, orig_type1
= NULL_TREE
;
2683 gimple def_stmt0
= NULL
, def_stmt1
= NULL
;
2685 tree comp_scalar_type
;
2687 if (!is_gimple_assign (last_stmt
)
2688 || gimple_assign_rhs_code (last_stmt
) != COND_EXPR
2689 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_internal_def
)
2692 cond_expr
= gimple_assign_rhs1 (last_stmt
);
2693 then_clause
= gimple_assign_rhs2 (last_stmt
);
2694 else_clause
= gimple_assign_rhs3 (last_stmt
);
2696 if (!COMPARISON_CLASS_P (cond_expr
))
2699 comp_scalar_type
= TREE_TYPE (TREE_OPERAND (cond_expr
, 0));
2700 comp_vectype
= get_vectype_for_scalar_type (comp_scalar_type
);
2701 if (comp_vectype
== NULL_TREE
)
2704 type
= gimple_expr_type (last_stmt
);
2705 if (types_compatible_p (type
, comp_scalar_type
)
2706 || ((TREE_CODE (then_clause
) != INTEGER_CST
2707 || TREE_CODE (else_clause
) != INTEGER_CST
)
2708 && !INTEGRAL_TYPE_P (comp_scalar_type
))
2709 || !INTEGRAL_TYPE_P (type
))
2712 if ((TREE_CODE (then_clause
) != INTEGER_CST
2713 && !type_conversion_p (then_clause
, last_stmt
, false, &orig_type0
,
2714 &def_stmt0
, &promotion
))
2715 || (TREE_CODE (else_clause
) != INTEGER_CST
2716 && !type_conversion_p (else_clause
, last_stmt
, false, &orig_type1
,
2717 &def_stmt1
, &promotion
)))
2720 if (orig_type0
&& orig_type1
2721 && !types_compatible_p (orig_type0
, orig_type1
))
2726 if (!types_compatible_p (orig_type0
, comp_scalar_type
))
2728 then_clause
= gimple_assign_rhs1 (def_stmt0
);
2734 if (!types_compatible_p (orig_type1
, comp_scalar_type
))
2736 else_clause
= gimple_assign_rhs1 (def_stmt1
);
2740 cmpmode
= GET_MODE_INNER (TYPE_MODE (comp_vectype
));
2742 if (GET_MODE_BITSIZE (TYPE_MODE (type
)) == GET_MODE_BITSIZE (cmpmode
))
2745 vectype
= get_vectype_for_scalar_type (type
);
2746 if (vectype
== NULL_TREE
)
2749 if (expand_vec_cond_expr_p (vectype
, comp_vectype
))
2752 if (itype
== NULL_TREE
)
2753 itype
= build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode
),
2754 TYPE_UNSIGNED (type
));
2756 if (itype
== NULL_TREE
2757 || GET_MODE_BITSIZE (TYPE_MODE (itype
)) != GET_MODE_BITSIZE (cmpmode
))
2760 vecitype
= get_vectype_for_scalar_type (itype
);
2761 if (vecitype
== NULL_TREE
)
2764 if (!expand_vec_cond_expr_p (vecitype
, comp_vectype
))
2767 if (GET_MODE_BITSIZE (TYPE_MODE (type
)) > GET_MODE_BITSIZE (cmpmode
))
2769 if ((TREE_CODE (then_clause
) == INTEGER_CST
2770 && !int_fits_type_p (then_clause
, itype
))
2771 || (TREE_CODE (else_clause
) == INTEGER_CST
2772 && !int_fits_type_p (else_clause
, itype
)))
2776 def_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2777 COND_EXPR
, unshare_expr (cond_expr
),
2778 fold_convert (itype
, then_clause
),
2779 fold_convert (itype
, else_clause
));
2780 pattern_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (type
, NULL
),
2781 NOP_EXPR
, gimple_assign_lhs (def_stmt
));
2783 new_pattern_def_seq (stmt_vinfo
, def_stmt
);
2784 def_stmt_info
= new_stmt_vec_info (def_stmt
, loop_vinfo
, bb_vinfo
);
2785 set_vinfo_for_stmt (def_stmt
, def_stmt_info
);
2786 STMT_VINFO_VECTYPE (def_stmt_info
) = vecitype
;
2787 *type_in
= vecitype
;
2788 *type_out
= vectype
;
2790 if (dump_enabled_p ())
2791 dump_printf_loc (MSG_NOTE
, vect_location
,
2792 "vect_recog_mixed_size_cond_pattern: detected:\n");
2794 return pattern_stmt
;
2798 /* Helper function of vect_recog_bool_pattern. Called recursively, return
2799 true if bool VAR can be optimized that way. */
2802 check_bool_pattern (tree var
, loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
)
2805 enum vect_def_type dt
;
2807 enum tree_code rhs_code
;
2809 if (!vect_is_simple_use (var
, NULL
, loop_vinfo
, bb_vinfo
, &def_stmt
, &def
,
2813 if (dt
!= vect_internal_def
)
2816 if (!is_gimple_assign (def_stmt
))
2819 if (!has_single_use (def
))
2822 rhs1
= gimple_assign_rhs1 (def_stmt
);
2823 rhs_code
= gimple_assign_rhs_code (def_stmt
);
2827 return check_bool_pattern (rhs1
, loop_vinfo
, bb_vinfo
);
2830 if ((TYPE_PRECISION (TREE_TYPE (rhs1
)) != 1
2831 || !TYPE_UNSIGNED (TREE_TYPE (rhs1
)))
2832 && TREE_CODE (TREE_TYPE (rhs1
)) != BOOLEAN_TYPE
)
2834 return check_bool_pattern (rhs1
, loop_vinfo
, bb_vinfo
);
2837 return check_bool_pattern (rhs1
, loop_vinfo
, bb_vinfo
);
2842 if (!check_bool_pattern (rhs1
, loop_vinfo
, bb_vinfo
))
2844 return check_bool_pattern (gimple_assign_rhs2 (def_stmt
), loop_vinfo
,
2848 if (TREE_CODE_CLASS (rhs_code
) == tcc_comparison
)
2850 tree vecitype
, comp_vectype
;
2852 /* If the comparison can throw, then is_gimple_condexpr will be
2853 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
2854 if (stmt_could_throw_p (def_stmt
))
2857 comp_vectype
= get_vectype_for_scalar_type (TREE_TYPE (rhs1
));
2858 if (comp_vectype
== NULL_TREE
)
2861 if (TREE_CODE (TREE_TYPE (rhs1
)) != INTEGER_TYPE
)
2863 machine_mode mode
= TYPE_MODE (TREE_TYPE (rhs1
));
2865 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode
), 1);
2866 vecitype
= get_vectype_for_scalar_type (itype
);
2867 if (vecitype
== NULL_TREE
)
2871 vecitype
= comp_vectype
;
2872 return expand_vec_cond_expr_p (vecitype
, comp_vectype
);
2879 /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
2880 stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
2881 to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT. */
2884 adjust_bool_pattern_cast (tree type
, tree var
)
2886 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (SSA_NAME_DEF_STMT (var
));
2887 gimple cast_stmt
, pattern_stmt
;
2889 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
));
2890 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_vinfo
);
2891 new_pattern_def_seq (stmt_vinfo
, pattern_stmt
);
2892 cast_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (type
, NULL
),
2893 NOP_EXPR
, gimple_assign_lhs (pattern_stmt
));
2894 STMT_VINFO_RELATED_STMT (stmt_vinfo
) = cast_stmt
;
2895 return gimple_assign_lhs (cast_stmt
);
2899 /* Helper function of vect_recog_bool_pattern. Do the actual transformations,
2900 recursively. VAR is an SSA_NAME that should be transformed from bool
2901 to a wider integer type, OUT_TYPE is the desired final integer type of
2902 the whole pattern, TRUEVAL should be NULL unless optimizing
2903 BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
2904 in the then_clause, STMTS is where statements with added pattern stmts
2905 should be pushed to. */
2908 adjust_bool_pattern (tree var
, tree out_type
, tree trueval
,
2911 gimple stmt
= SSA_NAME_DEF_STMT (var
);
2912 enum tree_code rhs_code
, def_rhs_code
;
2913 tree itype
, cond_expr
, rhs1
, rhs2
, irhs1
, irhs2
;
2915 gimple pattern_stmt
, def_stmt
;
2917 rhs1
= gimple_assign_rhs1 (stmt
);
2918 rhs2
= gimple_assign_rhs2 (stmt
);
2919 rhs_code
= gimple_assign_rhs_code (stmt
);
2920 loc
= gimple_location (stmt
);
2925 irhs1
= adjust_bool_pattern (rhs1
, out_type
, NULL_TREE
, stmts
);
2926 itype
= TREE_TYPE (irhs1
);
2928 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2933 irhs1
= adjust_bool_pattern (rhs1
, out_type
, NULL_TREE
, stmts
);
2934 itype
= TREE_TYPE (irhs1
);
2936 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2937 BIT_XOR_EXPR
, irhs1
, build_int_cst (itype
, 1));
2941 /* Try to optimize x = y & (a < b ? 1 : 0); into
2942 x = (a < b ? y : 0);
2948 S1 a_b = x1 CMP1 y1;
2949 S2 b_b = x2 CMP2 y2;
2951 S4 d_T = (TYPE) c_b;
2953 we would normally emit:
2955 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2956 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2957 S3' c_T = a_T & b_T;
2960 but we can save one stmt by using the
2961 result of one of the COND_EXPRs in the other COND_EXPR and leave
2962 BIT_AND_EXPR stmt out:
2964 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2965 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2968 At least when VEC_COND_EXPR is implemented using masks
2969 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
2970 computes the comparison masks and ands it, in one case with
2971 all ones vector, in the other case with a vector register.
2972 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
2973 often more expensive. */
2974 def_stmt
= SSA_NAME_DEF_STMT (rhs2
);
2975 def_rhs_code
= gimple_assign_rhs_code (def_stmt
);
2976 if (TREE_CODE_CLASS (def_rhs_code
) == tcc_comparison
)
2978 tree def_rhs1
= gimple_assign_rhs1 (def_stmt
);
2979 irhs1
= adjust_bool_pattern (rhs1
, out_type
, NULL_TREE
, stmts
);
2980 if (TYPE_PRECISION (TREE_TYPE (irhs1
))
2981 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1
))))
2984 stmt_vec_info stmt_def_vinfo
= vinfo_for_stmt (def_stmt
);
2985 irhs2
= adjust_bool_pattern (rhs2
, out_type
, irhs1
, stmts
);
2986 tstmt
= stmts
->pop ();
2987 gcc_assert (tstmt
== def_stmt
);
2988 stmts
->quick_push (stmt
);
2989 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt
))
2990 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo
);
2991 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo
));
2992 STMT_VINFO_RELATED_STMT (stmt_def_vinfo
) = NULL
;
2996 irhs2
= adjust_bool_pattern (rhs2
, out_type
, NULL_TREE
, stmts
);
2999 def_stmt
= SSA_NAME_DEF_STMT (rhs1
);
3000 def_rhs_code
= gimple_assign_rhs_code (def_stmt
);
3001 if (TREE_CODE_CLASS (def_rhs_code
) == tcc_comparison
)
3003 tree def_rhs1
= gimple_assign_rhs1 (def_stmt
);
3004 irhs2
= adjust_bool_pattern (rhs2
, out_type
, NULL_TREE
, stmts
);
3005 if (TYPE_PRECISION (TREE_TYPE (irhs2
))
3006 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1
))))
3009 stmt_vec_info stmt_def_vinfo
= vinfo_for_stmt (def_stmt
);
3010 irhs1
= adjust_bool_pattern (rhs1
, out_type
, irhs2
, stmts
);
3011 tstmt
= stmts
->pop ();
3012 gcc_assert (tstmt
== def_stmt
);
3013 stmts
->quick_push (stmt
);
3014 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt
))
3015 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo
);
3016 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo
));
3017 STMT_VINFO_RELATED_STMT (stmt_def_vinfo
) = NULL
;
3021 irhs1
= adjust_bool_pattern (rhs1
, out_type
, NULL_TREE
, stmts
);
3027 irhs1
= adjust_bool_pattern (rhs1
, out_type
, NULL_TREE
, stmts
);
3028 irhs2
= adjust_bool_pattern (rhs2
, out_type
, NULL_TREE
, stmts
);
3030 if (TYPE_PRECISION (TREE_TYPE (irhs1
))
3031 != TYPE_PRECISION (TREE_TYPE (irhs2
)))
3033 int prec1
= TYPE_PRECISION (TREE_TYPE (irhs1
));
3034 int prec2
= TYPE_PRECISION (TREE_TYPE (irhs2
));
3035 int out_prec
= TYPE_PRECISION (out_type
);
3036 if (absu_hwi (out_prec
- prec1
) < absu_hwi (out_prec
- prec2
))
3037 irhs2
= adjust_bool_pattern_cast (TREE_TYPE (irhs1
), rhs2
);
3038 else if (absu_hwi (out_prec
- prec1
) > absu_hwi (out_prec
- prec2
))
3039 irhs1
= adjust_bool_pattern_cast (TREE_TYPE (irhs2
), rhs1
);
3042 irhs1
= adjust_bool_pattern_cast (out_type
, rhs1
);
3043 irhs2
= adjust_bool_pattern_cast (out_type
, rhs2
);
3046 itype
= TREE_TYPE (irhs1
);
3048 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
3049 rhs_code
, irhs1
, irhs2
);
3053 gcc_assert (TREE_CODE_CLASS (rhs_code
) == tcc_comparison
);
3054 if (TREE_CODE (TREE_TYPE (rhs1
)) != INTEGER_TYPE
3055 || !TYPE_UNSIGNED (TREE_TYPE (rhs1
))
3056 || (TYPE_PRECISION (TREE_TYPE (rhs1
))
3057 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1
)))))
3059 machine_mode mode
= TYPE_MODE (TREE_TYPE (rhs1
));
3061 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode
), 1);
3064 itype
= TREE_TYPE (rhs1
);
3065 cond_expr
= build2_loc (loc
, rhs_code
, itype
, rhs1
, rhs2
);
3066 if (trueval
== NULL_TREE
)
3067 trueval
= build_int_cst (itype
, 1);
3069 gcc_checking_assert (useless_type_conversion_p (itype
,
3070 TREE_TYPE (trueval
)));
3072 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
3073 COND_EXPR
, cond_expr
, trueval
,
3074 build_int_cst (itype
, 0));
3078 stmts
->safe_push (stmt
);
3079 gimple_set_location (pattern_stmt
, loc
);
3080 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt
)) = pattern_stmt
;
3081 return gimple_assign_lhs (pattern_stmt
);
3085 /* Function vect_recog_bool_pattern
3087 Try to find pattern like following:
3089 bool a_b, b_b, c_b, d_b, e_b;
3092 S1 a_b = x1 CMP1 y1;
3093 S2 b_b = x2 CMP2 y2;
3095 S4 d_b = x3 CMP3 y3;
3097 S6 f_T = (TYPE) e_b;
3099 where type 'TYPE' is an integral type. Or a similar pattern
3102 S6 f_Y = e_b ? r_Y : s_Y;
3104 as results from if-conversion of a complex condition.
3108 * LAST_STMT: A stmt at the end from which the pattern
3109 search begins, i.e. cast of a bool to
3114 * TYPE_IN: The type of the input arguments to the pattern.
3116 * TYPE_OUT: The type of the output of this pattern.
3118 * Return value: A new stmt that will be used to replace the pattern.
3120 Assuming size of TYPE is the same as size of all comparisons
3121 (otherwise some casts would be added where needed), the above
3122 sequence we create related pattern stmts:
3123 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3124 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3125 S4' d_T = x3 CMP3 y3 ? 1 : 0;
3126 S5' e_T = c_T | d_T;
3129 Instead of the above S3' we could emit:
3130 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3131 S3' c_T = a_T | b_T;
3132 but the above is more efficient. */
3135 vect_recog_bool_pattern (vec
<gimple
> *stmts
, tree
*type_in
,
3138 gimple last_stmt
= stmts
->pop ();
3139 enum tree_code rhs_code
;
3140 tree var
, lhs
, rhs
, vectype
;
3141 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
3142 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
3143 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
3144 gimple pattern_stmt
;
3146 if (!is_gimple_assign (last_stmt
))
3149 var
= gimple_assign_rhs1 (last_stmt
);
3150 lhs
= gimple_assign_lhs (last_stmt
);
3152 if ((TYPE_PRECISION (TREE_TYPE (var
)) != 1
3153 || !TYPE_UNSIGNED (TREE_TYPE (var
)))
3154 && TREE_CODE (TREE_TYPE (var
)) != BOOLEAN_TYPE
)
3157 rhs_code
= gimple_assign_rhs_code (last_stmt
);
3158 if (CONVERT_EXPR_CODE_P (rhs_code
))
3160 if (TREE_CODE (TREE_TYPE (lhs
)) != INTEGER_TYPE
3161 || TYPE_PRECISION (TREE_TYPE (lhs
)) == 1)
3163 vectype
= get_vectype_for_scalar_type (TREE_TYPE (lhs
));
3164 if (vectype
== NULL_TREE
)
3167 if (!check_bool_pattern (var
, loop_vinfo
, bb_vinfo
))
3170 rhs
= adjust_bool_pattern (var
, TREE_TYPE (lhs
), NULL_TREE
, stmts
);
3171 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
3172 if (useless_type_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (rhs
)))
3173 pattern_stmt
= gimple_build_assign (lhs
, SSA_NAME
, rhs
);
3176 = gimple_build_assign (lhs
, NOP_EXPR
, rhs
);
3177 *type_out
= vectype
;
3179 stmts
->safe_push (last_stmt
);
3180 if (dump_enabled_p ())
3181 dump_printf_loc (MSG_NOTE
, vect_location
,
3182 "vect_recog_bool_pattern: detected:\n");
3184 return pattern_stmt
;
3186 else if (rhs_code
== COND_EXPR
3187 && TREE_CODE (var
) == SSA_NAME
)
3189 vectype
= get_vectype_for_scalar_type (TREE_TYPE (lhs
));
3190 if (vectype
== NULL_TREE
)
3193 /* Build a scalar type for the boolean result that when
3194 vectorized matches the vector type of the result in
3195 size and number of elements. */
3197 = wi::udiv_trunc (TYPE_SIZE (vectype
),
3198 TYPE_VECTOR_SUBPARTS (vectype
)).to_uhwi ();
3200 = build_nonstandard_integer_type (prec
,
3201 TYPE_UNSIGNED (TREE_TYPE (var
)));
3202 if (get_vectype_for_scalar_type (type
) == NULL_TREE
)
3205 if (!check_bool_pattern (var
, loop_vinfo
, bb_vinfo
))
3208 rhs
= adjust_bool_pattern (var
, type
, NULL_TREE
, stmts
);
3209 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
3211 = gimple_build_assign (lhs
, COND_EXPR
,
3212 build2 (NE_EXPR
, boolean_type_node
,
3213 rhs
, build_int_cst (type
, 0)),
3214 gimple_assign_rhs2 (last_stmt
),
3215 gimple_assign_rhs3 (last_stmt
));
3216 *type_out
= vectype
;
3218 stmts
->safe_push (last_stmt
);
3219 if (dump_enabled_p ())
3220 dump_printf_loc (MSG_NOTE
, vect_location
,
3221 "vect_recog_bool_pattern: detected:\n");
3223 return pattern_stmt
;
3225 else if (rhs_code
== SSA_NAME
3226 && STMT_VINFO_DATA_REF (stmt_vinfo
))
3228 stmt_vec_info pattern_stmt_info
;
3229 vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
3230 gcc_assert (vectype
!= NULL_TREE
);
3231 if (!VECTOR_MODE_P (TYPE_MODE (vectype
)))
3233 if (!check_bool_pattern (var
, loop_vinfo
, bb_vinfo
))
3236 rhs
= adjust_bool_pattern (var
, TREE_TYPE (vectype
), NULL_TREE
, stmts
);
3237 lhs
= build1 (VIEW_CONVERT_EXPR
, TREE_TYPE (vectype
), lhs
);
3238 if (!useless_type_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (rhs
)))
3240 tree rhs2
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
3241 gimple cast_stmt
= gimple_build_assign (rhs2
, NOP_EXPR
, rhs
);
3242 new_pattern_def_seq (stmt_vinfo
, cast_stmt
);
3245 pattern_stmt
= gimple_build_assign (lhs
, SSA_NAME
, rhs
);
3246 pattern_stmt_info
= new_stmt_vec_info (pattern_stmt
, loop_vinfo
,
3248 set_vinfo_for_stmt (pattern_stmt
, pattern_stmt_info
);
3249 STMT_VINFO_DATA_REF (pattern_stmt_info
)
3250 = STMT_VINFO_DATA_REF (stmt_vinfo
);
3251 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info
)
3252 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo
);
3253 STMT_VINFO_DR_INIT (pattern_stmt_info
) = STMT_VINFO_DR_INIT (stmt_vinfo
);
3254 STMT_VINFO_DR_OFFSET (pattern_stmt_info
)
3255 = STMT_VINFO_DR_OFFSET (stmt_vinfo
);
3256 STMT_VINFO_DR_STEP (pattern_stmt_info
) = STMT_VINFO_DR_STEP (stmt_vinfo
);
3257 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info
)
3258 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo
);
3259 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo
)) = pattern_stmt
;
3260 *type_out
= vectype
;
3262 stmts
->safe_push (last_stmt
);
3263 if (dump_enabled_p ())
3264 dump_printf_loc (MSG_NOTE
, vect_location
,
3265 "vect_recog_bool_pattern: detected:\n");
3266 return pattern_stmt
;
3273 /* Mark statements that are involved in a pattern. */
3276 vect_mark_pattern_stmts (gimple orig_stmt
, gimple pattern_stmt
,
3277 tree pattern_vectype
)
3279 stmt_vec_info pattern_stmt_info
, def_stmt_info
;
3280 stmt_vec_info orig_stmt_info
= vinfo_for_stmt (orig_stmt
);
3281 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (orig_stmt_info
);
3282 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (orig_stmt_info
);
3285 pattern_stmt_info
= vinfo_for_stmt (pattern_stmt
);
3286 if (pattern_stmt_info
== NULL
)
3288 pattern_stmt_info
= new_stmt_vec_info (pattern_stmt
, loop_vinfo
,
3290 set_vinfo_for_stmt (pattern_stmt
, pattern_stmt_info
);
3292 gimple_set_bb (pattern_stmt
, gimple_bb (orig_stmt
));
3294 STMT_VINFO_RELATED_STMT (pattern_stmt_info
) = orig_stmt
;
3295 STMT_VINFO_DEF_TYPE (pattern_stmt_info
)
3296 = STMT_VINFO_DEF_TYPE (orig_stmt_info
);
3297 STMT_VINFO_VECTYPE (pattern_stmt_info
) = pattern_vectype
;
3298 STMT_VINFO_IN_PATTERN_P (orig_stmt_info
) = true;
3299 STMT_VINFO_RELATED_STMT (orig_stmt_info
) = pattern_stmt
;
3300 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info
)
3301 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info
);
3302 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info
))
3304 gimple_stmt_iterator si
;
3305 for (si
= gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info
));
3306 !gsi_end_p (si
); gsi_next (&si
))
3308 def_stmt
= gsi_stmt (si
);
3309 def_stmt_info
= vinfo_for_stmt (def_stmt
);
3310 if (def_stmt_info
== NULL
)
3312 def_stmt_info
= new_stmt_vec_info (def_stmt
, loop_vinfo
,
3314 set_vinfo_for_stmt (def_stmt
, def_stmt_info
);
3316 gimple_set_bb (def_stmt
, gimple_bb (orig_stmt
));
3317 STMT_VINFO_RELATED_STMT (def_stmt_info
) = orig_stmt
;
3318 STMT_VINFO_DEF_TYPE (def_stmt_info
) = vect_internal_def
;
3319 if (STMT_VINFO_VECTYPE (def_stmt_info
) == NULL_TREE
)
3320 STMT_VINFO_VECTYPE (def_stmt_info
) = pattern_vectype
;
3325 /* Function vect_pattern_recog_1
3328 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
3329 computation pattern.
3330 STMT: A stmt from which the pattern search should start.
3332 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
3333 expression that computes the same functionality and can be used to
3334 replace the sequence of stmts that are involved in the pattern.
3337 This function checks if the expression returned by PATTERN_RECOG_FUNC is
3338 supported in vector form by the target. We use 'TYPE_IN' to obtain the
3339 relevant vector type. If 'TYPE_IN' is already a vector type, then this
3340 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
3341 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
3342 to the available target pattern.
3344 This function also does some bookkeeping, as explained in the documentation
3345 for vect_recog_pattern. */
3348 vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func
,
3349 gimple_stmt_iterator si
,
3350 vec
<gimple
> *stmts_to_replace
)
3352 gimple stmt
= gsi_stmt (si
), pattern_stmt
;
3353 stmt_vec_info stmt_info
;
3354 loop_vec_info loop_vinfo
;
3355 tree pattern_vectype
;
3356 tree type_in
, type_out
;
3357 enum tree_code code
;
3361 stmts_to_replace
->truncate (0);
3362 stmts_to_replace
->quick_push (stmt
);
3363 pattern_stmt
= (* vect_recog_func
) (stmts_to_replace
, &type_in
, &type_out
);
3367 stmt
= stmts_to_replace
->last ();
3368 stmt_info
= vinfo_for_stmt (stmt
);
3369 loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3371 if (VECTOR_MODE_P (TYPE_MODE (type_in
)))
3373 /* No need to check target support (already checked by the pattern
3374 recognition function). */
3375 pattern_vectype
= type_out
? type_out
: type_in
;
3379 machine_mode vec_mode
;
3380 enum insn_code icode
;
3383 /* Check target support */
3384 type_in
= get_vectype_for_scalar_type (type_in
);
3388 type_out
= get_vectype_for_scalar_type (type_out
);
3393 pattern_vectype
= type_out
;
3395 if (is_gimple_assign (pattern_stmt
))
3396 code
= gimple_assign_rhs_code (pattern_stmt
);
3399 gcc_assert (is_gimple_call (pattern_stmt
));
3403 optab
= optab_for_tree_code (code
, type_in
, optab_default
);
3404 vec_mode
= TYPE_MODE (type_in
);
3406 || (icode
= optab_handler (optab
, vec_mode
)) == CODE_FOR_nothing
3407 || (insn_data
[icode
].operand
[0].mode
!= TYPE_MODE (type_out
)))
3411 /* Found a vectorizable pattern. */
3412 if (dump_enabled_p ())
3414 dump_printf_loc (MSG_NOTE
, vect_location
,
3415 "pattern recognized: ");
3416 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
3417 dump_printf (MSG_NOTE
, "\n");
3420 /* Mark the stmts that are involved in the pattern. */
3421 vect_mark_pattern_stmts (stmt
, pattern_stmt
, pattern_vectype
);
3423 /* Patterns cannot be vectorized using SLP, because they change the order of
3426 FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo
), i
, next
)
3428 LOOP_VINFO_REDUCTIONS (loop_vinfo
).ordered_remove (i
);
3430 /* It is possible that additional pattern stmts are created and inserted in
3431 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
3432 relevant statements. */
3433 for (i
= 0; stmts_to_replace
->iterate (i
, &stmt
)
3434 && (unsigned) i
< (stmts_to_replace
->length () - 1);
3437 stmt_info
= vinfo_for_stmt (stmt
);
3438 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
3439 if (dump_enabled_p ())
3441 dump_printf_loc (MSG_NOTE
, vect_location
,
3442 "additional pattern stmt: ");
3443 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
3444 dump_printf (MSG_NOTE
, "\n");
3447 vect_mark_pattern_stmts (stmt
, pattern_stmt
, NULL_TREE
);
3452 /* Function vect_pattern_recog
3455 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
3458 Output - for each computation idiom that is detected we create a new stmt
3459 that provides the same functionality and that can be vectorized. We
3460 also record some information in the struct_stmt_info of the relevant
3461 stmts, as explained below:
3463 At the entry to this function we have the following stmts, with the
3464 following initial value in the STMT_VINFO fields:
3466 stmt in_pattern_p related_stmt vec_stmt
3467 S1: a_i = .... - - -
3468 S2: a_2 = ..use(a_i).. - - -
3469 S3: a_1 = ..use(a_2).. - - -
3470 S4: a_0 = ..use(a_1).. - - -
3471 S5: ... = ..use(a_0).. - - -
3473 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
3474 represented by a single stmt. We then:
3475 - create a new stmt S6 equivalent to the pattern (the stmt is not
3476 inserted into the code)
3477 - fill in the STMT_VINFO fields as follows:
3479 in_pattern_p related_stmt vec_stmt
3480 S1: a_i = .... - - -
3481 S2: a_2 = ..use(a_i).. - - -
3482 S3: a_1 = ..use(a_2).. - - -
3483 S4: a_0 = ..use(a_1).. true S6 -
3484 '---> S6: a_new = .... - S4 -
3485 S5: ... = ..use(a_0).. - - -
3487 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
3488 to each other through the RELATED_STMT field).
3490 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
3491 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
3492 remain irrelevant unless used by stmts other than S4.
3494 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
3495 (because they are marked as irrelevant). It will vectorize S6, and record
3496 a pointer to the new vector stmt VS6 from S6 (as usual).
3497 S4 will be skipped, and S5 will be vectorized as usual:
3499 in_pattern_p related_stmt vec_stmt
3500 S1: a_i = .... - - -
3501 S2: a_2 = ..use(a_i).. - - -
3502 S3: a_1 = ..use(a_2).. - - -
3503 > VS6: va_new = .... - - -
3504 S4: a_0 = ..use(a_1).. true S6 VS6
3505 '---> S6: a_new = .... - S4 VS6
3506 > VS5: ... = ..vuse(va_new).. - - -
3507 S5: ... = ..use(a_0).. - - -
3509 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
3510 elsewhere), and we'll end up with:
3513 VS5: ... = ..vuse(va_new)..
3515 In case of more than one pattern statements, e.g., widen-mult with
3519 S2 a_T = (TYPE) a_t;
3520 '--> S3: a_it = (interm_type) a_t;
3521 S4 prod_T = a_T * CONST;
3522 '--> S5: prod_T' = a_it w* CONST;
3524 there may be other users of a_T outside the pattern. In that case S2 will
3525 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
3526 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
3527 be recorded in S3. */
3530 vect_pattern_recog (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
)
3535 gimple_stmt_iterator si
;
3537 vect_recog_func_ptr vect_recog_func
;
3538 auto_vec
<gimple
, 1> stmts_to_replace
;
3541 if (dump_enabled_p ())
3542 dump_printf_loc (MSG_NOTE
, vect_location
,
3543 "=== vect_pattern_recog ===\n");
3547 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3548 bbs
= LOOP_VINFO_BBS (loop_vinfo
);
3549 nbbs
= loop
->num_nodes
;
3553 bbs
= &BB_VINFO_BB (bb_vinfo
);
3557 /* Scan through the loop stmts, applying the pattern recognition
3558 functions starting at each stmt visited: */
3559 for (i
= 0; i
< nbbs
; i
++)
3561 basic_block bb
= bbs
[i
];
3562 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
3564 if (bb_vinfo
&& (stmt
= gsi_stmt (si
))
3565 && vinfo_for_stmt (stmt
)
3566 && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt
)))
3569 /* Scan over all generic vect_recog_xxx_pattern functions. */
3570 for (j
= 0; j
< NUM_PATTERNS
; j
++)
3572 vect_recog_func
= vect_vect_recog_func_ptrs
[j
];
3573 vect_pattern_recog_1 (vect_recog_func
, si
,