/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006-2015 Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "double-int.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "hard-reg-set.h"
#include "dominance.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-expr.h"
#include "gimple-iterator.h"
#include "gimple-ssa.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "statistics.h"
#include "fixed-value.h"
#include "insn-config.h"
#include "insn-codes.h"
#include "tree-data-ref.h"
#include "tree-vectorizer.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "diagnostic-core.h"
/* Pattern recognition functions  */
static gimple vect_recog_widen_sum_pattern (vec<gimple> *, tree *,
					    tree *);
static gimple vect_recog_widen_mult_pattern (vec<gimple> *, tree *,
					     tree *);
static gimple vect_recog_dot_prod_pattern (vec<gimple> *, tree *,
					   tree *);
static gimple vect_recog_sad_pattern (vec<gimple> *, tree *,
				      tree *);
static gimple vect_recog_pow_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_over_widening_pattern (vec<gimple> *, tree *,
						tree *);
static gimple vect_recog_widen_shift_pattern (vec<gimple> *,
					      tree *, tree *);
static gimple vect_recog_rotate_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_vector_vector_shift_pattern (vec<gimple> *,
						      tree *, tree *);
static gimple vect_recog_divmod_pattern (vec<gimple> *,
					 tree *, tree *);
static gimple vect_recog_mixed_size_cond_pattern (vec<gimple> *,
						  tree *, tree *);
static gimple vect_recog_bool_pattern (vec<gimple> *, tree *, tree *);

static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
	vect_recog_widen_mult_pattern,
	vect_recog_widen_sum_pattern,
	vect_recog_dot_prod_pattern,
	vect_recog_sad_pattern,
	vect_recog_pow_pattern,
	vect_recog_widen_shift_pattern,
	vect_recog_over_widening_pattern,
	vect_recog_rotate_pattern,
	vect_recog_vector_vector_shift_pattern,
	vect_recog_divmod_pattern,
	vect_recog_mixed_size_cond_pattern,
	vect_recog_bool_pattern};
/* Append STMT to the pattern definition sequence of STMT_INFO.  */

static inline void
append_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
				      stmt);
}

/* Reset the pattern definition sequence of STMT_INFO, then append STMT.  */

static inline void
new_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
  append_pattern_def_seq (stmt_info, stmt);
}
/* Check whether STMT2 is in the same loop or basic block as STMT1.
   Which of the two applies depends on whether we're currently doing
   loop-based or basic-block-based vectorization, as determined by
   the vinfo_for_stmt for STMT1 (which must be defined).

   If this returns true, vinfo_for_stmt for STMT2 is guaranteed
   to be defined as well.  */

static bool
vect_same_loop_or_bb_p (gimple stmt1, gimple stmt2)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);

  if (!gimple_bb (stmt2))
    return false;

  if (loop_vinfo)
    {
      struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt2)))
	return false;
    }
  else
    {
      if (gimple_bb (stmt2) != BB_VINFO_BB (bb_vinfo)
	  || gimple_code (stmt2) == GIMPLE_PHI)
	return false;
    }

  gcc_assert (vinfo_for_stmt (stmt2));
  return true;
}
/* If the LHS of DEF_STMT has a single use, and that statement is
   in the same loop or basic block, return it.  */

static gimple
vect_single_imm_use (gimple def_stmt)
{
  tree lhs = gimple_assign_lhs (def_stmt);
  use_operand_p use_p;
  gimple use_stmt;

  if (!single_imm_use (lhs, &use_p, &use_stmt))
    return NULL;

  if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
    return NULL;

  return use_stmt;
}
/* Check whether NAME, an ssa-name used in USE_STMT,
   is a result of a type promotion, such that:
     DEF_STMT: NAME = NOP (name0)
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.  */

static bool
type_conversion_p (tree name, gimple use_stmt, bool check_sign,
		   tree *orig_type, gimple *def_stmt, bool *promotion)
{
  tree dummy;
  gimple dummy_gimple;
  loop_vec_info loop_vinfo;
  stmt_vec_info stmt_vinfo;
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;
  tree def;
  bb_vec_info bb_vinfo;

  stmt_vinfo = vinfo_for_stmt (use_stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  if (!vect_is_simple_use (name, use_stmt, loop_vinfo, bb_vinfo, def_stmt,
			   &def, &dt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (!*def_stmt)
    return false;

  if (!is_gimple_assign (*def_stmt))
    return false;

  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *orig_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
    return false;

  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
    *promotion = true;
  else
    *promotion = false;

  if (!vect_is_simple_use (oprnd0, *def_stmt, loop_vinfo,
			   bb_vinfo, &dummy_gimple, &dummy, &dt))
    return false;

  return true;
}
/* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
   is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var.  */

static tree
vect_recog_temp_ssa_var (tree type, gimple stmt)
{
  return make_temp_ssa_name (type, stmt, "patt");
}
/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE1 prod;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
   same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */
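
/* Illustrative example (ours, not from the original sources): with
   "type" = short and "TYPE1" = "TYPE2" = int, this recognizer matches
   the scalar loop

       short x[N], y[N];
       int i, sum = 0;
       for (i = 0; i < N; i++)
         sum += x[i] * y[i];

   and replaces the widen/widen/multiply/accumulate chain with a single
   DOT_PROD_EXPR <x[i], y[i], sum> per vector iteration, assuming the
   target implements a dot-product instruction (e.g. the [su]dot_prod
   optabs).  */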
static gimple
vect_recog_dot_prod_pattern (vec<gimple> *stmts, tree *type_in,
			     tree *type_out)
{
  gimple stmt, last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  tree oprnd00, oprnd01;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  tree prod_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DPROD = DX * DY;
          DDPROD = (TYPE2) DPROD;
          sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type
     - sum is the same size as DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (oprnd0);
    }
  else
    {
      gimple def_stmt;

      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (last_stmt);
      oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), type)
	  || !types_compatible_p (TREE_TYPE (oprnd1), type))
        return NULL;
      stmt = last_stmt;

      if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
			     &promotion)
	  && promotion)
        {
          stmt = def_stmt;
          oprnd0 = gimple_assign_rhs1 (stmt);
        }
      else
        half_type = type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr  */
  if (TREE_CODE (oprnd0) != SSA_NAME)
    return NULL;

  prod_type = half_type;
  stmt = SSA_NAME_DEF_STMT (oprnd0);

  /* It could not be the dot_prod pattern if the stmt is outside the loop.  */
  if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (stmt))
    return NULL;
  stmt_vinfo = vinfo_for_stmt (stmt);
  gcc_assert (stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
    return NULL;
  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as a widening multiplication?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
        return NULL;
      stmt_vinfo = vinfo_for_stmt (stmt);
      gcc_assert (stmt_vinfo);
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
      oprnd00 = gimple_assign_rhs1 (stmt);
      oprnd01 = gimple_assign_rhs2 (stmt);
      STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (last_stmt))
	  = STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
    }
  else
    {
      tree half_type0, half_type1;
      gimple def_stmt;
      tree oprnd0, oprnd1;

      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
          || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
        return NULL;
      if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
			      &promotion)
	  || !promotion)
        return NULL;
      oprnd00 = gimple_assign_rhs1 (def_stmt);
      if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
			      &promotion)
	  || !promotion)
        return NULL;
      oprnd01 = gimple_assign_rhs1 (def_stmt);
      if (!types_compatible_p (half_type0, half_type1))
        return NULL;
      if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
	return NULL;
    }

  half_type = TREE_TYPE (oprnd00);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
				      oprnd00, oprnd01, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_dot_prod_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}
/* Function vect_recog_sad_pattern

   Try to find the following Sum of Absolute Difference (SAD) pattern:

     unsigned type x_t, y_t;
     signed TYPE1 diff, abs_diff;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  diff = x_T - y_T;
     S6  abs_diff = ABS_EXPR <diff>;
     [S7  abs_diff = (TYPE2) abs_diff;  #optional]
     S8  sum_1 = abs_diff + sum_0;

   where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is
   the same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S8, the pattern
   {S3,S4,S5,S6,S7,S8} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        SAD_EXPR <x_t, y_t, sum_0>  */
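
/* Illustrative example (ours, not from the original sources): with
   "type" = unsigned char and "TYPE1" = "TYPE2" = int, the match
   corresponds to the classic scalar loop

       unsigned char x[N], y[N];
       int i, sum = 0;
       for (i = 0; i < N; i++)
         sum += abs (x[i] - y[i]);

   which becomes a single SAD_EXPR <x[i], y[i], sum> per vector
   iteration, assuming the target provides sum-of-absolute-differences
   support (e.g. the usad/ssad optabs).  */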
static gimple
vect_recog_sad_pattern (vec<gimple> *stmts, tree *type_in,
			tree *type_out)
{
  gimple last_stmt = (*stmts)[0];
  tree sad_oprnd0, sad_oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree half_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  tree sum_type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DDIFF = DX - DY;
          DAD = ABS_EXPR <DDIFF>;
          DAD = (TYPE2) DAD;
          sum_1 = DAD + sum_0;
     In which
     - DX is at least double the size of X
     - DY is at least double the size of Y
     - DX, DY, DDIFF, DAD all have the same type
     - sum is the same size as DAD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD w+ sum_0;    #widen summation
     or
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD + sum_0;     #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  tree plus_oprnd0, plus_oprnd1;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      gimple stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      sum_type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      plus_oprnd0 = gimple_assign_rhs1 (stmt);
      plus_oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (plus_oprnd0);
    }
  else
    {
      gimple def_stmt;

      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
        return NULL;
      plus_oprnd0 = gimple_assign_rhs1 (last_stmt);
      plus_oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (plus_oprnd0), sum_type)
	  || !types_compatible_p (TREE_TYPE (plus_oprnd1), sum_type))
        return NULL;

      /* The type conversion could be promotion, demotion,
         or just signed -> unsigned.  */
      if (type_conversion_p (plus_oprnd0, last_stmt, false,
                             &half_type, &def_stmt, &promotion))
        plus_oprnd0 = gimple_assign_rhs1 (def_stmt);
      else
        half_type = sum_type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that plus_oprnd1 is the reduction variable (defined by a
     loop-header phi), and plus_oprnd0 is an ssa-name defined by a stmt in the
     loop body.  Then check that plus_oprnd0 is defined by an abs_expr.  */

  if (TREE_CODE (plus_oprnd0) != SSA_NAME)
    return NULL;

  tree abs_type = half_type;
  gimple abs_stmt = SSA_NAME_DEF_STMT (plus_oprnd0);

  /* It could not be the sad pattern if the abs_stmt is outside the loop.  */
  if (!gimple_bb (abs_stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (abs_stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (abs_stmt))
    return NULL;

  stmt_vec_info abs_stmt_vinfo = vinfo_for_stmt (abs_stmt);
  gcc_assert (abs_stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (abs_stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR)
    return NULL;

  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
  if (!types_compatible_p (TREE_TYPE (abs_oprnd), abs_type))
    return NULL;
  if (TYPE_UNSIGNED (abs_type))
    return NULL;

  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */

  if (TREE_CODE (abs_oprnd) != SSA_NAME)
    return NULL;

  gimple diff_stmt = SSA_NAME_DEF_STMT (abs_oprnd);

  /* It could not be the sad pattern if the diff_stmt is outside the loop.  */
  if (!gimple_bb (diff_stmt)
      || !flow_bb_inside_loop_p (loop, gimple_bb (diff_stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (diff_stmt))
    return NULL;

  stmt_vec_info diff_stmt_vinfo = vinfo_for_stmt (diff_stmt);
  gcc_assert (diff_stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (diff_stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (diff_stmt) != MINUS_EXPR)
    return NULL;

  tree half_type0, half_type1;
  gimple def_stmt;

  tree minus_oprnd0 = gimple_assign_rhs1 (diff_stmt);
  tree minus_oprnd1 = gimple_assign_rhs2 (diff_stmt);

  if (!types_compatible_p (TREE_TYPE (minus_oprnd0), abs_type)
      || !types_compatible_p (TREE_TYPE (minus_oprnd1), abs_type))
    return NULL;
  if (!type_conversion_p (minus_oprnd0, diff_stmt, false,
                          &half_type0, &def_stmt, &promotion)
      || !promotion)
    return NULL;
  sad_oprnd0 = gimple_assign_rhs1 (def_stmt);

  if (!type_conversion_p (minus_oprnd1, diff_stmt, false,
                          &half_type1, &def_stmt, &promotion)
      || !promotion)
    return NULL;
  sad_oprnd1 = gimple_assign_rhs1 (def_stmt);

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;
  if (TYPE_PRECISION (abs_type) < TYPE_PRECISION (half_type0) * 2
      || TYPE_PRECISION (sum_type) < TYPE_PRECISION (half_type0) * 2)
    return NULL;

  *type_in = TREE_TYPE (sad_oprnd0);
  *type_out = sum_type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  tree var = vect_recog_temp_ssa_var (sum_type, NULL);
  gimple pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd0,
					     sad_oprnd1, plus_oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_sad_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}
/* Handle widening operation by a constant.  At the moment we support MULT_EXPR
   and LSHIFT_EXPR.

   For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
   we check that CONST_OPRND is less than or equal to the size of HALF_TYPE.

   Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
   HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
   that satisfies the above restrictions, we can perform a widening operation
   from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
   with a_it = (interm_type) a_t;  Store such operation in *WSTMT.  */
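
/* Illustrative example (ours, not from the original sources): for

       unsigned char a_t;
       unsigned int a_T = (unsigned int) a_t;   <-- DEF_STMT
       unsigned int p   = a_T * 300;

   the constant 300 does not fit unsigned char, but TYPE (32 bits) is
   4x HALF_TYPE (8 bits) and 300 does fit the intermediate 16-bit type,
   so we emit a_it = (unsigned short) a_t in *WSTMT and widen from the
   intermediate type instead.  */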
static bool
vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
			       tree const_oprnd, tree *oprnd,
			       gimple *wstmt, tree type,
			       tree *half_type, gimple def_stmt)
{
  tree new_type, new_oprnd;

  if (code != MULT_EXPR && code != LSHIFT_EXPR)
    return false;

  if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
       || (code == LSHIFT_EXPR
	   && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
	      != 1))
      && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
    {
      /* CONST_OPRND is a constant of HALF_TYPE.  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      return true;
    }

  if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
    return false;

  if (!vect_same_loop_or_bb_p (stmt, def_stmt))
    return false;

  /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
     a type 2 times bigger than HALF_TYPE.  */
  new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
					     TYPE_UNSIGNED (type));
  if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
      || (code == LSHIFT_EXPR
	  && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
    return false;

  /* Use NEW_TYPE for widening operation and create a_T = (NEW_TYPE) a_t;  */
  *oprnd = gimple_assign_rhs1 (def_stmt);
  new_oprnd = make_ssa_name (new_type);
  *wstmt = gimple_build_assign (new_oprnd, NOP_EXPR, *oprnd);
  *oprnd = new_oprnd;

  *half_type = new_type;
  return true;
}
/* Function vect_recog_widen_mult_pattern

   Try to find the following pattern:

     type1 a_t;
     type2 b_t;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;

   where type 'TYPE' is at least double the size of type 'type1' and 'type2'.

   Also detect unsigned cases:

     unsigned type1 a_t;
     unsigned type2 b_t;
     unsigned TYPE u_prod_T;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;
     S6  u_prod_T = (unsigned TYPE) prod_T;

   and multiplication by constants:

     type a_t;
     TYPE a_T, prod_T;

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
     S5  prod_T = a_T * CONST;

   A special case of multiplication by constants is when 'TYPE' is 4 times
   bigger than 'type', but CONST fits an intermediate type 2 times smaller
   than 'TYPE'.  In that case we create an additional pattern stmt for S3
   to create a variable of the intermediate type, and perform widen-mult
   on the intermediate type as well:

     type a_t;
     interm_type a_it;
     TYPE a_T, prod_T, prod_T';

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
           '--> a_it = (interm_type) a_t;
     S5  prod_T = a_T * CONST;
           '--> prod_T' = a_it w* CONST;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
   is detected.  In case of unsigned widen-mult, the original stmt (S5) is
   replaced with S6 in STMTS.  In case of multiplication by a constant
   of an intermediate type (the last case above), STMTS also contains S3
   (inserted before S5).

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_MULT <a_t, b_t>
   If the result of WIDEN_MULT needs to be converted to a larger type, the
   returned stmt will be this type conversion stmt.  */
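
/* Illustrative example (ours, not from the original sources): the
   simplest matched input is

       short a[N], b[N];
       int c[N];
       for (int i = 0; i < N; i++)
         c[i] = a[i] * b[i];

   where the front end widens both operands to int before the multiply;
   the two casts and the multiplication are replaced by a single
   WIDEN_MULT_EXPR <a[i], b[i]> so that twice as many elements fit in
   each input vector.  */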
static gimple
vect_recog_widen_mult_pattern (vec<gimple> *stmts,
			       tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  gimple def_stmt0, def_stmt1;
  tree oprnd0, oprnd1;
  tree type, half_type0, half_type1;
  gimple new_stmt = NULL, pattern_stmt = NULL;
  tree vectype, vecitype;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  bool op1_ok;
  bool promotion;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* Check argument 0.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
			  &promotion)
      || !promotion)
    return NULL;
  /* Check argument 1.  */
  op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
			      &def_stmt1, &promotion);

  if (op1_ok && promotion)
    {
      oprnd0 = gimple_assign_rhs1 (def_stmt0);
      oprnd1 = gimple_assign_rhs1 (def_stmt1);
    }
  else
    {
      if (TREE_CODE (oprnd1) == INTEGER_CST
	  && TREE_CODE (half_type0) == INTEGER_TYPE
	  && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
					    &oprnd0, &new_stmt, type,
					    &half_type0, def_stmt0))
	{
	  half_type1 = half_type0;
	  oprnd1 = fold_convert (half_type1, oprnd1);
	}
      else
	return NULL;
    }

  /* If the two arguments have different sizes, convert the one with
     the smaller type into the larger type.  */
  if (TYPE_PRECISION (half_type0) != TYPE_PRECISION (half_type1))
    {
      /* If we already used up the single-stmt slot give up.  */
      if (new_stmt)
	return NULL;

      tree *oprnd = NULL;
      gimple def_stmt = NULL;

      if (TYPE_PRECISION (half_type0) < TYPE_PRECISION (half_type1))
	{
	  def_stmt = def_stmt0;
	  half_type0 = half_type1;
	  oprnd = &oprnd0;
	}
      else
	{
	  def_stmt = def_stmt1;
	  half_type1 = half_type0;
	  oprnd = &oprnd1;
	}

      tree old_oprnd = gimple_assign_rhs1 (def_stmt);
      tree new_oprnd = make_ssa_name (half_type0);
      new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, old_oprnd);
      *oprnd = new_oprnd;
    }

  /* Handle unsigned case.  Look for
     S6  u_prod_T = (unsigned TYPE) prod_T;
     Use unsigned TYPE as the type for WIDEN_MULT_EXPR.  */
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
    {
      gimple use_stmt;
      tree use_lhs;
      tree use_type;

      if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
	return NULL;

      use_stmt = vect_single_imm_use (last_stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt)
	  || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
	return NULL;

      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      if (!INTEGRAL_TYPE_P (use_type)
	  || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
	  || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
	return NULL;

      type = use_type;
      last_stmt = use_stmt;
    }

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;

  /* If TYPE is more than twice larger than HALF_TYPE, we use WIDEN_MULT
     to get an intermediate result of type ITYPE.  In this case we need
     to build a statement to convert this intermediate result to type TYPE.  */
  tree itype = type;
  if (TYPE_PRECISION (type) > TYPE_PRECISION (half_type0) * 2)
    itype = build_nonstandard_integer_type
	      (GET_MODE_BITSIZE (TYPE_MODE (half_type0)) * 2,
	       TYPE_UNSIGNED (type));

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_recog_widen_mult_pattern: detected:\n");

  /* Check target support  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vecitype = get_vectype_for_scalar_type (itype);
  if (!vectype
      || !vecitype
      || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
					  vecitype, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = get_vectype_for_scalar_type (type);

  /* Pattern supported.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (itype, NULL);
  pattern_stmt = gimple_build_assign (var, WIDEN_MULT_EXPR, oprnd0, oprnd1);

  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;

  /* If the original two operands have different sizes, we may need to convert
     the smaller one into the larger type.  If this is the case, at this point
     the new stmt is already built.  */
  if (new_stmt)
    {
      append_pattern_def_seq (stmt_vinfo, new_stmt);
      stmt_vec_info new_stmt_info
	= new_stmt_vec_info (new_stmt, loop_vinfo, bb_vinfo);
      set_vinfo_for_stmt (new_stmt, new_stmt_info);
      STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
    }

  /* If ITYPE is not TYPE, we need to build a type conversion stmt to convert
     the result of the widen-mult operation into type TYPE.  */
  if (itype != type)
    {
      append_pattern_def_seq (stmt_vinfo, pattern_stmt);
      stmt_vec_info pattern_stmt_info
	= new_stmt_vec_info (pattern_stmt, loop_vinfo, bb_vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
      STMT_VINFO_VECTYPE (pattern_stmt_info) = vecitype;
      pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
					  NOP_EXPR,
					  gimple_assign_lhs (pattern_stmt));
    }

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}
/* Function vect_recog_pow_pattern

   Try to find the following pattern:

     x = POW (y, N);

   with POW being one of pow, powf, powi, powif and N being
   either 2 or 0.5.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        x = x * x
   or
        x = sqrt (x)  */
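
/* Illustrative example (ours, not from the original sources): a call
   such as

       for (i = 0; i < n; i++)
         y[i] = pow (x[i], 2.0);

   is rewritten to y[i] = x[i] * x[i], and pow (x[i], 0.5) is rewritten
   to sqrt (x[i]), both of which then vectorize as ordinary operations
   (the sqrt case additionally requires a vectorizable sqrt).  */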
static gimple
vect_recog_pow_pattern (vec<gimple> *stmts, tree *type_in,
			tree *type_out)
{
  gimple last_stmt = (*stmts)[0];
  tree fn, base, exp = NULL;
  gimple stmt;
  tree var;

  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  fn = gimple_call_fndecl (last_stmt);
  if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
    return NULL;

  switch (DECL_FUNCTION_CODE (fn))
    {
    case BUILT_IN_POWIF:
    case BUILT_IN_POWI:
    case BUILT_IN_POWF:
    case BUILT_IN_POW:
      base = gimple_call_arg (last_stmt, 0);
      exp = gimple_call_arg (last_stmt, 1);
      if (TREE_CODE (exp) != REAL_CST
	  && TREE_CODE (exp) != INTEGER_CST)
	return NULL;
      break;

    default:
      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  *type_out = NULL_TREE;

  /* Catch squaring.  */
  if ((tree_fits_shwi_p (exp)
       && tree_to_shwi (exp) == 2)
      || (TREE_CODE (exp) == REAL_CST
	  && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
    {
      *type_in = TREE_TYPE (base);

      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign (var, MULT_EXPR, base, base);
      return stmt;
    }

  /* Catch square root.  */
  if (TREE_CODE (exp) == REAL_CST
      && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
    {
      tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
      *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
      if (*type_in)
	{
	  gcall *stmt = gimple_build_call (newfn, 1, base);
	  if (vectorizable_function (stmt, *type_in, *type_in)
	      != NULL_TREE)
	    {
	      var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
	      gimple_call_set_lhs (stmt, var);
	      return stmt;
	    }
	}
    }

  return NULL;
}
/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

     type x_t;
     TYPE x_T, sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e. we're
   summing elements of type 'type' into an accumulator of type 'TYPE'.  This
   is a special case of a reduction computation.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.  In the example,
   when this function is called with S3, the pattern {S2,S3} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
	 vectorized without preserving all the intermediate results.  It
	 produces only N/2 (widened) results (by summing up pairs of
	 intermediate results) rather than all N results.  Therefore, we
	 cannot allow this pattern when we want to get all the results and in
	 the correct order (as is the case when this computation is in an
	 inner-loop nested in an outer-loop that is being vectorized).  */
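
/* Illustrative example (ours, not from the original sources): with
   "type" = unsigned short and "TYPE" = unsigned int the recognizer
   matches

       unsigned short x[N];
       unsigned int i, sum = 0;
       for (i = 0; i < N; i++)
         sum += x[i];

   and emits WIDEN_SUM_EXPR <x[i], sum>, halving the number of widened
   partial sums kept in the vector accumulator.  */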
static gimple
vect_recog_widen_sum_pattern (vec<gimple> *stmts, tree *type_in,
			      tree *type_out)
{
  gimple stmt, last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE) X;
          sum_1 = DX + sum_0;
     In which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a cast from type 'type' to type
     'TYPE'.  */

  if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
			  &promotion)
      || !promotion)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (stmt);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, oprnd0, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_recog_widen_sum_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}
/* Return TRUE if the operation in STMT can be performed on a smaller type.

   Input:
   STMT - a statement to check.
   DEF - we support operations with two operands, one of which is constant.
	 The other operand can be defined by a demotion operation, or by a
	 previous statement in a sequence of over-promoted operations.  In the
	 latter case DEF is used to replace that operand.  (It is defined by a
	 pattern statement we created for the previous statement in the
	 sequence).

   Input/output:
   NEW_TYPE - Output: a smaller type that we are trying to use.  Input: if not
	 NULL, it's the type of DEF.
   STMTS - additional pattern statements.  If a pattern statement (type
	 conversion) is created in this function, its original statement is
	 added to STMTS.

   Output:
   OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
	 operands to use in the new pattern statement for STMT (will be created
	 in vect_recog_over_widening_pattern ()).
   NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
	 statements for STMT: the first one is a type promotion and the second
	 one is the operation itself.  We return the type promotion statement
	 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
	 the second pattern statement.  */
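
/* Illustrative example (ours, not from the original sources): for

       unsigned char c;
       int t = ((int) c) << 3;

   the 32-bit shift also fits unsigned short (an 8-bit value shifted by
   3 needs at most 11 bits), so NEW_TYPE becomes the 16-bit intermediate
   type and the promotion is rewritten to target that narrower type.  */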
static bool
vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
				  tree *op0, tree *op1, gimple *new_def_stmt,
				  vec<gimple> *stmts)
{
  enum tree_code code;
  tree const_oprnd, oprnd;
  tree interm_type = NULL_TREE, half_type, new_oprnd, type;
  gimple def_stmt, new_stmt;
  bool first = false;
  bool promotion;

  *op0 = NULL_TREE;
  *op1 = NULL_TREE;
  *new_def_stmt = NULL;

  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
      && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
    return false;

  oprnd = gimple_assign_rhs1 (stmt);
  const_oprnd = gimple_assign_rhs2 (stmt);
  type = gimple_expr_type (stmt);

  if (TREE_CODE (oprnd) != SSA_NAME
      || TREE_CODE (const_oprnd) != INTEGER_CST)
    return false;

  /* If oprnd has other uses besides that in stmt we cannot mark it
     as being part of a pattern only.  */
  if (!has_single_use (oprnd))
    return false;

  /* If we are in the middle of a sequence, we use DEF from a previous
     statement.  Otherwise, OPRND has to be a result of type promotion.  */
  if (*new_type)
    {
      half_type = *new_type;
      oprnd = def;
    }
  else
    {
      first = true;
      if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
			      &promotion)
	  || !promotion
	  || !vect_same_loop_or_bb_p (stmt, def_stmt))
	return false;
    }

  /* Can we perform the operation on a smaller type?  */
  switch (code)
    {
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
    case BIT_AND_EXPR:
      if (!int_fits_type_p (const_oprnd, half_type))
	{
	  /* HALF_TYPE is not enough.  Try a bigger type if possible.  */
	  if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
	    return false;

	  interm_type = build_nonstandard_integer_type (
			TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
	  if (!int_fits_type_p (const_oprnd, interm_type))
	    return false;
	}

      break;

    case LSHIFT_EXPR:
      /* Try intermediate type - HALF_TYPE is not enough for sure.  */
      if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
	return false;

      /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
	 (e.g., if the original value was char, the shift amount is at most 8
	 if we want to use short).  */
      if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
	return false;

      interm_type = build_nonstandard_integer_type (
			TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

      if (!vect_supportable_shift (code, interm_type))
	return false;

      break;

    case RSHIFT_EXPR:
      if (vect_supportable_shift (code, half_type))
	break;

      /* Try intermediate type - HALF_TYPE is not supported.  */
      if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
	return false;

      interm_type = build_nonstandard_integer_type (
			TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

      if (!vect_supportable_shift (code, interm_type))
	return false;

      break;

    default:
      gcc_unreachable ();
    }

  /* There are four possible cases:
     1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
	the first statement in the sequence)
	a. The original, HALF_TYPE, is not enough - we replace the promotion
	   from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
	b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
	   promotion.
     2. OPRND is defined by a pattern statement we created.
	a. Its type is not sufficient for the operation, we create a new stmt:
	   a type conversion for OPRND from HALF_TYPE to INTERM_TYPE.  We store
	   this statement in NEW_DEF_STMT, and it is later put in
	   STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
	b. OPRND is good to use in the new statement.  */
  if (first)
    {
      if (interm_type)
	{
	  /* Replace the original type conversion HALF_TYPE->TYPE with
	     HALF_TYPE->INTERM_TYPE.  */
	  if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
	    {
	      new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
	      /* Check if the already created pattern stmt is what we need.  */
	      if (!is_gimple_assign (new_stmt)
		  || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt))
		  || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
		return false;

	      stmts->safe_push (def_stmt);
	      oprnd = gimple_assign_lhs (new_stmt);
	    }
	  else
	    {
	      /* Create NEW_OPRND = (INTERM_TYPE) OPRND.  */
	      oprnd = gimple_assign_rhs1 (def_stmt);
	      new_oprnd = make_ssa_name (interm_type);
	      new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
	      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
	      stmts->safe_push (def_stmt);
	      oprnd = new_oprnd;
	    }
	}
      else
	{
	  /* Retrieve the operand before the type promotion.  */
	  oprnd = gimple_assign_rhs1 (def_stmt);
	}
    }
  else
    {
      if (interm_type)
	{
	  /* Create a type conversion HALF_TYPE->INTERM_TYPE.  */
	  new_oprnd = make_ssa_name (interm_type);
	  new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
	  oprnd = new_oprnd;
	  *new_def_stmt = new_stmt;
	}

      /* Otherwise, OPRND is already set.  */
    }

  if (interm_type)
    *new_type = interm_type;
  else
    *new_type = half_type;

  *op0 = oprnd;
  *op1 = fold_convert (*new_type, const_oprnd);

  return true;
}
/* Try to find a statement or a sequence of statements that can be performed
   on a smaller type:

     type x_t;
     TYPE x_T, res0_T, res1_T;
   loop:
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  res0_T = op (x_T, C0);
     S4  res1_T = op (res0_T, C1);
     S5  ... = () res1_T;  - type demotion

   where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
   constants.
   Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
   be 'type' or some intermediate type.  For now, we expect S5 to be a type
   demotion operation.  We also check that S3 and S4 have only one use.  */
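
/* Illustrative example (ours, not from the original sources): a typical
   over-widened sequence comes from code like

       unsigned char in[N], out[N];
       for (int i = 0; i < N; i++)
         out[i] = (in[i] << 1) >> 2;

   where the front end performs both shifts in 32-bit int and then
   truncates the result back to unsigned char.  Both shifts can safely
   be done in a narrower type, letting the vectorizer process several
   times as many elements per vector operation.  */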
static gimple
vect_recog_over_widening_pattern (vec<gimple> *stmts,
				  tree *type_in, tree *type_out)
{
  gimple stmt = stmts->pop ();
  gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL,
	 use_stmt = NULL;
  tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
  tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
  bool first;
  tree type = NULL;

  first = true;
  while (1)
    {
      if (!vinfo_for_stmt (stmt)
	  || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
	return NULL;

      new_def_stmt = NULL;
      if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
					     &op0, &op1, &new_def_stmt,
					     stmts))
	{
	  if (first)
	    return NULL;
	  else
	    break;
	}

      /* STMT can be performed on a smaller type.  Check its uses.  */
      use_stmt = vect_single_imm_use (stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt))
	return NULL;

      /* Create pattern statement for STMT.  */
      vectype = get_vectype_for_scalar_type (new_type);
      if (!vectype)
	return NULL;

      /* We want to collect all the statements for which we create pattern
	 statements, except for the case when the last statement in the
	 sequence doesn't have a corresponding pattern statement.  In such
	 case we associate the last pattern statement with the last statement
	 in the sequence.  Therefore, we only add the original statement to
	 the list if we know that it is not the last.  */
      if (prev_stmt)
	stmts->safe_push (prev_stmt);

      var = vect_recog_temp_ssa_var (new_type, NULL);
      pattern_stmt
	= gimple_build_assign (var, gimple_assign_rhs_code (stmt), op0, op1);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
      new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "created pattern stmt: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
	  dump_printf (MSG_NOTE, "\n");
	}

      type = gimple_expr_type (stmt);
      prev_stmt = stmt;
      stmt = use_stmt;

      first = false;
    }

  /* We got a sequence.  We expect it to end with a type demotion operation.
     Otherwise, we quit (for now).  There are three possible cases: the
     conversion is to NEW_TYPE (we don't do anything), the conversion is to
     a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
     NEW_TYPE differs (we create a new conversion statement).  */
  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
    {
      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      /* Support only type demotion or signedness change.  */
      if (!INTEGRAL_TYPE_P (use_type)
	  || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
	return NULL;

      /* Check that NEW_TYPE is not bigger than the conversion result.  */
      if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
	return NULL;

      if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
	  || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
	{
	  /* Create NEW_TYPE->USE_TYPE conversion.  */
	  new_oprnd = make_ssa_name (use_type);
	  pattern_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, var);
	  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;

	  *type_in = get_vectype_for_scalar_type (new_type);
	  *type_out = get_vectype_for_scalar_type (use_type);

	  /* We created a pattern statement for the last statement in the
	     sequence, so we don't need to associate it with the pattern
	     statement created for PREV_STMT.  Therefore, we add PREV_STMT
	     to the list in order to mark it later in vect_pattern_recog_1.  */
	  if (prev_stmt)
	    stmts->safe_push (prev_stmt);
	}
      else
	{
	  if (prev_stmt)
	    STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
	      = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));

	  *type_in = vectype;
	  *type_out = NULL_TREE;
	}

      stmts->safe_push (use_stmt);
    }
  else
    /* TODO: support general case, create a conversion to the correct type.  */
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_recog_over_widening_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  return pattern_stmt;
}
/* Detect widening shift pattern:

   type a_t;
   TYPE a_T, res_T;

   S1 a_t = ;
   S2 a_T = (TYPE) a_t;
   S3 res_T = a_T << CONST;

  where type 'TYPE' is at least double the size of type 'type'.

  Also detect cases where the shift result is immediately converted
  to another type 'result_type' that is no larger in size than 'TYPE'.
  In those cases we perform a widen-shift that directly results in
  'result_type', to avoid a possible over-widening situation:

  type a_t;
  TYPE a_T, res_T;
  result_type res_result;

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
  S3 res_T = a_T << CONST;
  S4 res_result = (result_type) res_T;
      '--> res_result' = a_t w<< CONST;

  And a case when 'TYPE' is 4 times bigger than 'type'.  In that case we
  create an additional pattern stmt for S2 to create a variable of an
  intermediate type, and perform widen-shift on the intermediate type:

  type a_t;
  interm_type a_it;
  TYPE a_T, res_T, res_T';

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
      '--> a_it = (interm_type) a_t;
  S3 res_T = a_T << CONST;
      '--> res_T' = a_it <<* CONST;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins.
    In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
    in STMTS.  When an intermediate type is used and a pattern statement is
    created for S2, we also put S2 here (before S3).

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the sequence of
    stmts that constitute the pattern.  In this case it will be:
    WIDEN_LSHIFT_EXPR <a_t, CONST>.  */
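
/* Illustrative example (ours, not from the original sources): with
   "type" = unsigned char and "TYPE" = unsigned short,

       unsigned char a[N];
       unsigned short r[N];
       for (int i = 0; i < N; i++)
         r[i] = (unsigned short) (a[i] << 4);

   becomes WIDEN_LSHIFT_EXPR <a[i], 4> on targets that support a
   widening shift (e.g. via the vec_widen_lshift optabs).  */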
static gimple
vect_recog_widen_shift_pattern (vec<gimple> *stmts,
				tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  gimple def_stmt0;
  tree oprnd0, oprnd1;
  tree type, half_type0;
  gimple pattern_stmt;
  tree vectype, vectype_out = NULL_TREE;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  gimple use_stmt;
  bool promotion;

  if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
    return NULL;

  if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
    return NULL;

  /* Check operand 0: it has to be defined by a type promotion.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
			  &promotion)
      || !promotion)
    return NULL;

  /* Check operand 1: has to be positive.  We check that it fits the type
     in vect_handle_widen_op_by_const ().  */
  if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (def_stmt0);
  type = gimple_expr_type (last_stmt);

  /* Check for subsequent conversion to another type.  */
  use_stmt = vect_single_imm_use (last_stmt);
  if (use_stmt && is_gimple_assign (use_stmt)
      && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
      && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
    {
      tree use_lhs = gimple_assign_lhs (use_stmt);
      tree use_type = TREE_TYPE (use_lhs);

      if (INTEGRAL_TYPE_P (use_type)
	  && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
	{
	  last_stmt = use_stmt;
	  type = use_type;
	}
    }

  /* Check if this is a widening operation.  */
  gimple wstmt = NULL;
  if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
				      &oprnd0, &wstmt,
				      type, &half_type0, def_stmt0))
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_recog_widen_shift_pattern: detected:\n");

  /* Check target support.  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);

  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
					  vectype_out, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt =
    gimple_build_assign (var, WIDEN_LSHIFT_EXPR, oprnd0, oprnd1);
  if (wstmt)
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
      bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
      new_pattern_def_seq (stmt_vinfo, wstmt);
      stmt_vec_info new_stmt_info
	= new_stmt_vec_info (wstmt, loop_vinfo, bb_vinfo);
      set_vinfo_for_stmt (wstmt, new_stmt_info);
      STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
    }

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}
/* Detect a rotate pattern that wouldn't be otherwise vectorized:

   type a_t, b_t, c_t;

   S0 a_t = b_t r<< c_t;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins,
    i.e. the shift/rotate stmt.  The original stmt (S0) is replaced
    with a sequence:

    S1 d_t = -c_t;
    S2 e_t = d_t & (B - 1);
    S3 f_t = b_t << c_t;
    S4 g_t = b_t >> e_t;
    S0 a_t = f_t | g_t;

    where B is element bitsize of type.

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the rotate
    S0 stmt.  */
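
/* Illustrative example (ours, not from the original sources): a typical
   source form is

       unsigned int x, c;
       unsigned int r = (x << c) | (x >> (32 - c));

   which earlier passes fold into the rotate x r<< c.  On targets
   without a vector rotate, this recognizer re-expands the rotate into
   the masked shift/or sequence S1-S0 above, which does vectorize using
   plain vector shifts.  */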
static gimple
vect_recog_rotate_pattern (vec<gimple> *stmts, tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
  gimple pattern_stmt, def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  enum vect_def_type dt;
  optab optab1, optab2;
  edge ext_def = NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  lhs = gimple_assign_lhs (last_stmt);
  oprnd0 = gimple_assign_rhs1 (last_stmt);
  type = TREE_TYPE (oprnd0);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
      || !INTEGRAL_TYPE_P (type)
      || !TYPE_UNSIGNED (type))
    return NULL;

  if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
			   &def, &dt))
    return NULL;

  if (dt != vect_internal_def
      && dt != vect_constant_def
      && dt != vect_external_def)
    return NULL;

  vectype = get_vectype_for_scalar_type (type);
  if (vectype == NULL_TREE)
    return NULL;

  /* If vector/vector or vector/scalar rotate is supported by the target,
     don't do anything here.  */
  optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
  if (optab1
      && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
    return NULL;

  if (bb_vinfo != NULL || dt != vect_internal_def)
    {
      optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
      if (optab2
	  && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
	return NULL;
    }

  /* If vector/vector or vector/scalar shifts aren't supported by the target,
     don't do anything here either.  */
  optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
  optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
  if (!optab1
      || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
      || !optab2
      || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      if (bb_vinfo == NULL && dt == vect_internal_def)
	return NULL;
      optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
      optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
      if (!optab1
	  || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
	  || !optab2
	  || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
	return NULL;
    }

  *type_in = vectype;
  *type_out = vectype;
  if (*type_in == NULL_TREE)
    return NULL;

  if (dt == vect_external_def
      && TREE_CODE (oprnd1) == SSA_NAME
      && loop_vinfo)
    {
      struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      ext_def = loop_preheader_edge (loop);
      if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
	{
	  basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
	  if (bb == NULL
	      || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
	    ext_def = NULL;
	}
    }

  def = NULL_TREE;
  if (TREE_CODE (oprnd1) == INTEGER_CST
      || TYPE_MODE (TREE_TYPE (oprnd1)) == TYPE_MODE (type))
    def = oprnd1;
  else if (def_stmt && gimple_assign_cast_p (def_stmt))
    {
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (type)
	  && TYPE_PRECISION (TREE_TYPE (rhs1))
	     == TYPE_PRECISION (type))
	def = rhs1;
    }

  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
  if (def == NULL_TREE)
    {
      def = vect_recog_temp_ssa_var (type, NULL);
      def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
      if (ext_def)
	{
	  basic_block new_bb
	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
	  gcc_assert (!new_bb);
	}
      else
	append_pattern_def_seq (stmt_vinfo, def_stmt);
    }
  stype = TREE_TYPE (def);

  if (TREE_CODE (def) == INTEGER_CST)
    {
      if (!tree_fits_uhwi_p (def)
	  || tree_to_uhwi (def) >= GET_MODE_PRECISION (TYPE_MODE (type))
	  || integer_zerop (def))
	return NULL;
      def2 = build_int_cst (stype,
			    GET_MODE_PRECISION (TYPE_MODE (type))
			    - tree_to_uhwi (def));
    }
  else
    {
      tree vecstype = get_vectype_for_scalar_type (stype);
      stmt_vec_info def_stmt_vinfo;

      if (vecstype == NULL_TREE)
	return NULL;
      def2 = vect_recog_temp_ssa_var (stype, NULL);
      def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
      if (ext_def)
	{
	  basic_block new_bb
	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
	  gcc_assert (!new_bb);
	}
      else
	{
	  def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
	  set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
	  STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
	  append_pattern_def_seq (stmt_vinfo, def_stmt);
	}

      def2 = vect_recog_temp_ssa_var (stype, NULL);
      tree mask
	= build_int_cst (stype, GET_MODE_PRECISION (TYPE_MODE (stype)) - 1);
      def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
				      gimple_assign_lhs (def_stmt), mask);
      if (ext_def)
	{
	  basic_block new_bb
	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
	  gcc_assert (!new_bb);
	}
      else
	{
	  def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
	  set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
	  STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
	  append_pattern_def_seq (stmt_vinfo, def_stmt);
	}
    }

  var1 = vect_recog_temp_ssa_var (type, NULL);
  def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
					? LSHIFT_EXPR : RSHIFT_EXPR,
				  oprnd0, def);
  append_pattern_def_seq (stmt_vinfo, def_stmt);

  var2 = vect_recog_temp_ssa_var (type, NULL);
  def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
					? RSHIFT_EXPR : LSHIFT_EXPR,
				  oprnd0, def2);
  append_pattern_def_seq (stmt_vinfo, def_stmt);

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_recog_rotate_pattern: detected:\n");

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}
/* Detect a vector by vector shift pattern that wouldn't be otherwise
   vectorized:

   type a_t;
   TYPE b_T, res_T;

   S1 a_t = ;
   S2 b_T = ;
   S3 res_T = b_T op a_t;

  where type 'TYPE' is a type with different size than 'type',
  and op is <<, >> or rotate.

  Also detect cases:

   type a_t;
   TYPE b_T, c_T, res_T;

   S0 c_T = ;
   S1 a_t = (type) c_T;
   S2 b_T = ;
   S3 res_T = b_T op a_t;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins,
    i.e. the shift/rotate stmt.  The original stmt (S3) is replaced
    with a shift/rotate which has same type on both operands, in the
    second case just b_T op c_T, in the first case with added cast
    from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the shift/rotate
    S3 stmt.  */
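
/* Illustrative example (ours, not from the original sources): the first
   form corresponds to source like

       long long b[N];
       int a[N];
       for (int i = 0; i < N; i++)
         b[i] = b[i] << a[i];

   where the shift amount has a narrower type than the shifted value;
   the pattern inserts a cast so that both vector operands share a
   single vector type.  */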
static gimple
vect_recog_vector_vector_shift_pattern (vec<gimple> *stmts,
					tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  tree oprnd0, oprnd1, lhs, var;
  gimple pattern_stmt, def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  enum vect_def_type dt;
  tree def;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  lhs = gimple_assign_lhs (last_stmt);
  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TREE_CODE (oprnd1) != SSA_NAME
      || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
      || TYPE_PRECISION (TREE_TYPE (oprnd1))
	 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
      || TYPE_PRECISION (TREE_TYPE (lhs))
	 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
    return NULL;

  if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
			   &def, &dt))
    return NULL;

  if (dt != vect_internal_def)
    return NULL;

  *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
  *type_out = *type_in;
  if (*type_in == NULL_TREE)
    return NULL;

  def = NULL_TREE;
  if (gimple_assign_cast_p (def_stmt))
    {
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
	  && TYPE_PRECISION (TREE_TYPE (rhs1))
	     == TYPE_PRECISION (TREE_TYPE (oprnd0)))
	def = rhs1;
    }

  if (def == NULL_TREE)
    {
      def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
      def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
      new_pattern_def_seq (stmt_vinfo, def_stmt);
    }

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_recog_vector_vector_shift_pattern: detected:\n");

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
  pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}

/* Detect a signed division by a constant that wouldn't otherwise be
   vectorized:

   type a_t, b_t;

   S1 a_t = b_t / N;

   where type 'type' is an integral type and N is a constant.

   Similarly handle modulo by a constant:

   S4 a_t = b_t % N;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins,
   i.e. the division stmt.  S1 is replaced, if N is a power of two
   constant and type is signed, by:
   S3  y_t = b_t < 0 ? N - 1 : 0;
   S2  x_t = b_t + y_t;
   S1' a_t = x_t >> log2 (N);

   S4 is replaced, if N is a power of two constant and type is
   signed, by (where *_T temporaries have unsigned type):
   S9  y_T = b_t < 0 ? -1U : 0U;
   S8  z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
   S7  z_t = (type) z_T;
   S6  w_t = b_t + z_t;
   S5  x_t = w_t & (N - 1);
   S4' a_t = x_t - z_t;

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the division
   S1 or modulo S4 stmt.  */
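
/* A concrete sketch of the division form (illustrative only, N = 8 and
   32-bit int assumed):

     int div8 (int b)
     {
       int y = b < 0 ? 7 : 0;    (S3)
       int x = b + y;            (S2)
       return x >> 3;            (S1', log2 (8) == 3)
     }

   The bias y makes the arithmetic shift round toward zero for negative b,
   matching C's truncating division.  */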

static gimple
vect_recog_divmod_pattern (vec<gimple> *stmts,
                           tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  tree oprnd0, oprnd1, vectype, itype, cond;
  gimple pattern_stmt, def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  optab optab;
  tree q;
  int dummy_int, prec;
  stmt_vec_info def_stmt_vinfo;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case TRUNC_DIV_EXPR:
    case TRUNC_MOD_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  itype = TREE_TYPE (oprnd0);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TREE_CODE (oprnd1) != INTEGER_CST
      || TREE_CODE (itype) != INTEGER_TYPE
      || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
    return NULL;

  vectype = get_vectype_for_scalar_type (itype);
  if (vectype == NULL_TREE)
    return NULL;

  /* If the target can handle vectorized division or modulo natively,
     don't attempt to optimize this.  */
  optab = optab_for_tree_code (rhs_code, vectype, optab_default);
  if (optab != unknown_optab)
    {
      machine_mode vec_mode = TYPE_MODE (vectype);
      int icode = (int) optab_handler (optab, vec_mode);
      if (icode != CODE_FOR_nothing)
        return NULL;
    }

  prec = TYPE_PRECISION (itype);
  if (integer_pow2p (oprnd1))
    {
      if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
        return NULL;

      /* Pattern detected.  */
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_recog_divmod_pattern: detected:\n");

      cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
                     build_int_cst (itype, 0));
      if (rhs_code == TRUNC_DIV_EXPR)
        {
          tree var = vect_recog_temp_ssa_var (itype, NULL);
          tree shift;
          def_stmt
            = gimple_build_assign (var, COND_EXPR, cond,
                                   fold_build2 (MINUS_EXPR, itype, oprnd1,
                                                build_int_cst (itype, 1)),
                                   build_int_cst (itype, 0));
          new_pattern_def_seq (stmt_vinfo, def_stmt);
          var = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt
            = gimple_build_assign (var, PLUS_EXPR, oprnd0,
                                   gimple_assign_lhs (def_stmt));
          append_pattern_def_seq (stmt_vinfo, def_stmt);

          shift = build_int_cst (itype, tree_log2 (oprnd1));
          pattern_stmt
            = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
                                   RSHIFT_EXPR, var, shift);
        }
      else
        {
          tree signmask;
          STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
          if (compare_tree_int (oprnd1, 2) == 0)
            {
              signmask = vect_recog_temp_ssa_var (itype, NULL);
              def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
                                              build_int_cst (itype, 1),
                                              build_int_cst (itype, 0));
              append_pattern_def_seq (stmt_vinfo, def_stmt);
            }
          else
            {
              tree utype
                = build_nonstandard_integer_type (prec, 1);
              tree vecutype = get_vectype_for_scalar_type (utype);
              tree shift
                = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
                                        - tree_log2 (oprnd1));
              tree var = vect_recog_temp_ssa_var (utype, NULL);

              def_stmt = gimple_build_assign (var, COND_EXPR, cond,
                                              build_int_cst (utype, -1),
                                              build_int_cst (utype, 0));
              def_stmt_vinfo
                = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
              set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
              STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
              append_pattern_def_seq (stmt_vinfo, def_stmt);
              var = vect_recog_temp_ssa_var (utype, NULL);
              def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
                                              gimple_assign_lhs (def_stmt),
                                              shift);
              def_stmt_vinfo
                = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
              set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
              STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
              append_pattern_def_seq (stmt_vinfo, def_stmt);
              signmask = vect_recog_temp_ssa_var (itype, NULL);
              def_stmt
                = gimple_build_assign (signmask, NOP_EXPR, var);
              append_pattern_def_seq (stmt_vinfo, def_stmt);
            }
          def_stmt
            = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
                                   PLUS_EXPR, oprnd0, signmask);
          append_pattern_def_seq (stmt_vinfo, def_stmt);
          def_stmt
            = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
                                   BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
                                   fold_build2 (MINUS_EXPR, itype, oprnd1,
                                                build_int_cst (itype, 1)));
          append_pattern_def_seq (stmt_vinfo, def_stmt);

          pattern_stmt
            = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
                                   MINUS_EXPR, gimple_assign_lhs (def_stmt),
                                   signmask);
        }

      if (dump_enabled_p ())
        dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
                              0);

      stmts->safe_push (last_stmt);

      *type_in = vectype;
      *type_out = vectype;
      return pattern_stmt;
    }

  if (prec > HOST_BITS_PER_WIDE_INT
      || integer_zerop (oprnd1))
    return NULL;

  if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
    return NULL;

  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;

  if (TYPE_UNSIGNED (itype))
    {
      unsigned HOST_WIDE_INT mh, ml;
      int pre_shift, post_shift;
      unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
                                  & GET_MODE_MASK (TYPE_MODE (itype)));
      tree t1, t2, t3, t4;

      if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
        /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0.  */
        return NULL;

      /* Find a suitable multiplier and right shift count
         instead of multiplying with D.  */
      mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);

      /* If the suggested multiplier is more than SIZE bits, we can do better
         for even divisors, using an initial right shift.  */
      if (mh != 0 && (d & 1) == 0)
        {
          pre_shift = floor_log2 (d & -d);
          mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
                                  &ml, &post_shift, &dummy_int);
          gcc_assert (!mh);
        }
      else
        pre_shift = 0;

      if (mh != 0)
        {
          if (post_shift - 1 >= prec)
            return NULL;

          /* t1 = oprnd0 h* ml;
             t2 = oprnd0 - t1;
             t3 = t2 >> 1;
             t4 = t1 + t3;
             q = t4 >> (post_shift - 1);  */
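          /* For example (illustrative only): dividing 32-bit unsigned x
             by 7, choose_multiplier yields ml = 0x24924925 with
             post_shift == 3, so this sequence computes

               t1 = (x * 0x24924925ULL) >> 32;
               t2 = x - t1;
               t3 = t2 >> 1;
               t4 = t1 + t3;
               q  = t4 >> 2;

             which equals x / 7 for every 32-bit x.  */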
          t1 = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
                                          build_int_cst (itype, ml));
          append_pattern_def_seq (stmt_vinfo, def_stmt);

          t2 = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt
            = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
          append_pattern_def_seq (stmt_vinfo, def_stmt);

          t3 = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt
            = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
          append_pattern_def_seq (stmt_vinfo, def_stmt);

          t4 = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt
            = gimple_build_assign (t4, PLUS_EXPR, t1, t3);

          if (post_shift != 1)
            {
              append_pattern_def_seq (stmt_vinfo, def_stmt);

              q = vect_recog_temp_ssa_var (itype, NULL);
              pattern_stmt
                = gimple_build_assign (q, RSHIFT_EXPR, t4,
                                       build_int_cst (itype, post_shift - 1));
            }
          else
            {
              q = t4;
              pattern_stmt = def_stmt;
            }
        }
      else
        {
          if (pre_shift >= prec || post_shift >= prec)
            return NULL;

          /* t1 = oprnd0 >> pre_shift;
             t2 = t1 h* ml;
             q = t2 >> post_shift;  */
          if (pre_shift)
            {
              t1 = vect_recog_temp_ssa_var (itype, NULL);
              def_stmt
                = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
                                       build_int_cst (NULL, pre_shift));
              append_pattern_def_seq (stmt_vinfo, def_stmt);
            }
          else
            t1 = oprnd0;

          t2 = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
                                          build_int_cst (itype, ml));

          if (post_shift)
            {
              append_pattern_def_seq (stmt_vinfo, def_stmt);

              q = vect_recog_temp_ssa_var (itype, NULL);
              def_stmt
                = gimple_build_assign (q, RSHIFT_EXPR, t2,
                                       build_int_cst (itype, post_shift));
            }
          else
            q = t2;

          pattern_stmt = def_stmt;
        }
    }
  else
    {
      unsigned HOST_WIDE_INT ml;
      int post_shift;
      HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
      unsigned HOST_WIDE_INT abs_d;
      bool add = false;
      tree t1, t2, t3, t4;

      /* Give up for -1.  */
      if (d == -1)
        return NULL;

      /* Since d might be INT_MIN, we have to cast to
         unsigned HOST_WIDE_INT before negating to avoid
         undefined signed overflow.  */
      abs_d = (d >= 0
               ? (unsigned HOST_WIDE_INT) d
               : - (unsigned HOST_WIDE_INT) d);

      /* n rem d = n rem -d */
      if (rhs_code == TRUNC_MOD_EXPR && d < 0)
        {
          d = abs_d;
          oprnd1 = build_int_cst (itype, abs_d);
        }
      else if (HOST_BITS_PER_WIDE_INT >= prec
               && abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
        /* This case is not handled correctly below.  */
        return NULL;

      choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
      if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
        {
          add = true;
          ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
        }
      if (post_shift >= prec)
        return NULL;

      /* t1 = oprnd0 h* ml;  */
      t1 = vect_recog_temp_ssa_var (itype, NULL);
      def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
                                      build_int_cst (itype, ml));

      if (add)
        {
          /* t2 = t1 + oprnd0;  */
          append_pattern_def_seq (stmt_vinfo, def_stmt);
          t2 = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
        }
      else
        t2 = t1;

      if (post_shift)
        {
          /* t3 = t2 >> post_shift;  */
          append_pattern_def_seq (stmt_vinfo, def_stmt);
          t3 = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
                                          build_int_cst (itype, post_shift));
        }
      else
        t3 = t2;

      wide_int oprnd0_min, oprnd0_max;
      int msb = 1;
      if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
        {
          if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
            msb = 0;
          else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
            msb = -1;
        }

      if (msb == 0 && d >= 0)
        {
          /* q = t3;  */
          q = t3;
          pattern_stmt = def_stmt;
        }
      else
        {
          /* t4 = oprnd0 >> (prec - 1);
             or if we know from VRP that oprnd0 >= 0
             t4 = 0;
             or if we know from VRP that oprnd0 < 0
             t4 = -1;  */
          append_pattern_def_seq (stmt_vinfo, def_stmt);
          t4 = vect_recog_temp_ssa_var (itype, NULL);
          if (msb != 1)
            def_stmt = gimple_build_assign (t4, INTEGER_CST,
                                            build_int_cst (itype, msb));
          else
            def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
                                            build_int_cst (itype, prec - 1));
          append_pattern_def_seq (stmt_vinfo, def_stmt);

          /* q = t3 - t4;  or q = t4 - t3;  */
          q = vect_recog_temp_ssa_var (itype, NULL);
          pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
                                              d < 0 ? t3 : t4);
        }
    }

  if (rhs_code == TRUNC_MOD_EXPR)
    {
      tree r, t1;

      /* We divided.  Now finish by:
         t1 = q * oprnd1;
         r = oprnd0 - t1;  */
      append_pattern_def_seq (stmt_vinfo, pattern_stmt);

      t1 = vect_recog_temp_ssa_var (itype, NULL);
      def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
      append_pattern_def_seq (stmt_vinfo, def_stmt);

      r = vect_recog_temp_ssa_var (itype, NULL);
      pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
    }

  /* Pattern detected.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_divmod_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  stmts->safe_push (last_stmt);

  *type_in = vectype;
  *type_out = vectype;
  return pattern_stmt;
}
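
/* A worked signed instance (illustrative only): for 32-bit x / 3,
   choose_multiplier yields ml = 0x55555556 with post_shift == 0 and no
   add correction, so the pattern computes

     t1 = (int) (((long long) x * 0x55555556) >> 32);
     t4 = x >> 31;
     q  = t1 - t4;

   e.g. x = -7 gives t1 = -3, t4 = -1, q = -2, the truncated quotient.  */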

/* Function vect_recog_mixed_size_cond_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE a_T, b_T, c_T;
   loop:
     S1  a_T = x_t CMP y_t ? b_T : c_T;

   where type 'TYPE' is an integral type which has different size
   from 'type'.  b_T and c_T are either constants (and if 'TYPE' is wider
   than 'type', the constants need to fit into an integer type
   with the same width as 'type') or results of conversion from 'type'.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the pattern.
        Additionally a def_stmt is added.

        a_it = x_t CMP y_t ? b_it : c_it;
        a_T = (TYPE) a_it;  */
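
/* Illustrative sketch (hypothetical source): with a char comparison
   feeding an int result,

     out = x_c < y_c ? 21 : 42;     (char x_c, y_c; int out)

   the pattern first selects in a char-sized integer and then widens,

     tmp_c = x_c < y_c ? 21 : 42;
     out = (int) tmp_c;

   so the VEC_COND_EXPR operates in the comparison's own vector type.  */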

static gimple
vect_recog_mixed_size_cond_pattern (vec<gimple> *stmts, tree *type_in,
                                    tree *type_out)
{
  gimple last_stmt = (*stmts)[0];
  tree cond_expr, then_clause, else_clause;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
  tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
  machine_mode cmpmode;
  gimple pattern_stmt, def_stmt;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
  gimple def_stmt0 = NULL, def_stmt1 = NULL;
  bool promotion;
  tree comp_scalar_type;

  if (!is_gimple_assign (last_stmt)
      || gimple_assign_rhs_code (last_stmt) != COND_EXPR
      || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;

  cond_expr = gimple_assign_rhs1 (last_stmt);
  then_clause = gimple_assign_rhs2 (last_stmt);
  else_clause = gimple_assign_rhs3 (last_stmt);

  if (!COMPARISON_CLASS_P (cond_expr))
    return NULL;

  comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
  comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
  if (comp_vectype == NULL_TREE)
    return NULL;

  type = gimple_expr_type (last_stmt);
  if (types_compatible_p (type, comp_scalar_type)
      || ((TREE_CODE (then_clause) != INTEGER_CST
           || TREE_CODE (else_clause) != INTEGER_CST)
          && !INTEGRAL_TYPE_P (comp_scalar_type))
      || !INTEGRAL_TYPE_P (type))
    return NULL;

  if ((TREE_CODE (then_clause) != INTEGER_CST
       && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
                              &def_stmt0, &promotion))
      || (TREE_CODE (else_clause) != INTEGER_CST
          && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
                                 &def_stmt1, &promotion)))
    return NULL;

  if (orig_type0 && orig_type1
      && !types_compatible_p (orig_type0, orig_type1))
    return NULL;

  if (orig_type0)
    {
      if (!types_compatible_p (orig_type0, comp_scalar_type))
        return NULL;
      then_clause = gimple_assign_rhs1 (def_stmt0);
    }

  if (orig_type1)
    {
      if (!types_compatible_p (orig_type1, comp_scalar_type))
        return NULL;
      else_clause = gimple_assign_rhs1 (def_stmt1);
    }

  cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));

  if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
    return NULL;

  vectype = get_vectype_for_scalar_type (type);
  if (vectype == NULL_TREE)
    return NULL;

  if (expand_vec_cond_expr_p (vectype, comp_vectype))
    return NULL;

  if (itype == NULL_TREE)
    itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
                                            TYPE_UNSIGNED (type));

  if (itype == NULL_TREE
      || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
    return NULL;

  vecitype = get_vectype_for_scalar_type (itype);
  if (vecitype == NULL_TREE)
    return NULL;

  if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
    return NULL;

  if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
    {
      if ((TREE_CODE (then_clause) == INTEGER_CST
           && !int_fits_type_p (then_clause, itype))
          || (TREE_CODE (else_clause) == INTEGER_CST
              && !int_fits_type_p (else_clause, itype)))
        return NULL;
    }

  def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
                                  COND_EXPR, unshare_expr (cond_expr),
                                  fold_convert (itype, then_clause),
                                  fold_convert (itype, else_clause));
  pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
                                      NOP_EXPR, gimple_assign_lhs (def_stmt));

  new_pattern_def_seq (stmt_vinfo, def_stmt);
  def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
  set_vinfo_for_stmt (def_stmt, def_stmt_info);
  STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
  *type_in = vecitype;
  *type_out = vectype;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_recog_mixed_size_cond_pattern: detected:\n");

  return pattern_stmt;
}

/* Helper function of vect_recog_bool_pattern.  Called recursively, return
   true if bool VAR can be optimized that way.  */

static bool
check_bool_pattern (tree var, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  gimple def_stmt;
  enum vect_def_type dt;
  tree def, rhs1;
  enum tree_code rhs_code;

  if (!vect_is_simple_use (var, NULL, loop_vinfo, bb_vinfo, &def_stmt, &def,
                           &dt))
    return false;

  if (dt != vect_internal_def)
    return false;

  if (!is_gimple_assign (def_stmt))
    return false;

  if (!has_single_use (def))
    return false;

  rhs1 = gimple_assign_rhs1 (def_stmt);
  rhs_code = gimple_assign_rhs_code (def_stmt);
  switch (rhs_code)
    {
    case SSA_NAME:
      return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);

    CASE_CONVERT:
      if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
           || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
          && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
        return false;
      return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);

    case BIT_NOT_EXPR:
      return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);

    case BIT_AND_EXPR:
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
      if (!check_bool_pattern (rhs1, loop_vinfo, bb_vinfo))
        return false;
      return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo,
                                 bb_vinfo);

    default:
      if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
        {
          tree vecitype, comp_vectype;

          /* If the comparison can throw, then is_gimple_condexpr will be
             false and we can't make a COND_EXPR/VEC_COND_EXPR out of it.  */
          if (stmt_could_throw_p (def_stmt))
            return false;

          comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
          if (comp_vectype == NULL_TREE)
            return false;

          if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
            {
              machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
              tree itype
                = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
              vecitype = get_vectype_for_scalar_type (itype);
              if (vecitype == NULL_TREE)
                return false;
            }
          else
            vecitype = comp_vectype;
          return expand_vec_cond_expr_p (vecitype, comp_vectype);
        }
      return false;
    }
}

/* Helper function of adjust_bool_pattern.  Add a cast to TYPE to a previous
   stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
   to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT.  */

static tree
adjust_bool_pattern_cast (tree type, tree var)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
  gimple cast_stmt, pattern_stmt;

  gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
  pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
  new_pattern_def_seq (stmt_vinfo, pattern_stmt);
  cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
                                   NOP_EXPR, gimple_assign_lhs (pattern_stmt));
  STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
  return gimple_assign_lhs (cast_stmt);
}

/* Helper function of vect_recog_bool_pattern.  Do the actual transformations,
   recursively.  VAR is an SSA_NAME that should be transformed from bool
   to a wider integer type, OUT_TYPE is the desired final integer type of
   the whole pattern, TRUEVAL should be NULL unless optimizing
   BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
   in the then_clause, STMTS is where statements with added pattern stmts
   should be pushed to.  */

static tree
adjust_bool_pattern (tree var, tree out_type, tree trueval,
                     vec<gimple> *stmts)
{
  gimple stmt = SSA_NAME_DEF_STMT (var);
  enum tree_code rhs_code, def_rhs_code;
  tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
  location_t loc;
  gimple pattern_stmt, def_stmt;

  rhs1 = gimple_assign_rhs1 (stmt);
  rhs2 = gimple_assign_rhs2 (stmt);
  rhs_code = gimple_assign_rhs_code (stmt);
  loc = gimple_location (stmt);
  switch (rhs_code)
    {
    case SSA_NAME:
    CASE_CONVERT:
      irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
      itype = TREE_TYPE (irhs1);
      pattern_stmt
        = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
                               SSA_NAME, irhs1);
      break;

    case BIT_NOT_EXPR:
      irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
      itype = TREE_TYPE (irhs1);
      pattern_stmt
        = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
                               BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
      break;

    case BIT_AND_EXPR:
      /* Try to optimize x = y & (a < b ? 1 : 0); into
         x = (a < b ? y : 0);

         E.g. for:
           bool a_b, b_b, c_b;
           TYPE d_T;

           S1  a_b = x1 CMP1 y1;
           S2  b_b = x2 CMP2 y2;
           S3  c_b = a_b & b_b;
           S4  d_T = (TYPE) c_b;

         we would normally emit:

           S1'  a_T = x1 CMP1 y1 ? 1 : 0;
           S2'  b_T = x2 CMP2 y2 ? 1 : 0;
           S3'  c_T = a_T & b_T;
           S4'  d_T = c_T;

         but we can save one stmt by using the
         result of one of the COND_EXPRs in the other COND_EXPR and leave
         the BIT_AND_EXPR stmt out:

           S1'  a_T = x1 CMP1 y1 ? 1 : 0;
           S3'  c_T = x2 CMP2 y2 ? a_T : 0;
           S4'  d_T = c_T;

         At least when VEC_COND_EXPR is implemented using masks,
         cond ? 1 : 0 is as expensive as cond ? var : 0: in both cases it
         computes the comparison mask and ANDs it, in one case with an
         all-ones vector, in the other case with a vector register.
         Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
         often more expensive.  */
      def_stmt = SSA_NAME_DEF_STMT (rhs2);
      def_rhs_code = gimple_assign_rhs_code (def_stmt);
      if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
        {
          tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
          irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
          if (TYPE_PRECISION (TREE_TYPE (irhs1))
              == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
            {
              gimple tstmt;
              stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
              irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
              tstmt = stmts->pop ();
              gcc_assert (tstmt == def_stmt);
              stmts->quick_push (stmt);
              STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
                = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
              gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
              STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
              return irhs2;
            }
          else
            irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
          goto and_ior_xor;
        }
      def_stmt = SSA_NAME_DEF_STMT (rhs1);
      def_rhs_code = gimple_assign_rhs_code (def_stmt);
      if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
        {
          tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
          irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
          if (TYPE_PRECISION (TREE_TYPE (irhs2))
              == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
            {
              gimple tstmt;
              stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
              irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
              tstmt = stmts->pop ();
              gcc_assert (tstmt == def_stmt);
              stmts->quick_push (stmt);
              STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
                = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
              gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
              STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
              return irhs1;
            }
          else
            irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
          goto and_ior_xor;
        }
      /* FALLTHRU */
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
      irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
      irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
    and_ior_xor:
      if (TYPE_PRECISION (TREE_TYPE (irhs1))
          != TYPE_PRECISION (TREE_TYPE (irhs2)))
        {
          int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
          int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
          int out_prec = TYPE_PRECISION (out_type);
          if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
            irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
          else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
            irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
          else
            {
              irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
              irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
            }
        }
      itype = TREE_TYPE (irhs1);
      pattern_stmt
        = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
                               rhs_code, irhs1, irhs2);
      break;

    default:
      gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
      if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
          || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
          || (TYPE_PRECISION (TREE_TYPE (rhs1))
              != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
        {
          machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
          itype
            = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
        }
      else
        itype = TREE_TYPE (rhs1);
      cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
      if (trueval == NULL_TREE)
        trueval = build_int_cst (itype, 1);
      else
        gcc_checking_assert (useless_type_conversion_p (itype,
                                                        TREE_TYPE (trueval)));
      pattern_stmt
        = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
                               COND_EXPR, cond_expr, trueval,
                               build_int_cst (itype, 0));
      break;
    }

  stmts->safe_push (stmt);
  gimple_set_location (pattern_stmt, loc);
  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
  return gimple_assign_lhs (pattern_stmt);
}

/* Function vect_recog_bool_pattern

   Try to find a pattern like the following:

     bool a_b, b_b, c_b, d_b, e_b;
     TYPE f_T;
   loop:
     S1  a_b = x1 CMP1 y1;
     S2  b_b = x2 CMP2 y2;
     S3  c_b = a_b & b_b;
     S4  d_b = x3 CMP3 y3;
     S5  e_b = c_b | d_b;
     S6  f_T = (TYPE) e_b;

   where type 'TYPE' is an integral type.  Or a similar pattern
   ending in

     S6  f_Y = e_b ? r_Y : s_Y;

   as results from if-conversion of a complex condition.

   Input:

   * LAST_STMT: A stmt at the end from which the pattern
                search begins, i.e. a cast of a bool to
                an integer type.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the pattern.

        Assuming the size of TYPE is the same as the size of all comparisons
        (otherwise some casts would be added where needed), for the above
        sequence we create related pattern stmts:
        S1'  a_T = x1 CMP1 y1 ? 1 : 0;
        S3'  c_T = x2 CMP2 y2 ? a_T : 0;
        S4'  d_T = x3 CMP3 y3 ? 1 : 0;
        S5'  e_T = c_T | d_T;
        S6'  f_T = e_T;

        Instead of the above S3' we could emit:
        S2'  b_T = x2 CMP2 y2 ? 1 : 0;
        S3'  c_T = a_T & b_T;
        but the above is more efficient.  */
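
/* Illustrative sketch (hypothetical source): if-conversion of

     for (i = 0; i < n; i++)
       out[i] = (a[i] < b[i] && c[i] < d[i]) ? 5 : 10;

   yields exactly such a chain of bool comparisons feeding a COND_EXPR
   (the second form above); the pattern rewrites the bool chain into
   integer COND_EXPRs that the vectorizer can expand as VEC_COND_EXPRs.  */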

static gimple
vect_recog_bool_pattern (vec<gimple> *stmts, tree *type_in,
                         tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  enum tree_code rhs_code;
  tree var, lhs, rhs, vectype;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  gimple pattern_stmt;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  var = gimple_assign_rhs1 (last_stmt);
  lhs = gimple_assign_lhs (last_stmt);

  if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
       || !TYPE_UNSIGNED (TREE_TYPE (var)))
      && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  if (CONVERT_EXPR_CODE_P (rhs_code))
    {
      if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
          || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
        return NULL;
      vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
      if (vectype == NULL_TREE)
        return NULL;

      if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
        return NULL;

      rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
      if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
        pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
      else
        pattern_stmt
          = gimple_build_assign (lhs, NOP_EXPR, rhs);
      *type_out = vectype;
      *type_in = vectype;
      stmts->safe_push (last_stmt);
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_recog_bool_pattern: detected:\n");

      return pattern_stmt;
    }
  else if (rhs_code == COND_EXPR
           && TREE_CODE (var) == SSA_NAME)
    {
      vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
      if (vectype == NULL_TREE)
        return NULL;

      /* Build a scalar type for the boolean result that when
         vectorized matches the vector type of the result in
         size and number of elements.  */
      unsigned prec
        = wi::udiv_trunc (TYPE_SIZE (vectype),
                          TYPE_VECTOR_SUBPARTS (vectype)).to_uhwi ();
      tree type
        = build_nonstandard_integer_type (prec,
                                          TYPE_UNSIGNED (TREE_TYPE (var)));
      if (get_vectype_for_scalar_type (type) == NULL_TREE)
        return NULL;

      if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
        return NULL;

      rhs = adjust_bool_pattern (var, type, NULL_TREE, stmts);
      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
      pattern_stmt
        = gimple_build_assign (lhs, COND_EXPR,
                               build2 (NE_EXPR, boolean_type_node,
                                       rhs, build_int_cst (type, 0)),
                               gimple_assign_rhs2 (last_stmt),
                               gimple_assign_rhs3 (last_stmt));
      *type_out = vectype;
      *type_in = vectype;
      stmts->safe_push (last_stmt);
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_recog_bool_pattern: detected:\n");

      return pattern_stmt;
    }
  else if (rhs_code == SSA_NAME
           && STMT_VINFO_DATA_REF (stmt_vinfo))
    {
      stmt_vec_info pattern_stmt_info;
      vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      gcc_assert (vectype != NULL_TREE);
      if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
        return NULL;
      if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
        return NULL;

      rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
      lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
      if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
        {
          tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
          gimple cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
          new_pattern_def_seq (stmt_vinfo, cast_stmt);
          rhs = rhs2;
        }
      pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
      pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
                                             bb_vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
      STMT_VINFO_DATA_REF (pattern_stmt_info)
        = STMT_VINFO_DATA_REF (stmt_vinfo);
      STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
        = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
      STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
      STMT_VINFO_DR_OFFSET (pattern_stmt_info)
        = STMT_VINFO_DR_OFFSET (stmt_vinfo);
      STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
      STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
        = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
      DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
      *type_out = vectype;
      *type_in = vectype;
      stmts->safe_push (last_stmt);
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_recog_bool_pattern: detected:\n");
      return pattern_stmt;
    }
  else
    return NULL;
}

/* Mark statements that are involved in a pattern.  */

static inline void
vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
                         tree pattern_vectype)
{
  stmt_vec_info pattern_stmt_info, def_stmt_info;
  stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (orig_stmt_info);
  gimple def_stmt;

  pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
  if (pattern_stmt_info == NULL)
    {
      pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
                                             bb_vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
    }
  gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));

  STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
  STMT_VINFO_DEF_TYPE (pattern_stmt_info)
    = STMT_VINFO_DEF_TYPE (orig_stmt_info);
  STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
  STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
  STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
  STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
    = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
  if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
    {
      gimple_stmt_iterator si;
      for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
           !gsi_end_p (si); gsi_next (&si))
        {
          def_stmt = gsi_stmt (si);
          def_stmt_info = vinfo_for_stmt (def_stmt);
          if (def_stmt_info == NULL)
            {
              def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo,
                                                 bb_vinfo);
              set_vinfo_for_stmt (def_stmt, def_stmt_info);
            }
          gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
          STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
          STMT_VINFO_DEF_TYPE (def_stmt_info) = vect_internal_def;
          if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
            STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
        }
    }
}

/* Function vect_pattern_recog_1

   Input:
   PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
        computation pattern.
   STMT: A stmt from which the pattern search should start.

   If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
   expression that computes the same functionality and can be used to
   replace the sequence of stmts that are involved in the pattern.

   Output:
   This function checks if the expression returned by PATTERN_RECOG_FUNC is
   supported in vector form by the target.  We use 'TYPE_IN' to obtain the
   relevant vector type.  If 'TYPE_IN' is already a vector type, then this
   indicates that target support had already been checked by
   PATTERN_RECOG_FUNC.  If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC,
   we check that it fits the available target pattern.

   This function also does some bookkeeping, as explained in the
   documentation for vect_recog_pattern.  */

static void
vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
                      gimple_stmt_iterator si,
                      vec<gimple> *stmts_to_replace)
{
  gimple stmt = gsi_stmt (si), pattern_stmt;
  stmt_vec_info stmt_info;
  loop_vec_info loop_vinfo;
  tree pattern_vectype;
  tree type_in, type_out;
  enum tree_code code;
  int i;
  gimple next;

  stmts_to_replace->truncate (0);
  stmts_to_replace->quick_push (stmt);
  pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
  if (!pattern_stmt)
    return;

  stmt = stmts_to_replace->last ();
  stmt_info = vinfo_for_stmt (stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);

  if (VECTOR_MODE_P (TYPE_MODE (type_in)))
    {
      /* No need to check target support (already checked by the pattern
         recognition function).  */
      pattern_vectype = type_out ? type_out : type_in;
    }
  else
    {
      machine_mode vec_mode;
      enum insn_code icode;
      optab optab;

      /* Check target support  */
      type_in = get_vectype_for_scalar_type (type_in);
      if (!type_in)
        return;
      if (type_out)
        type_out = get_vectype_for_scalar_type (type_out);
      else
        type_out = type_in;
      if (!type_out)
        return;
      pattern_vectype = type_out;

      if (is_gimple_assign (pattern_stmt))
        code = gimple_assign_rhs_code (pattern_stmt);
      else
        {
          gcc_assert (is_gimple_call (pattern_stmt));
          code = CALL_EXPR;
        }

      optab = optab_for_tree_code (code, type_in, optab_default);
      vec_mode = TYPE_MODE (type_in);
      if (!optab
          || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
          || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
        return;
    }

  /* Found a vectorizable pattern.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "pattern recognized: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
    }

  /* Mark the stmts that are involved in the pattern.  */
  vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);

  /* Patterns cannot be vectorized using SLP, because they change the order of
     computation.  */
  if (loop_vinfo)
    FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
      if (next == stmt)
        LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);

  /* It is possible that additional pattern stmts are created and inserted in
     STMTS_TO_REPLACE.  We create a stmt_info for each of them, and mark the
     relevant statements.  */
  for (i = 0; stmts_to_replace->iterate (i, &stmt)
              && (unsigned) i < (stmts_to_replace->length () - 1);
       i++)
    {
      stmt_info = vinfo_for_stmt (stmt);
      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "additional pattern stmt: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
        }

      vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
    }
}

/* Function vect_pattern_recog

   Input:
   LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
        computation idioms.

   Output - for each computation idiom that is detected we create a new stmt
        that provides the same functionality and that can be vectorized.  We
        also record some information in the struct_stmt_info of the relevant
        stmts, as explained below:

   At the entry to this function we have the following stmts, with the
   following initial value in the STMT_VINFO fields:

         stmt                     in_pattern_p  related_stmt    vec_stmt
         S1: a_i = ....                 -       -               -
         S2: a_2 = ..use(a_i)..         -       -               -
         S3: a_1 = ..use(a_2)..         -       -               -
         S4: a_0 = ..use(a_1)..         -       -               -
         S5: ... = ..use(a_0)..         -       -               -

   Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
   represented by a single stmt.  We then:
   - create a new stmt S6 equivalent to the pattern (the stmt is not
     inserted into the code)
   - fill in the STMT_VINFO fields as follows:

                                  in_pattern_p  related_stmt    vec_stmt
         S1: a_i = ....                 -       -               -
         S2: a_2 = ..use(a_i)..         -       -               -
         S3: a_1 = ..use(a_2)..         -       -               -
         S4: a_0 = ..use(a_1)..         true    S6              -
           '---> S6: a_new = ....       -       S4              -
         S5: ... = ..use(a_0)..         -       -               -

   (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
   to each other through the RELATED_STMT field).

   S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
   of S4 because it will replace all its uses.  Stmts {S1,S2,S3} will
   remain irrelevant unless used by stmts other than S4.

   If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
   (because they are marked as irrelevant).  It will vectorize S6, and record
   a pointer to the new vector stmt VS6 from S6 (as usual).
   S4 will be skipped, and S5 will be vectorized as usual:

                                  in_pattern_p  related_stmt    vec_stmt
         S1: a_i = ....                 -       -               -
         S2: a_2 = ..use(a_i)..         -       -               -
         S3: a_1 = ..use(a_2)..         -       -               -
       > VS6: va_new = ....             -       -               -
         S4: a_0 = ..use(a_1)..         true    S6              VS6
           '---> S6: a_new = ....       -       S4              VS6
       > VS5: ... = ..vuse(va_new)..    -       -               -
         S5: ... = ..use(a_0)..         -       -               -

   DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
   elsewhere), and we'll end up with:

        VS6: va_new = ....
        VS5: ... = ..vuse(va_new)..

   In case of more than one pattern statement, e.g., widen-mult with
   intermediate type:

     S1  a_t = ;
     S2  a_T = (TYPE) a_t;
           '--> S3: a_it = (interm_type) a_t;
     S4  prod_T = a_T * CONST;
           '--> S5: prod_T' = a_it w* CONST;

   there may be other users of a_T outside the pattern.  In that case S2 will
   be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
   and vectorized.  The vector stmt VS2 will be recorded in S2, and VS3 will
   be recorded in S3.  */

void
vect_pattern_recog (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  struct loop *loop;
  basic_block *bbs;
  unsigned int nbbs;
  gimple_stmt_iterator si;
  unsigned int i, j;
  vect_recog_func_ptr vect_recog_func;
  auto_vec<gimple, 1> stmts_to_replace;
  gimple stmt;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_pattern_recog ===\n");

  if (loop_vinfo)
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      bbs = LOOP_VINFO_BBS (loop_vinfo);
      nbbs = loop->num_nodes;
    }
  else
    {
      bbs = &BB_VINFO_BB (bb_vinfo);
      nbbs = 1;
    }

  /* Scan through the loop stmts, applying the pattern recognition
     functions starting at each stmt visited:  */
  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          if (bb_vinfo && (stmt = gsi_stmt (si))
              && vinfo_for_stmt (stmt)
              && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
            continue;

          /* Scan over all generic vect_recog_xxx_pattern functions.  */
          for (j = 0; j < NUM_PATTERNS; j++)
            {
              vect_recog_func = vect_vect_recog_func_ptrs[j];
              vect_pattern_recog_1 (vect_recog_func, si,
                                    &stmts_to_replace);
            }
        }
    }
}