1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2015 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
30 #include "fold-const.h"
31 #include "stor-layout.h"
33 #include "dominance.h"
34 #include "gimple-pretty-print.h"
35 #include "internal-fn.h"
38 #include "gimple-iterator.h"
41 #include "insn-config.h"
43 #include "insn-codes.h"
44 #include "optabs-tree.h"
46 #include "tree-data-ref.h"
47 #include "tree-vectorizer.h"
48 #include "recog.h" /* FIXME: for insn_data */
49 #include "diagnostic-core.h"
53 /* Pattern recognition functions */
54 static gimple *vect_recog_widen_sum_pattern (vec<gimple *> *, tree *, tree *);
56 static gimple *vect_recog_widen_mult_pattern (vec<gimple *> *, tree *, tree *);
58 static gimple *vect_recog_dot_prod_pattern (vec<gimple *> *, tree *, tree *);
60 static gimple *vect_recog_sad_pattern (vec<gimple *> *, tree *, tree *);
62 static gimple *vect_recog_pow_pattern (vec<gimple *> *, tree *, tree *);
63 static gimple *vect_recog_over_widening_pattern (vec<gimple *> *, tree *, tree *);
65 static gimple *vect_recog_widen_shift_pattern (vec<gimple *> *, tree *, tree *);
67 static gimple *vect_recog_rotate_pattern (vec<gimple *> *, tree *, tree *);
68 static gimple *vect_recog_vector_vector_shift_pattern (vec<gimple *> *, tree *, tree *);
70 static gimple *vect_recog_divmod_pattern (vec<gimple *> *, tree *, tree *);
73 static gimple *vect_recog_mult_pattern (vec<gimple *> *, tree *, tree *);
76 static gimple *vect_recog_mixed_size_cond_pattern (vec<gimple *> *, tree *, tree *);
78 static gimple *vect_recog_bool_pattern (vec<gimple *> *, tree *, tree *);
79 static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
80 	vect_recog_widen_mult_pattern,
81 	vect_recog_widen_sum_pattern,
82 	vect_recog_dot_prod_pattern,
83 	vect_recog_sad_pattern,
84 	vect_recog_pow_pattern,
85 	vect_recog_widen_shift_pattern,
86 	vect_recog_over_widening_pattern,
87 	vect_recog_rotate_pattern,
88 	vect_recog_vector_vector_shift_pattern,
89 	vect_recog_divmod_pattern,
90 	vect_recog_mult_pattern,
91 	vect_recog_mixed_size_cond_pattern,
92 	vect_recog_bool_pattern};
95 append_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
97   gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
102 new_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
104   STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
105   append_pattern_def_seq (stmt_info, stmt);
108 /* Check whether STMT2 is in the same loop or basic block as STMT1.
109 Which of the two applies depends on whether we're currently doing
110 loop-based or basic-block-based vectorization, as determined by
111 the vinfo_for_stmt for STMT1 (which must be defined).
113 If this returns true, vinfo_for_stmt for STMT2 is guaranteed
114 to be defined as well. */
117 vect_same_loop_or_bb_p (gimple *stmt1, gimple *stmt2)
119   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
120   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
121   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
123   if (!gimple_bb (stmt2))
128       struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
129       if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt2)))
134       if (gimple_bb (stmt2) != BB_VINFO_BB (bb_vinfo)
135 	  || gimple_code (stmt2) == GIMPLE_PHI)
139   gcc_assert (vinfo_for_stmt (stmt2));
143 /* If the LHS of DEF_STMT has a single use, and that statement is
144 in the same loop or basic block, return it. */
147 vect_single_imm_use (gimple *def_stmt)
149   tree lhs = gimple_assign_lhs (def_stmt);
153   if (!single_imm_use (lhs, &use_p, &use_stmt))
156   if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
162 /* Check whether NAME, an ssa-name used in USE_STMT,
163 is a result of a type promotion, such that:
164 DEF_STMT: NAME = NOP (name0)
165 If CHECK_SIGN is TRUE, check that either both types are signed or both are
169 type_conversion_p (tree name, gimple *use_stmt, bool check_sign,
170 		   tree *orig_type, gimple **def_stmt, bool *promotion)
173   gimple *dummy_gimple;
174   loop_vec_info loop_vinfo;
175   stmt_vec_info stmt_vinfo;
176   tree type = TREE_TYPE (name);
178   enum vect_def_type dt;
180   bb_vec_info bb_vinfo;
182   stmt_vinfo = vinfo_for_stmt (use_stmt);
183   loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
184   bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
185   if (!vect_is_simple_use (name, use_stmt, loop_vinfo, bb_vinfo, def_stmt,
189   if (dt != vect_internal_def
190       && dt != vect_external_def && dt != vect_constant_def)
196   if (!is_gimple_assign (*def_stmt))
199   if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
202   oprnd0 = gimple_assign_rhs1 (*def_stmt);
204   *orig_type = TREE_TYPE (oprnd0);
205   if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
206       || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
209   if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
214   if (!vect_is_simple_use (oprnd0, *def_stmt, loop_vinfo,
215 			   bb_vinfo, &dummy_gimple, &dummy, &dt))
221 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
222 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
225 vect_recog_temp_ssa_var (tree type, gimple *stmt)
227   return make_temp_ssa_name (type, stmt, "patt");
230 /* Function vect_recog_dot_prod_pattern
232 Try to find the following pattern:
238 sum_0 = phi <init, sum_1>
241 S3 x_T = (TYPE1) x_t;
242 S4 y_T = (TYPE1) y_t;
244 [S6 prod = (TYPE2) prod; #optional]
245 S7 sum_1 = prod + sum_0;
247 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
248    same size as 'TYPE1' or bigger.  This is a special case of a reduction
253 * STMTS: Contains a stmt from which the pattern search begins. In the
254 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
259 * TYPE_IN: The type of the input arguments to the pattern.
261 * TYPE_OUT: The type of the output of this pattern.
263 * Return value: A new stmt that will be used to replace the sequence of
264 stmts that constitute the pattern. In this case it will be:
265 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
267 Note: The dot-prod idiom is a widening reduction pattern that is
268 vectorized without preserving all the intermediate results. It
269 produces only N/2 (widened) results (by summing up pairs of
270 intermediate results) rather than all N results. Therefore, we
271 cannot allow this pattern when we want to get all the results and in
272 the correct order (as is the case when this computation is in an
273    inner-loop nested in an outer-loop that is being vectorized).  */
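/* For illustration only (hypothetical example, not taken from this file):
   with narrow inputs and a wider accumulator, a source loop such as

       unsigned char x[N], y[N];
       int sum = 0;
       for (int i = 0; i < N; i++)
         sum += (int) x[i] * (int) y[i];

   is the kind of computation this recognizer rewrites into a single
   DOT_PROD_EXPR <x_t, y_t, sum_0>.  The array names and N are
   hypothetical.  */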
276 vect_recog_dot_prod_pattern (vec<gimple *> *stmts, tree *type_in,
279   gimple *stmt, *last_stmt = (*stmts)[0];
281   tree oprnd00, oprnd01;
282   stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
283   tree type, half_type;
284   gimple *pattern_stmt;
286   loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
294   loop = LOOP_VINFO_LOOP (loop_info);
296   /* We don't allow changing the order of the computation in the inner-loop
297      when doing outer-loop vectorization.  */
298   if (loop && nested_in_vect_loop_p (loop, last_stmt))
301   if (!is_gimple_assign (last_stmt))
304   type = gimple_expr_type (last_stmt);
306 /* Look for the following pattern
310 DDPROD = (TYPE2) DPROD;
311 sum_1 = DDPROD + sum_0;
313 - DX is double the size of X
314 - DY is double the size of Y
315 - DX, DY, DPROD all have the same type
316      - sum is the same size as DPROD or bigger
317 - sum has been recognized as a reduction variable.
319 This is equivalent to:
320 DPROD = X w* Y; #widen mult
321 sum_1 = DPROD w+ sum_0; #widen summation
323 DPROD = X w* Y; #widen mult
324 sum_1 = DPROD + sum_0; #summation
327 /* Starting from LAST_STMT, follow the defs of its uses in search
328 of the above pattern. */
330   if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
333   if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
335       /* Has been detected as widening-summation?  */
337       stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
338       type = gimple_expr_type (stmt);
339       if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
341       oprnd0 = gimple_assign_rhs1 (stmt);
342       oprnd1 = gimple_assign_rhs2 (stmt);
343       half_type = TREE_TYPE (oprnd0);
349       oprnd0 = gimple_assign_rhs1 (last_stmt);
350       oprnd1 = gimple_assign_rhs2 (last_stmt);
351       if (!types_compatible_p (TREE_TYPE (oprnd0), type)
352 	  || !types_compatible_p (TREE_TYPE (oprnd1), type))
356       if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
361 	  oprnd0 = gimple_assign_rhs1 (stmt);
367 /* So far so good. Since last_stmt was detected as a (summation) reduction,
368 we know that oprnd1 is the reduction variable (defined by a loop-header
369 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
370 Left to check that oprnd0 is defined by a (widen_)mult_expr */
371   if (TREE_CODE (oprnd0) != SSA_NAME)
374   prod_type = half_type;
375   stmt = SSA_NAME_DEF_STMT (oprnd0);
377   /* It could not be the dot_prod pattern if the stmt is outside the loop.  */
378   if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
381   /* FORNOW.  Can continue analyzing the def-use chain when this stmt is a phi
382      inside the loop (in case we are analyzing an outer-loop).  */
383   if (!is_gimple_assign (stmt))
385   stmt_vinfo = vinfo_for_stmt (stmt);
386   gcc_assert (stmt_vinfo);
387   if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
389   if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
391   if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
393       /* Has been detected as a widening multiplication?  */
395       stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
396       if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
398       stmt_vinfo = vinfo_for_stmt (stmt);
399       gcc_assert (stmt_vinfo);
400       gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
401       oprnd00 = gimple_assign_rhs1 (stmt);
402       oprnd01 = gimple_assign_rhs2 (stmt);
403       STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (last_stmt))
404 	= STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
408       tree half_type0, half_type1;
412       oprnd0 = gimple_assign_rhs1 (stmt);
413       oprnd1 = gimple_assign_rhs2 (stmt);
414       if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
415 	  || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
417       if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
421       oprnd00 = gimple_assign_rhs1 (def_stmt);
422       if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
426       oprnd01 = gimple_assign_rhs1 (def_stmt);
427       if (!types_compatible_p (half_type0, half_type1))
429       if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
433   half_type = TREE_TYPE (oprnd00);
434   *type_in = half_type;
437 /* Pattern detected. Create a stmt to be used to replace the pattern: */
438   var = vect_recog_temp_ssa_var (type, NULL);
439   pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
440 				      oprnd00, oprnd01, oprnd1);
442   if (dump_enabled_p ())
444       dump_printf_loc (MSG_NOTE, vect_location,
445                        "vect_recog_dot_prod_pattern: detected: ");
446       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
447       dump_printf (MSG_NOTE, "\n");
454 /* Function vect_recog_sad_pattern
456 Try to find the following Sum of Absolute Difference (SAD) pattern:
459 signed TYPE1 diff, abs_diff;
462 sum_0 = phi <init, sum_1>
465 S3 x_T = (TYPE1) x_t;
466 S4 y_T = (TYPE1) y_t;
468 S6 abs_diff = ABS_EXPR <diff>;
469 [S7 abs_diff = (TYPE2) abs_diff; #optional]
470 S8 sum_1 = abs_diff + sum_0;
472 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
473    same size as 'TYPE1' or bigger.  This is a special case of a reduction
478 * STMTS: Contains a stmt from which the pattern search begins. In the
479 example, when this function is called with S8, the pattern
480 {S3,S4,S5,S6,S7,S8} will be detected.
484 * TYPE_IN: The type of the input arguments to the pattern.
486 * TYPE_OUT: The type of the output of this pattern.
488 * Return value: A new stmt that will be used to replace the sequence of
489 stmts that constitute the pattern. In this case it will be:
490 SAD_EXPR <x_t, y_t, sum_0>
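
   For illustration only (hypothetical example, not taken from this file):
   with narrow inputs and a wider accumulator, a source loop such as

       unsigned char x[N], y[N];
       int sum = 0;
       for (int i = 0; i < N; i++)
         sum += abs ((int) x[i] - (int) y[i]);

   is the kind of computation that ends up as SAD_EXPR <x_t, y_t, sum_0>.
   The array names and N are hypothetical.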
494 vect_recog_sad_pattern (vec<gimple *> *stmts, tree *type_in,
497   gimple *last_stmt = (*stmts)[0];
498   tree sad_oprnd0, sad_oprnd1;
499   stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
501   loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
508   loop = LOOP_VINFO_LOOP (loop_info);
510 /* We don't allow changing the order of the computation in the inner-loop
511 when doing outer-loop vectorization. */
512   if (loop && nested_in_vect_loop_p (loop, last_stmt))
515   if (!is_gimple_assign (last_stmt))
518   tree sum_type = gimple_expr_type (last_stmt);
520 /* Look for the following pattern
524 DAD = ABS_EXPR <DDIFF>;
525 DDPROD = (TYPE2) DPROD;
528 - DX is at least double the size of X
529 - DY is at least double the size of Y
530 - DX, DY, DDIFF, DAD all have the same type
531      - sum is the same size as DAD or bigger
532 - sum has been recognized as a reduction variable.
534 This is equivalent to:
535 DDIFF = X w- Y; #widen sub
536 DAD = ABS_EXPR <DDIFF>;
537 sum_1 = DAD w+ sum_0; #widen summation
539 DDIFF = X w- Y; #widen sub
540 DAD = ABS_EXPR <DDIFF>;
541 sum_1 = DAD + sum_0; #summation
544 /* Starting from LAST_STMT, follow the defs of its uses in search
545 of the above pattern. */
547 if (gimple_assign_rhs_code (last_stmt
) != PLUS_EXPR
)
550 tree plus_oprnd0
, plus_oprnd1
;
552 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
554 /* Has been detected as widening-summation? */
556 gimple
*stmt
= STMT_VINFO_RELATED_STMT (stmt_vinfo
);
557 sum_type
= gimple_expr_type (stmt
);
558 if (gimple_assign_rhs_code (stmt
) != WIDEN_SUM_EXPR
)
560 plus_oprnd0
= gimple_assign_rhs1 (stmt
);
561 plus_oprnd1
= gimple_assign_rhs2 (stmt
);
562 half_type
= TREE_TYPE (plus_oprnd0
);
568 plus_oprnd0
= gimple_assign_rhs1 (last_stmt
);
569 plus_oprnd1
= gimple_assign_rhs2 (last_stmt
);
570 if (!types_compatible_p (TREE_TYPE (plus_oprnd0
), sum_type
)
571 || !types_compatible_p (TREE_TYPE (plus_oprnd1
), sum_type
))
574 /* The type conversion could be promotion, demotion,
575 or just signed -> unsigned. */
576 if (type_conversion_p (plus_oprnd0
, last_stmt
, false,
577 &half_type
, &def_stmt
, &promotion
))
578 plus_oprnd0
= gimple_assign_rhs1 (def_stmt
);
580 half_type
= sum_type
;
583 /* So far so good. Since last_stmt was detected as a (summation) reduction,
584 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
585 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
586 Then check that plus_oprnd0 is defined by an abs_expr. */
588 if (TREE_CODE (plus_oprnd0
) != SSA_NAME
)
591 tree abs_type
= half_type
;
592 gimple
*abs_stmt
= SSA_NAME_DEF_STMT (plus_oprnd0
);
594 /* It could not be the sad pattern if the abs_stmt is outside the loop. */
595 if (!gimple_bb (abs_stmt
) || !flow_bb_inside_loop_p (loop
, gimple_bb (abs_stmt
)))
598   /* FORNOW.  Can continue analyzing the def-use chain when this stmt is a phi
599 inside the loop (in case we are analyzing an outer-loop). */
600 if (!is_gimple_assign (abs_stmt
))
603 stmt_vec_info abs_stmt_vinfo
= vinfo_for_stmt (abs_stmt
);
604 gcc_assert (abs_stmt_vinfo
);
605 if (STMT_VINFO_DEF_TYPE (abs_stmt_vinfo
) != vect_internal_def
)
607 if (gimple_assign_rhs_code (abs_stmt
) != ABS_EXPR
)
610 tree abs_oprnd
= gimple_assign_rhs1 (abs_stmt
);
611 if (!types_compatible_p (TREE_TYPE (abs_oprnd
), abs_type
))
613 if (TYPE_UNSIGNED (abs_type
))
616 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
618 if (TREE_CODE (abs_oprnd
) != SSA_NAME
)
621 gimple
*diff_stmt
= SSA_NAME_DEF_STMT (abs_oprnd
);
623 /* It could not be the sad pattern if the diff_stmt is outside the loop. */
624 if (!gimple_bb (diff_stmt
)
625 || !flow_bb_inside_loop_p (loop
, gimple_bb (diff_stmt
)))
628   /* FORNOW.  Can continue analyzing the def-use chain when this stmt is a phi
629 inside the loop (in case we are analyzing an outer-loop). */
630 if (!is_gimple_assign (diff_stmt
))
633 stmt_vec_info diff_stmt_vinfo
= vinfo_for_stmt (diff_stmt
);
634 gcc_assert (diff_stmt_vinfo
);
635 if (STMT_VINFO_DEF_TYPE (diff_stmt_vinfo
) != vect_internal_def
)
637 if (gimple_assign_rhs_code (diff_stmt
) != MINUS_EXPR
)
640 tree half_type0
, half_type1
;
643 tree minus_oprnd0
= gimple_assign_rhs1 (diff_stmt
);
644 tree minus_oprnd1
= gimple_assign_rhs2 (diff_stmt
);
646 if (!types_compatible_p (TREE_TYPE (minus_oprnd0
), abs_type
)
647 || !types_compatible_p (TREE_TYPE (minus_oprnd1
), abs_type
))
649 if (!type_conversion_p (minus_oprnd0
, diff_stmt
, false,
650 &half_type0
, &def_stmt
, &promotion
)
653 sad_oprnd0
= gimple_assign_rhs1 (def_stmt
);
655 if (!type_conversion_p (minus_oprnd1
, diff_stmt
, false,
656 &half_type1
, &def_stmt
, &promotion
)
659 sad_oprnd1
= gimple_assign_rhs1 (def_stmt
);
661 if (!types_compatible_p (half_type0
, half_type1
))
663 if (TYPE_PRECISION (abs_type
) < TYPE_PRECISION (half_type0
) * 2
664 || TYPE_PRECISION (sum_type
) < TYPE_PRECISION (half_type0
) * 2)
667 *type_in
= TREE_TYPE (sad_oprnd0
);
668 *type_out
= sum_type
;
670 /* Pattern detected. Create a stmt to be used to replace the pattern: */
671 tree var
= vect_recog_temp_ssa_var (sum_type
, NULL
);
672 gimple
*pattern_stmt
= gimple_build_assign (var
, SAD_EXPR
, sad_oprnd0
,
673 sad_oprnd1
, plus_oprnd1
);
675 if (dump_enabled_p ())
677 dump_printf_loc (MSG_NOTE
, vect_location
,
678 "vect_recog_sad_pattern: detected: ");
679 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
680 dump_printf (MSG_NOTE
, "\n");
687 /* Handle widening operation by a constant. At the moment we support MULT_EXPR
690 For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
691 we check that CONST_OPRND is less or equal to the size of HALF_TYPE.
693 Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
694 HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
695    that satisfies the above restrictions, we can perform a widening operation
696 from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
697 with a_it = (interm_type) a_t; Store such operation in *WSTMT. */
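/* For illustration only (hypothetical values, not from this file): if a_t
   has type 'unsigned char' (HALF_TYPE, precision 8), TYPE is a 32-bit
   'unsigned int' and the statement is a_T << 10, the shift amount 10 is
   too large for the 8-bit HALF_TYPE but fits the 16-bit intermediate
   type, so a_it = (unsigned short) a_t is created and the widening
   operation is performed from the intermediate type instead.  */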
700 vect_handle_widen_op_by_const (gimple
*stmt
, enum tree_code code
,
701 tree const_oprnd
, tree
*oprnd
,
702 gimple
**wstmt
, tree type
,
703 tree
*half_type
, gimple
*def_stmt
)
705 tree new_type
, new_oprnd
;
707 if (code
!= MULT_EXPR
&& code
!= LSHIFT_EXPR
)
710 if (((code
== MULT_EXPR
&& int_fits_type_p (const_oprnd
, *half_type
))
711 || (code
== LSHIFT_EXPR
712 && compare_tree_int (const_oprnd
, TYPE_PRECISION (*half_type
))
714 && TYPE_PRECISION (type
) == (TYPE_PRECISION (*half_type
) * 2))
716 /* CONST_OPRND is a constant of HALF_TYPE. */
717 *oprnd
= gimple_assign_rhs1 (def_stmt
);
721 if (TYPE_PRECISION (type
) < (TYPE_PRECISION (*half_type
) * 4))
724 if (!vect_same_loop_or_bb_p (stmt
, def_stmt
))
727 /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
728 a type 2 times bigger than HALF_TYPE. */
729 new_type
= build_nonstandard_integer_type (TYPE_PRECISION (type
) / 2,
730 TYPE_UNSIGNED (type
));
731 if ((code
== MULT_EXPR
&& !int_fits_type_p (const_oprnd
, new_type
))
732 || (code
== LSHIFT_EXPR
733 && compare_tree_int (const_oprnd
, TYPE_PRECISION (new_type
)) == 1))
736 /* Use NEW_TYPE for widening operation and create a_T = (NEW_TYPE) a_t; */
737 *oprnd
= gimple_assign_rhs1 (def_stmt
);
738 new_oprnd
= make_ssa_name (new_type
);
739 *wstmt
= gimple_build_assign (new_oprnd
, NOP_EXPR
, *oprnd
);
742 *half_type
= new_type
;
747 /* Function vect_recog_widen_mult_pattern
749 Try to find the following pattern:
753 TYPE a_T, b_T, prod_T;
759 S5 prod_T = a_T * b_T;
761 where type 'TYPE' is at least double the size of type 'type1' and 'type2'.
763 Also detect unsigned cases:
767 unsigned TYPE u_prod_T;
768 TYPE a_T, b_T, prod_T;
774 S5 prod_T = a_T * b_T;
775 S6 u_prod_T = (unsigned TYPE) prod_T;
777 and multiplication by constants:
784 S5 prod_T = a_T * CONST;
786 A special case of multiplication by constants is when 'TYPE' is 4 times
787 bigger than 'type', but CONST fits an intermediate type 2 times smaller
788 than 'TYPE'. In that case we create an additional pattern stmt for S3
789 to create a variable of the intermediate type, and perform widen-mult
790 on the intermediate type as well:
794 TYPE a_T, prod_T, prod_T';
798 '--> a_it = (interm_type) a_t;
799 S5 prod_T = a_T * CONST;
800 '--> prod_T' = a_it w* CONST;
804 * STMTS: Contains a stmt from which the pattern search begins. In the
805 example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
806 is detected. In case of unsigned widen-mult, the original stmt (S5) is
807 replaced with S6 in STMTS. In case of multiplication by a constant
808 of an intermediate type (the last case above), STMTS also contains S3
809 (inserted before S5).
813 * TYPE_IN: The type of the input arguments to the pattern.
815 * TYPE_OUT: The type of the output of this pattern.
817 * Return value: A new stmt that will be used to replace the sequence of
818 stmts that constitute the pattern. In this case it will be:
819 WIDEN_MULT <a_t, b_t>
820 If the result of WIDEN_MULT needs to be converted to a larger type, the
821 returned stmt will be this type conversion stmt.
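
   For illustration only (hypothetical example, not taken from this file):
   with 16-bit inputs and a 32-bit result,

       short a[N], b[N];
       int prod[N];
       for (int i = 0; i < N; i++)
         prod[i] = (int) a[i] * (int) b[i];

   is the kind of loop that is rewritten to use WIDEN_MULT <a_t, b_t>.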
825 vect_recog_widen_mult_pattern (vec<gimple *> *stmts,
826 			       tree *type_in, tree *type_out)
828   gimple *last_stmt = stmts->pop ();
829   gimple *def_stmt0, *def_stmt1;
831   tree type, half_type0, half_type1;
832   gimple *new_stmt = NULL, *pattern_stmt = NULL;
833   tree vectype, vecitype;
835   enum tree_code dummy_code;
841 if (!is_gimple_assign (last_stmt
))
844 type
= gimple_expr_type (last_stmt
);
846 /* Starting from LAST_STMT, follow the defs of its uses in search
847 of the above pattern. */
849 if (gimple_assign_rhs_code (last_stmt
) != MULT_EXPR
)
852 oprnd0
= gimple_assign_rhs1 (last_stmt
);
853 oprnd1
= gimple_assign_rhs2 (last_stmt
);
854 if (!types_compatible_p (TREE_TYPE (oprnd0
), type
)
855 || !types_compatible_p (TREE_TYPE (oprnd1
), type
))
858 /* Check argument 0. */
859 if (!type_conversion_p (oprnd0
, last_stmt
, false, &half_type0
, &def_stmt0
,
863 /* Check argument 1. */
864 op1_ok
= type_conversion_p (oprnd1
, last_stmt
, false, &half_type1
,
865 &def_stmt1
, &promotion
);
867 if (op1_ok
&& promotion
)
869 oprnd0
= gimple_assign_rhs1 (def_stmt0
);
870 oprnd1
= gimple_assign_rhs1 (def_stmt1
);
874 if (TREE_CODE (oprnd1
) == INTEGER_CST
875 && TREE_CODE (half_type0
) == INTEGER_TYPE
876 && vect_handle_widen_op_by_const (last_stmt
, MULT_EXPR
, oprnd1
,
877 &oprnd0
, &new_stmt
, type
,
878 &half_type0
, def_stmt0
))
880 half_type1
= half_type0
;
881 oprnd1
= fold_convert (half_type1
, oprnd1
);
887 /* If the two arguments have different sizes, convert the one with
888 the smaller type into the larger type. */
889 if (TYPE_PRECISION (half_type0
) != TYPE_PRECISION (half_type1
))
891 /* If we already used up the single-stmt slot give up. */
896 gimple
*def_stmt
= NULL
;
898 if (TYPE_PRECISION (half_type0
) < TYPE_PRECISION (half_type1
))
900 def_stmt
= def_stmt0
;
901 half_type0
= half_type1
;
906 def_stmt
= def_stmt1
;
907 half_type1
= half_type0
;
911 tree old_oprnd
= gimple_assign_rhs1 (def_stmt
);
912 tree new_oprnd
= make_ssa_name (half_type0
);
913 new_stmt
= gimple_build_assign (new_oprnd
, NOP_EXPR
, old_oprnd
);
917 /* Handle unsigned case. Look for
918 S6 u_prod_T = (unsigned TYPE) prod_T;
919 Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */
920 if (TYPE_UNSIGNED (type
) != TYPE_UNSIGNED (half_type0
))
926 if (TYPE_UNSIGNED (type
) == TYPE_UNSIGNED (half_type1
))
929 use_stmt
= vect_single_imm_use (last_stmt
);
930 if (!use_stmt
|| !is_gimple_assign (use_stmt
)
931 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt
)))
934 use_lhs
= gimple_assign_lhs (use_stmt
);
935 use_type
= TREE_TYPE (use_lhs
);
936 if (!INTEGRAL_TYPE_P (use_type
)
937 || (TYPE_UNSIGNED (type
) == TYPE_UNSIGNED (use_type
))
938 || (TYPE_PRECISION (type
) != TYPE_PRECISION (use_type
)))
942 last_stmt
= use_stmt
;
945 if (!types_compatible_p (half_type0
, half_type1
))
948   /* If TYPE is more than twice as large as HALF_TYPE, we use WIDEN_MULT
949 to get an intermediate result of type ITYPE. In this case we need
950 to build a statement to convert this intermediate result to type TYPE. */
952 if (TYPE_PRECISION (type
) > TYPE_PRECISION (half_type0
) * 2)
953 itype
= build_nonstandard_integer_type
954 (GET_MODE_BITSIZE (TYPE_MODE (half_type0
)) * 2,
955 TYPE_UNSIGNED (type
));
957 /* Pattern detected. */
958 if (dump_enabled_p ())
959 dump_printf_loc (MSG_NOTE
, vect_location
,
960 "vect_recog_widen_mult_pattern: detected:\n");
962 /* Check target support */
963 vectype
= get_vectype_for_scalar_type (half_type0
);
964 vecitype
= get_vectype_for_scalar_type (itype
);
967 || !supportable_widening_operation (WIDEN_MULT_EXPR
, last_stmt
,
969 &dummy_code
, &dummy_code
,
970 &dummy_int
, &dummy_vec
))
974 *type_out
= get_vectype_for_scalar_type (type
);
976 /* Pattern supported. Create a stmt to be used to replace the pattern: */
977 var
= vect_recog_temp_ssa_var (itype
, NULL
);
978 pattern_stmt
= gimple_build_assign (var
, WIDEN_MULT_EXPR
, oprnd0
, oprnd1
);
980 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
981 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
982 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
983 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
) = NULL
;
985 /* If the original two operands have different sizes, we may need to convert
986      the smaller one into the larger type.  If this is the case, at this point
987 the new stmt is already built. */
990 append_pattern_def_seq (stmt_vinfo
, new_stmt
);
991 stmt_vec_info new_stmt_info
992 = new_stmt_vec_info (new_stmt
, loop_vinfo
, bb_vinfo
);
993 set_vinfo_for_stmt (new_stmt
, new_stmt_info
);
994 STMT_VINFO_VECTYPE (new_stmt_info
) = vectype
;
997   /* If ITYPE is not TYPE, we need to build a type conversion stmt to convert
998 the result of the widen-mult operation into type TYPE. */
1001 append_pattern_def_seq (stmt_vinfo
, pattern_stmt
);
1002 stmt_vec_info pattern_stmt_info
1003 = new_stmt_vec_info (pattern_stmt
, loop_vinfo
, bb_vinfo
);
1004 set_vinfo_for_stmt (pattern_stmt
, pattern_stmt_info
);
1005 STMT_VINFO_VECTYPE (pattern_stmt_info
) = vecitype
;
1006 pattern_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (type
, NULL
),
1008 gimple_assign_lhs (pattern_stmt
));
1011 if (dump_enabled_p ())
1012 dump_gimple_stmt_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, pattern_stmt
, 0);
1014 stmts
->safe_push (last_stmt
);
1015 return pattern_stmt
;
1019 /* Function vect_recog_pow_pattern
1021 Try to find the following pattern:
1025 with POW being one of pow, powf, powi, powif and N being
1030 * LAST_STMT: A stmt from which the pattern search begins.
1034 * TYPE_IN: The type of the input arguments to the pattern.
1036 * TYPE_OUT: The type of the output of this pattern.
1038 * Return value: A new stmt that will be used to replace the sequence of
1039 stmts that constitute the pattern. In this case it will be:
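
   For illustration only (hypothetical example, not taken from this file):
   a call such as

       for (int i = 0; i < N; i++)
         y[i] = pow (x[i], 2.0);

   is rewritten as y[i] = x[i] * x[i], and an exponent of 0.5 is rewritten
   as a call to sqrt, provided the target can vectorize that call.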
1046 vect_recog_pow_pattern (vec<gimple *> *stmts, tree *type_in,
1049   gimple *last_stmt = (*stmts)[0];
1050   tree fn, base, exp = NULL;
1054 if (!is_gimple_call (last_stmt
) || gimple_call_lhs (last_stmt
) == NULL
)
1057 fn
= gimple_call_fndecl (last_stmt
);
1058 if (fn
== NULL_TREE
|| DECL_BUILT_IN_CLASS (fn
) != BUILT_IN_NORMAL
)
1061 switch (DECL_FUNCTION_CODE (fn
))
1063 case BUILT_IN_POWIF
:
1067 base
= gimple_call_arg (last_stmt
, 0);
1068 exp
= gimple_call_arg (last_stmt
, 1);
1069 if (TREE_CODE (exp
) != REAL_CST
1070 && TREE_CODE (exp
) != INTEGER_CST
)
1078 /* We now have a pow or powi builtin function call with a constant
1081 *type_out
= NULL_TREE
;
1083 /* Catch squaring. */
1084 if ((tree_fits_shwi_p (exp
)
1085 && tree_to_shwi (exp
) == 2)
1086 || (TREE_CODE (exp
) == REAL_CST
1087 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp
), dconst2
)))
1089 *type_in
= TREE_TYPE (base
);
1091 var
= vect_recog_temp_ssa_var (TREE_TYPE (base
), NULL
);
1092 stmt
= gimple_build_assign (var
, MULT_EXPR
, base
, base
);
1096 /* Catch square root. */
1097 if (TREE_CODE (exp
) == REAL_CST
1098 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp
), dconsthalf
))
1100 tree newfn
= mathfn_built_in (TREE_TYPE (base
), BUILT_IN_SQRT
);
1101 *type_in
= get_vectype_for_scalar_type (TREE_TYPE (base
));
1104 gcall
*stmt
= gimple_build_call (newfn
, 1, base
);
1105 if (vectorizable_function (stmt
, *type_in
, *type_in
)
1108 var
= vect_recog_temp_ssa_var (TREE_TYPE (base
), stmt
);
1109 gimple_call_set_lhs (stmt
, var
);
1119 /* Function vect_recog_widen_sum_pattern
1121 Try to find the following pattern:
1124 TYPE x_T, sum = init;
1126 sum_0 = phi <init, sum_1>
1128 S2 x_T = (TYPE) x_t;
1129 S3 sum_1 = x_T + sum_0;
1131 where type 'TYPE' is at least double the size of type 'type', i.e - we're
1132 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1133 a special case of a reduction computation.
1137 * LAST_STMT: A stmt from which the pattern search begins. In the example,
1138 when this function is called with S3, the pattern {S2,S3} will be detected.
1142 * TYPE_IN: The type of the input arguments to the pattern.
1144 * TYPE_OUT: The type of the output of this pattern.
1146 * Return value: A new stmt that will be used to replace the sequence of
1147 stmts that constitute the pattern. In this case it will be:
1148 WIDEN_SUM <x_t, sum_0>
1150 Note: The widening-sum idiom is a widening reduction pattern that is
1151 vectorized without preserving all the intermediate results. It
1152 produces only N/2 (widened) results (by summing up pairs of
1153 intermediate results) rather than all N results. Therefore, we
1154 cannot allow this pattern when we want to get all the results and in
1155 the correct order (as is the case when this computation is in an
1156    inner-loop nested in an outer-loop that is being vectorized).  */
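/* For illustration only (hypothetical example, not taken from this file):

       short x[N];
       int sum = 0;
       for (int i = 0; i < N; i++)
         sum += (int) x[i];

   is the kind of reduction that becomes WIDEN_SUM <x_t, sum_0>.  */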
1159 vect_recog_widen_sum_pattern (vec<gimple *> *stmts, tree *type_in,
1162   gimple *stmt, *last_stmt = (*stmts)[0];
1163   tree oprnd0, oprnd1;
1164   stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1165   tree type, half_type;
1166   gimple *pattern_stmt;
1167   loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1175   loop = LOOP_VINFO_LOOP (loop_info);
1177 /* We don't allow changing the order of the computation in the inner-loop
1178 when doing outer-loop vectorization. */
1179 if (loop
&& nested_in_vect_loop_p (loop
, last_stmt
))
1182 if (!is_gimple_assign (last_stmt
))
1185 type
= gimple_expr_type (last_stmt
);
1187 /* Look for the following pattern
1190 In which DX is at least double the size of X, and sum_1 has been
1191 recognized as a reduction variable.
1194 /* Starting from LAST_STMT, follow the defs of its uses in search
1195 of the above pattern. */
1197 if (gimple_assign_rhs_code (last_stmt
) != PLUS_EXPR
)
1200 oprnd0
= gimple_assign_rhs1 (last_stmt
);
1201 oprnd1
= gimple_assign_rhs2 (last_stmt
);
1202 if (!types_compatible_p (TREE_TYPE (oprnd0
), type
)
1203 || !types_compatible_p (TREE_TYPE (oprnd1
), type
))
1206 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1207 we know that oprnd1 is the reduction variable (defined by a loop-header
1208 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1209 Left to check that oprnd0 is defined by a cast from type 'type' to type
1212 if (!type_conversion_p (oprnd0
, last_stmt
, true, &half_type
, &stmt
,
1217 oprnd0
= gimple_assign_rhs1 (stmt
);
1218 *type_in
= half_type
;
1221 /* Pattern detected. Create a stmt to be used to replace the pattern: */
1222 var
= vect_recog_temp_ssa_var (type
, NULL
);
1223 pattern_stmt
= gimple_build_assign (var
, WIDEN_SUM_EXPR
, oprnd0
, oprnd1
);
1225 if (dump_enabled_p ())
1227 dump_printf_loc (MSG_NOTE
, vect_location
,
1228 "vect_recog_widen_sum_pattern: detected: ");
1229 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
1230 dump_printf (MSG_NOTE
, "\n");
1233 return pattern_stmt
;
1237 /* Return TRUE if the operation in STMT can be performed on a smaller type.
1240 STMT - a statement to check.
1241 DEF - we support operations with two operands, one of which is constant.
1242 The other operand can be defined by a demotion operation, or by a
1243 previous statement in a sequence of over-promoted operations. In the
1244          latter case DEF is used to replace that operand.  (It is defined by a
1245 pattern statement we created for the previous statement in the
1249 NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not
1250 NULL, it's the type of DEF.
1251 STMTS - additional pattern statements. If a pattern statement (type
1252 conversion) is created in this function, its original statement is
1256 OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
1257 operands to use in the new pattern statement for STMT (will be created
1258 in vect_recog_over_widening_pattern ()).
1259 NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
1260 statements for STMT: the first one is a type promotion and the second
1261 one is the operation itself. We return the type promotion statement
1262 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
1263 the second pattern statement. */
1266 vect_operation_fits_smaller_type (gimple *stmt, tree def, tree *new_type,
1267 				  tree *op0, tree *op1, gimple **new_def_stmt,
1268 				  vec<gimple *> *stmts)
1270   enum tree_code code;
1271   tree const_oprnd, oprnd;
1272   tree interm_type = NULL_TREE, half_type, new_oprnd, type;
1273   gimple *def_stmt, *new_stmt;
1279   *new_def_stmt = NULL;
1281 if (!is_gimple_assign (stmt
))
1284 code
= gimple_assign_rhs_code (stmt
);
1285 if (code
!= LSHIFT_EXPR
&& code
!= RSHIFT_EXPR
1286 && code
!= BIT_IOR_EXPR
&& code
!= BIT_XOR_EXPR
&& code
!= BIT_AND_EXPR
)
1289 oprnd
= gimple_assign_rhs1 (stmt
);
1290 const_oprnd
= gimple_assign_rhs2 (stmt
);
1291 type
= gimple_expr_type (stmt
);
1293 if (TREE_CODE (oprnd
) != SSA_NAME
1294 || TREE_CODE (const_oprnd
) != INTEGER_CST
)
1297 /* If oprnd has other uses besides that in stmt we cannot mark it
1298 as being part of a pattern only. */
1299 if (!has_single_use (oprnd
))
1302 /* If we are in the middle of a sequence, we use DEF from a previous
1303 statement. Otherwise, OPRND has to be a result of type promotion. */
1306 half_type
= *new_type
;
1312 if (!type_conversion_p (oprnd
, stmt
, false, &half_type
, &def_stmt
,
1315 || !vect_same_loop_or_bb_p (stmt
, def_stmt
))
1319 /* Can we perform the operation on a smaller type? */
1325 if (!int_fits_type_p (const_oprnd
, half_type
))
1327 /* HALF_TYPE is not enough. Try a bigger type if possible. */
1328 if (TYPE_PRECISION (type
) < (TYPE_PRECISION (half_type
) * 4))
1331 interm_type
= build_nonstandard_integer_type (
1332 TYPE_PRECISION (half_type
) * 2, TYPE_UNSIGNED (type
));
1333 if (!int_fits_type_p (const_oprnd
, interm_type
))
1340 /* Try intermediate type - HALF_TYPE is not enough for sure. */
1341 if (TYPE_PRECISION (type
) < (TYPE_PRECISION (half_type
) * 4))
1344 /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
1345 (e.g., if the original value was char, the shift amount is at most 8
1346 if we want to use short). */
1347 if (compare_tree_int (const_oprnd
, TYPE_PRECISION (half_type
)) == 1)
1350 interm_type
= build_nonstandard_integer_type (
1351 TYPE_PRECISION (half_type
) * 2, TYPE_UNSIGNED (type
));
1353 if (!vect_supportable_shift (code
, interm_type
))
1359 if (vect_supportable_shift (code
, half_type
))
1362 /* Try intermediate type - HALF_TYPE is not supported. */
1363 if (TYPE_PRECISION (type
) < (TYPE_PRECISION (half_type
) * 4))
1366 interm_type
= build_nonstandard_integer_type (
1367 TYPE_PRECISION (half_type
) * 2, TYPE_UNSIGNED (type
));
1369 if (!vect_supportable_shift (code
, interm_type
))
1378 /* There are four possible cases:
1379 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
1380 the first statement in the sequence)
1381 a. The original, HALF_TYPE, is not enough - we replace the promotion
1382 from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
1383 b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
1385 2. OPRND is defined by a pattern statement we created.
1386 a. Its type is not sufficient for the operation, we create a new stmt:
1387 a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store
1388 this statement in NEW_DEF_STMT, and it is later put in
1389 STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
1390 b. OPRND is good to use in the new statement. */
1395 /* Replace the original type conversion HALF_TYPE->TYPE with
1396 HALF_TYPE->INTERM_TYPE. */
1397 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
)))
1399 new_stmt
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
));
1400 /* Check if the already created pattern stmt is what we need. */
1401 if (!is_gimple_assign (new_stmt
)
1402 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt
))
1403 || TREE_TYPE (gimple_assign_lhs (new_stmt
)) != interm_type
)
1406 stmts
->safe_push (def_stmt
);
1407 oprnd
= gimple_assign_lhs (new_stmt
);
1411 /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */
1412 oprnd
= gimple_assign_rhs1 (def_stmt
);
1413 new_oprnd
= make_ssa_name (interm_type
);
1414 new_stmt
= gimple_build_assign (new_oprnd
, NOP_EXPR
, oprnd
);
1415 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
)) = new_stmt
;
1416 stmts
->safe_push (def_stmt
);
1422 /* Retrieve the operand before the type promotion. */
1423 oprnd
= gimple_assign_rhs1 (def_stmt
);
1430 /* Create a type conversion HALF_TYPE->INTERM_TYPE. */
1431 new_oprnd
= make_ssa_name (interm_type
);
1432 new_stmt
= gimple_build_assign (new_oprnd
, NOP_EXPR
, oprnd
);
1434 *new_def_stmt
= new_stmt
;
1437 /* Otherwise, OPRND is already set. */
1441 *new_type
= interm_type
;
1443 *new_type
= half_type
;
1446 *op1
= fold_convert (*new_type
, const_oprnd
);
1452 /* Try to find a statement or a sequence of statements that can be performed
1456 TYPE x_T, res0_T, res1_T;
1459 S2 x_T = (TYPE) x_t;
1460 S3 res0_T = op (x_T, C0);
1461 S4 res1_T = op (res0_T, C1);
1462 S5 ... = () res1_T; - type demotion
1464 where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
1466 Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
1467 be 'type' or some intermediate type. For now, we expect S5 to be a type
1468 demotion operation. We also check that S3 and S4 have only one use. */
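/* For illustration only (hypothetical example, not taken from this file):

       unsigned char a[N], b[N];
       for (int i = 0; i < N; i++)
         b[i] = (unsigned char) (((int) a[i] << 2) | 3);

   Here the shift and the bitwise OR are performed in 'int' only because of
   C promotion rules; the constants fit a narrower type, so the sequence
   can be rewritten to work on that smaller type before the final
   demotion.  */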
1471 vect_recog_over_widening_pattern (vec<gimple *> *stmts,
1472 				  tree *type_in, tree *type_out)
1474   gimple *stmt = stmts->pop ();
1475   gimple *pattern_stmt = NULL, *new_def_stmt, *prev_stmt = NULL,
1477   tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
1478   tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
1485 if (!vinfo_for_stmt (stmt
)
1486 || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt
)))
1489 new_def_stmt
= NULL
;
1490 if (!vect_operation_fits_smaller_type (stmt
, var
, &new_type
,
1491 &op0
, &op1
, &new_def_stmt
,
1500 /* STMT can be performed on a smaller type. Check its uses. */
1501 use_stmt
= vect_single_imm_use (stmt
);
1502 if (!use_stmt
|| !is_gimple_assign (use_stmt
))
1505 /* Create pattern statement for STMT. */
1506 vectype
= get_vectype_for_scalar_type (new_type
);
1510 /* We want to collect all the statements for which we create pattern
1511      statements, except for the case when the last statement in the
1512 sequence doesn't have a corresponding pattern statement. In such
1513 case we associate the last pattern statement with the last statement
1514 in the sequence. Therefore, we only add the original statement to
1515 the list if we know that it is not the last. */
1517 stmts
->safe_push (prev_stmt
);
1519 var
= vect_recog_temp_ssa_var (new_type
, NULL
);
1521 = gimple_build_assign (var
, gimple_assign_rhs_code (stmt
), op0
, op1
);
1522 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt
)) = pattern_stmt
;
1523 new_pattern_def_seq (vinfo_for_stmt (stmt
), new_def_stmt
);
1525 if (dump_enabled_p ())
1527 dump_printf_loc (MSG_NOTE
, vect_location
,
1528 "created pattern stmt: ");
1529 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
1530 dump_printf (MSG_NOTE
, "\n");
1533 type
= gimple_expr_type (stmt
);
1540 /* We got a sequence. We expect it to end with a type demotion operation.
1541 Otherwise, we quit (for now). There are three possible cases: the
1542 conversion is to NEW_TYPE (we don't do anything), the conversion is to
1543 a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
1544 NEW_TYPE differs (we create a new conversion statement). */
1545 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt
)))
1547 use_lhs
= gimple_assign_lhs (use_stmt
);
1548 use_type
= TREE_TYPE (use_lhs
);
1549       /* Support only type demotion or signedness change.  */
1550 if (!INTEGRAL_TYPE_P (use_type
)
1551 || TYPE_PRECISION (type
) <= TYPE_PRECISION (use_type
))
1554 /* Check that NEW_TYPE is not bigger than the conversion result. */
1555 if (TYPE_PRECISION (new_type
) > TYPE_PRECISION (use_type
))
1558 if (TYPE_UNSIGNED (new_type
) != TYPE_UNSIGNED (use_type
)
1559 || TYPE_PRECISION (new_type
) != TYPE_PRECISION (use_type
))
1561 /* Create NEW_TYPE->USE_TYPE conversion. */
1562 new_oprnd
= make_ssa_name (use_type
);
1563 pattern_stmt
= gimple_build_assign (new_oprnd
, NOP_EXPR
, var
);
1564 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt
)) = pattern_stmt
;
1566 *type_in
= get_vectype_for_scalar_type (new_type
);
1567 *type_out
= get_vectype_for_scalar_type (use_type
);
1569 /* We created a pattern statement for the last statement in the
1570 sequence, so we don't need to associate it with the pattern
1571 statement created for PREV_STMT. Therefore, we add PREV_STMT
1572 to the list in order to mark it later in vect_pattern_recog_1. */
1574 stmts
->safe_push (prev_stmt
);
1579 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt
))
1580 = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt
));
1583 *type_out
= NULL_TREE
;
1586 stmts
->safe_push (use_stmt
);
1589 /* TODO: support general case, create a conversion to the correct type. */
1592 /* Pattern detected. */
1593 if (dump_enabled_p ())
1595 dump_printf_loc (MSG_NOTE
, vect_location
,
1596 "vect_recog_over_widening_pattern: detected: ");
1597 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
1598 dump_printf (MSG_NOTE
, "\n");
1601 return pattern_stmt
;
1604 /* Detect widening shift pattern:
1610 S2 a_T = (TYPE) a_t;
1611 S3 res_T = a_T << CONST;
1613 where type 'TYPE' is at least double the size of type 'type'.
1615 Also detect cases where the shift result is immediately converted
1616 to another type 'result_type' that is no larger in size than 'TYPE'.
1617 In those cases we perform a widen-shift that directly results in
1618 'result_type', to avoid a possible over-widening situation:
1622 result_type res_result;
1625 S2 a_T = (TYPE) a_t;
1626 S3 res_T = a_T << CONST;
1627 S4 res_result = (result_type) res_T;
1628 '--> res_result' = a_t w<< CONST;
1630 And a case when 'TYPE' is 4 times bigger than 'type'. In that case we
1631 create an additional pattern stmt for S2 to create a variable of an
1632 intermediate type, and perform widen-shift on the intermediate type:
1636 TYPE a_T, res_T, res_T';
1639 S2 a_T = (TYPE) a_t;
1640 '--> a_it = (interm_type) a_t;
1641 S3 res_T = a_T << CONST;
1642 '--> res_T' = a_it <<* CONST;
1646 * STMTS: Contains a stmt from which the pattern search begins.
1647 In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
1648 in STMTS. When an intermediate type is used and a pattern statement is
1649 created for S2, we also put S2 here (before S3).
1653 * TYPE_IN: The type of the input arguments to the pattern.
1655 * TYPE_OUT: The type of the output of this pattern.
1657 * Return value: A new stmt that will be used to replace the sequence of
1658 stmts that constitute the pattern. In this case it will be:
1659 WIDEN_LSHIFT_EXPR <a_t, CONST>. */
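/* For illustration only (hypothetical example, not taken from this file):

       short a[N];
       int res[N];
       for (int i = 0; i < N; i++)
         res[i] = (int) a[i] << 8;

   is the kind of loop that becomes WIDEN_LSHIFT_EXPR <a_t, 8>.  */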
1662 vect_recog_widen_shift_pattern (vec<gimple *> *stmts,
1663 				tree *type_in, tree *type_out)
1665   gimple *last_stmt = stmts->pop ();
1667   tree oprnd0, oprnd1;
1668   tree type, half_type0;
1669   gimple *pattern_stmt;
1670   tree vectype, vectype_out = NULL_TREE;
1672   enum tree_code dummy_code;
1674   vec<tree> dummy_vec;
1678 if (!is_gimple_assign (last_stmt
) || !vinfo_for_stmt (last_stmt
))
1681 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt
)))
1684 if (gimple_assign_rhs_code (last_stmt
) != LSHIFT_EXPR
)
1687 oprnd0
= gimple_assign_rhs1 (last_stmt
);
1688 oprnd1
= gimple_assign_rhs2 (last_stmt
);
1689 if (TREE_CODE (oprnd0
) != SSA_NAME
|| TREE_CODE (oprnd1
) != INTEGER_CST
)
1692 /* Check operand 0: it has to be defined by a type promotion. */
1693 if (!type_conversion_p (oprnd0
, last_stmt
, false, &half_type0
, &def_stmt0
,
1698 /* Check operand 1: has to be positive. We check that it fits the type
1699 in vect_handle_widen_op_by_const (). */
1700 if (tree_int_cst_compare (oprnd1
, size_zero_node
) <= 0)
1703 oprnd0
= gimple_assign_rhs1 (def_stmt0
);
1704 type
= gimple_expr_type (last_stmt
);
1706 /* Check for subsequent conversion to another type. */
1707 use_stmt
= vect_single_imm_use (last_stmt
);
1708 if (use_stmt
&& is_gimple_assign (use_stmt
)
1709 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt
))
1710 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt
)))
1712 tree use_lhs
= gimple_assign_lhs (use_stmt
);
1713 tree use_type
= TREE_TYPE (use_lhs
);
1715 if (INTEGRAL_TYPE_P (use_type
)
1716 && TYPE_PRECISION (use_type
) <= TYPE_PRECISION (type
))
1718 last_stmt
= use_stmt
;
1723   /* Check if this is a widening operation.  */
1724 gimple
*wstmt
= NULL
;
1725 if (!vect_handle_widen_op_by_const (last_stmt
, LSHIFT_EXPR
, oprnd1
,
1727 type
, &half_type0
, def_stmt0
))
1730 /* Pattern detected. */
1731 if (dump_enabled_p ())
1732 dump_printf_loc (MSG_NOTE
, vect_location
,
1733 "vect_recog_widen_shift_pattern: detected:\n");
1735 /* Check target support. */
1736 vectype
= get_vectype_for_scalar_type (half_type0
);
1737 vectype_out
= get_vectype_for_scalar_type (type
);
1741 || !supportable_widening_operation (WIDEN_LSHIFT_EXPR
, last_stmt
,
1742 vectype_out
, vectype
,
1743 &dummy_code
, &dummy_code
,
1744 &dummy_int
, &dummy_vec
))
1748 *type_out
= vectype_out
;
1750 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1751 var
= vect_recog_temp_ssa_var (type
, NULL
);
1753 gimple_build_assign (var
, WIDEN_LSHIFT_EXPR
, oprnd0
, oprnd1
);
1756 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
1757 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1758 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
1759 new_pattern_def_seq (stmt_vinfo
, wstmt
);
1760 stmt_vec_info new_stmt_info
1761 = new_stmt_vec_info (wstmt
, loop_vinfo
, bb_vinfo
);
1762 set_vinfo_for_stmt (wstmt
, new_stmt_info
);
1763 STMT_VINFO_VECTYPE (new_stmt_info
) = vectype
;
1766 if (dump_enabled_p ())
1767 dump_gimple_stmt_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, pattern_stmt
, 0);
1769 stmts
->safe_push (last_stmt
);
1770 return pattern_stmt
;
1773 /* Detect a rotate pattern that wouldn't otherwise be vectorized:
1777 S0 a_t = b_t r<< c_t;
1781 * STMTS: Contains a stmt from which the pattern search begins,
1782 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1786 S2 e_t = d_t & (B - 1);
1787 S3 f_t = b_t << c_t;
1788 S4 g_t = b_t >> e_t;
1791 where B is element bitsize of type.
1795 * TYPE_IN: The type of the input arguments to the pattern.
1797 * TYPE_OUT: The type of the output of this pattern.
1799 * Return value: A new stmt that will be used to replace the rotate
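
   For illustration only (a hypothetical situation, not from this file):
   if the source contains a rotate, e.g.

       unsigned int x;
       x = (x << n) | (x >> (32 - n));      <-- usually folded to x r<< n

   and the target has vector shifts but no vector rotate, the rotate is
   expanded here into the shift/shift/ior sequence S1-S5 above so the
   loop can still be vectorized.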
1803 vect_recog_rotate_pattern (vec<gimple *> *stmts, tree *type_in, tree *type_out)
1805   gimple *last_stmt = stmts->pop ();
1806   tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
1807   gimple *pattern_stmt, *def_stmt;
1808   enum tree_code rhs_code;
1809   stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1810   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1811   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1812   enum vect_def_type dt;
1813   optab optab1, optab2;
1814   edge ext_def = NULL;
1816 if (!is_gimple_assign (last_stmt
))
1819 rhs_code
= gimple_assign_rhs_code (last_stmt
);
1829 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
1832 lhs
= gimple_assign_lhs (last_stmt
);
1833 oprnd0
= gimple_assign_rhs1 (last_stmt
);
1834 type
= TREE_TYPE (oprnd0
);
1835 oprnd1
= gimple_assign_rhs2 (last_stmt
);
1836 if (TREE_CODE (oprnd0
) != SSA_NAME
1837 || TYPE_PRECISION (TREE_TYPE (lhs
)) != TYPE_PRECISION (type
)
1838 || !INTEGRAL_TYPE_P (type
)
1839 || !TYPE_UNSIGNED (type
))
1842 if (!vect_is_simple_use (oprnd1
, last_stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
1846 if (dt
!= vect_internal_def
1847 && dt
!= vect_constant_def
1848 && dt
!= vect_external_def
)
1851 vectype
= get_vectype_for_scalar_type (type
);
1852 if (vectype
== NULL_TREE
)
1855 /* If vector/vector or vector/scalar rotate is supported by the target,
1856 don't do anything here. */
1857 optab1
= optab_for_tree_code (rhs_code
, vectype
, optab_vector
);
1859 && optab_handler (optab1
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
1862 if (bb_vinfo
!= NULL
|| dt
!= vect_internal_def
)
1864 optab2
= optab_for_tree_code (rhs_code
, vectype
, optab_scalar
);
1866 && optab_handler (optab2
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
1870 /* If vector/vector or vector/scalar shifts aren't supported by the target,
1871 don't do anything here either. */
1872 optab1
= optab_for_tree_code (LSHIFT_EXPR
, vectype
, optab_vector
);
1873 optab2
= optab_for_tree_code (RSHIFT_EXPR
, vectype
, optab_vector
);
1875 || optab_handler (optab1
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
1877 || optab_handler (optab2
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
1879 if (bb_vinfo
== NULL
&& dt
== vect_internal_def
)
1881 optab1
= optab_for_tree_code (LSHIFT_EXPR
, vectype
, optab_scalar
);
1882 optab2
= optab_for_tree_code (RSHIFT_EXPR
, vectype
, optab_scalar
);
1884 || optab_handler (optab1
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
1886 || optab_handler (optab2
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
1891 *type_out
= vectype
;
1892 if (*type_in
== NULL_TREE
)
1895 if (dt
== vect_external_def
1896 && TREE_CODE (oprnd1
) == SSA_NAME
1899 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1900 ext_def
= loop_preheader_edge (loop
);
1901 if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1
))
1903 basic_block bb
= gimple_bb (SSA_NAME_DEF_STMT (oprnd1
));
1905 || !dominated_by_p (CDI_DOMINATORS
, ext_def
->dest
, bb
))
1911 if (TREE_CODE (oprnd1
) == INTEGER_CST
1912 || TYPE_MODE (TREE_TYPE (oprnd1
)) == TYPE_MODE (type
))
1914 else if (def_stmt
&& gimple_assign_cast_p (def_stmt
))
1916 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
1917 if (TYPE_MODE (TREE_TYPE (rhs1
)) == TYPE_MODE (type
)
1918 && TYPE_PRECISION (TREE_TYPE (rhs1
))
1919 == TYPE_PRECISION (type
))
1923 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
) = NULL
;
1924 if (def
== NULL_TREE
)
1926 def
= vect_recog_temp_ssa_var (type
, NULL
);
1927 def_stmt
= gimple_build_assign (def
, NOP_EXPR
, oprnd1
);
1931 = gsi_insert_on_edge_immediate (ext_def
, def_stmt
);
1932 gcc_assert (!new_bb
);
1935 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
1937 stype
= TREE_TYPE (def
);
1939 if (TREE_CODE (def
) == INTEGER_CST
)
1941 if (!tree_fits_uhwi_p (def
)
1942 || tree_to_uhwi (def
) >= GET_MODE_PRECISION (TYPE_MODE (type
))
1943 || integer_zerop (def
))
1945 def2
= build_int_cst (stype
,
1946 GET_MODE_PRECISION (TYPE_MODE (type
))
1947 - tree_to_uhwi (def
));
1951 tree vecstype
= get_vectype_for_scalar_type (stype
);
1952 stmt_vec_info def_stmt_vinfo
;
1954 if (vecstype
== NULL_TREE
)
1956 def2
= vect_recog_temp_ssa_var (stype
, NULL
);
1957 def_stmt
= gimple_build_assign (def2
, NEGATE_EXPR
, def
);
1961 = gsi_insert_on_edge_immediate (ext_def
, def_stmt
);
1962 gcc_assert (!new_bb
);
1966 def_stmt_vinfo
= new_stmt_vec_info (def_stmt
, loop_vinfo
, bb_vinfo
);
1967 set_vinfo_for_stmt (def_stmt
, def_stmt_vinfo
);
1968 STMT_VINFO_VECTYPE (def_stmt_vinfo
) = vecstype
;
1969 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
1972 def2
= vect_recog_temp_ssa_var (stype
, NULL
);
1974 = build_int_cst (stype
, GET_MODE_PRECISION (TYPE_MODE (stype
)) - 1);
1975 def_stmt
= gimple_build_assign (def2
, BIT_AND_EXPR
,
1976 gimple_assign_lhs (def_stmt
), mask
);
1980 = gsi_insert_on_edge_immediate (ext_def
, def_stmt
);
1981 gcc_assert (!new_bb
);
1985 def_stmt_vinfo
= new_stmt_vec_info (def_stmt
, loop_vinfo
, bb_vinfo
);
1986 set_vinfo_for_stmt (def_stmt
, def_stmt_vinfo
);
1987 STMT_VINFO_VECTYPE (def_stmt_vinfo
) = vecstype
;
1988 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
1992 var1
= vect_recog_temp_ssa_var (type
, NULL
);
1993 def_stmt
= gimple_build_assign (var1
, rhs_code
== LROTATE_EXPR
1994 ? LSHIFT_EXPR
: RSHIFT_EXPR
,
1996 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
1998 var2
= vect_recog_temp_ssa_var (type
, NULL
);
1999 def_stmt
= gimple_build_assign (var2
, rhs_code
== LROTATE_EXPR
2000 ? RSHIFT_EXPR
: LSHIFT_EXPR
,
2002 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2004 /* Pattern detected. */
2005 if (dump_enabled_p ())
2006 dump_printf_loc (MSG_NOTE
, vect_location
,
2007 "vect_recog_rotate_pattern: detected:\n");
2009 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2010 var
= vect_recog_temp_ssa_var (type
, NULL
);
2011 pattern_stmt
= gimple_build_assign (var
, BIT_IOR_EXPR
, var1
, var2
);
2013 if (dump_enabled_p ())
2014 dump_gimple_stmt_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, pattern_stmt
, 0);
2016 stmts
->safe_push (last_stmt
);
2017 return pattern_stmt
;
2020 /* Detect a vector by vector shift pattern that wouldn't be otherwise
2028 S3 res_T = b_T op a_t;
2030 where type 'TYPE' is a type with different size than 'type',
2031 and op is <<, >> or rotate.
2036 TYPE b_T, c_T, res_T;
2039 S1 a_t = (type) c_T;
2041 S3 res_T = b_T op a_t;
2045 * STMTS: Contains a stmt from which the pattern search begins,
2046 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2047 with a shift/rotate which has same type on both operands, in the
2048 second case just b_T op c_T, in the first case with added cast
2049 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2053 * TYPE_IN: The type of the input arguments to the pattern.
2055 * TYPE_OUT: The type of the output of this pattern.
2057 * Return value: A new stmt that will be used to replace the shift/rotate
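
   For illustration only (hypothetical example, not taken from this file):

       long long b[N];
       int c[N];
       for (int i = 0; i < N; i++)
         b[i] = b[i] << c[i];

   The shift amount is narrower than the shifted value, so the pattern
   inserts a cast of the shift amount to the type of b_t and emits a
   shift in which both operands have the same type.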
2061 vect_recog_vector_vector_shift_pattern (vec<gimple *> *stmts,
2062 					tree *type_in, tree *type_out)
2064   gimple *last_stmt = stmts->pop ();
2065   tree oprnd0, oprnd1, lhs, var;
2066   gimple *pattern_stmt, *def_stmt;
2067   enum tree_code rhs_code;
2068   stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2069   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2070   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2071   enum vect_def_type dt;
2074 if (!is_gimple_assign (last_stmt
))
2077 rhs_code
= gimple_assign_rhs_code (last_stmt
);
2089 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
2092 lhs
= gimple_assign_lhs (last_stmt
);
2093 oprnd0
= gimple_assign_rhs1 (last_stmt
);
2094 oprnd1
= gimple_assign_rhs2 (last_stmt
);
2095 if (TREE_CODE (oprnd0
) != SSA_NAME
2096 || TREE_CODE (oprnd1
) != SSA_NAME
2097 || TYPE_MODE (TREE_TYPE (oprnd0
)) == TYPE_MODE (TREE_TYPE (oprnd1
))
2098 || TYPE_PRECISION (TREE_TYPE (oprnd1
))
2099 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1
)))
2100 || TYPE_PRECISION (TREE_TYPE (lhs
))
2101 != TYPE_PRECISION (TREE_TYPE (oprnd0
)))
2104 if (!vect_is_simple_use (oprnd1
, last_stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
2108 if (dt
!= vect_internal_def
)
2111 *type_in
= get_vectype_for_scalar_type (TREE_TYPE (oprnd0
));
2112 *type_out
= *type_in
;
2113 if (*type_in
== NULL_TREE
)
2117 if (gimple_assign_cast_p (def_stmt
))
2119 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
2120 if (TYPE_MODE (TREE_TYPE (rhs1
)) == TYPE_MODE (TREE_TYPE (oprnd0
))
2121 && TYPE_PRECISION (TREE_TYPE (rhs1
))
2122 == TYPE_PRECISION (TREE_TYPE (oprnd0
)))
2126 if (def
== NULL_TREE
)
2128 def
= vect_recog_temp_ssa_var (TREE_TYPE (oprnd0
), NULL
);
2129 def_stmt
= gimple_build_assign (def
, NOP_EXPR
, oprnd1
);
2130 new_pattern_def_seq (stmt_vinfo
, def_stmt
);
2133 /* Pattern detected. */
2134 if (dump_enabled_p ())
2135 dump_printf_loc (MSG_NOTE
, vect_location
,
2136 "vect_recog_vector_vector_shift_pattern: detected:\n");
2138 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2139 var
= vect_recog_temp_ssa_var (TREE_TYPE (oprnd0
), NULL
);
2140 pattern_stmt
= gimple_build_assign (var
, rhs_code
, oprnd0
, def
);
2142 if (dump_enabled_p ())
2143 dump_gimple_stmt_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, pattern_stmt
, 0);
2145 stmts
->safe_push (last_stmt
);
2146 return pattern_stmt
;
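
/* Illustrative sketch only: the kind of loop the pattern above targets,
   where the shift count has a narrower type than the shifted value (here
   'long' shifted by 'int'; the names are hypothetical).  Conceptually the
   pattern adds a cast of the count to the wider type so that both vector
   operands get the same element width.  */
#if 0
void
vv_shift_sketch (long *b, int *a, long *res, int n)
{
  for (int i = 0; i < n; i++)
    /* S3: res_T = b_T << a_t.  The pattern records "tmp = (long) a[i];"
       in the pattern def seq and rewrites this as res[i] = b[i] << tmp.  */
    res[i] = b[i] << a[i];
}
#endif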
/* Detect multiplication by a constant that is a positive or negative power
   of 2, and convert it to a shift pattern.

   Mult with a constant that is a positive power of two:
   S1: b_t = a_t * n

   Mult with a constant that is a negative power of two:
   S2: b_t = a_t * -n

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins,
     i.e. the mult stmt.  Convert the mult operation to LSHIFT if the
     constant operand is a power of 2:
     S1': b_t = a_t << log2 (n)

     Convert the mult operation to LSHIFT followed by a NEGATE if the
     constant operand is a negative power of 2:
     type a_t, b_t, res_T;
     S2': b_t = a_t << log2 (n)
     S3': res_T = - (b_t)

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the multiplication
     S1 or S2 stmt.  */
static gimple *
vect_recog_mult_pattern (vec<gimple *> *stmts,
			 tree *type_in, tree *type_out)
{
  gimple *last_stmt = stmts->pop ();
  tree oprnd0, oprnd1, vectype, itype;
  gimple *pattern_stmt, *def_stmt;
  optab optab;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  int power2_val, power2_neg_val;
  tree shift;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  itype = TREE_TYPE (oprnd0);

  if (TREE_CODE (oprnd0) != SSA_NAME
      || TREE_CODE (oprnd1) != INTEGER_CST
      || !INTEGRAL_TYPE_P (itype)
      || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
    return NULL;

  vectype = get_vectype_for_scalar_type (itype);
  if (vectype == NULL_TREE)
    return NULL;

  /* If the target can handle vectorized multiplication natively,
     don't attempt to optimize this.  */
  optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
  if (optab != unknown_optab)
    {
      machine_mode vec_mode = TYPE_MODE (vectype);
      int icode = (int) optab_handler (optab, vec_mode);
      if (icode != CODE_FOR_nothing)
	return NULL;
    }

  /* If the target cannot handle vector left shift then we cannot
     optimize and bail out.  */
  optab = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
  if (!optab
      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    return NULL;

  power2_val = wi::exact_log2 (oprnd1);
  power2_neg_val = wi::exact_log2 (wi::neg (oprnd1));

  /* Handle constant operands that are positive or negative powers of 2.  */
  if (power2_val != -1)
    {
      shift = build_int_cst (itype, power2_val);
      pattern_stmt
	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
			       LSHIFT_EXPR, oprnd0, shift);
    }
  else if (power2_neg_val != -1)
    {
      /* If the target cannot handle vector NEGATE then we cannot
	 do the optimization.  */
      optab = optab_for_tree_code (NEGATE_EXPR, vectype, optab_vector);
      if (!optab
	  || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
	return NULL;

      shift = build_int_cst (itype, power2_neg_val);
      def_stmt
	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
			       LSHIFT_EXPR, oprnd0, shift);
      new_pattern_def_seq (stmt_vinfo, def_stmt);
      pattern_stmt
	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
			       NEGATE_EXPR, gimple_assign_lhs (def_stmt));
    }
  else
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_recog_mult_pattern: detected:\n");

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM,
			  pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  *type_in = vectype;
  *type_out = vectype;

  return pattern_stmt;
}
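
/* Illustrative sketch only: the scalar rewrites the pattern above performs
   for a positive and a negative power-of-2 multiplier (here 8 and -8),
   ignoring signed overflow.  */
#if 0
static int
mult_pow2_sketch (int a)
{
  return a * 8;		/* becomes: a << 3 */
}

static int
mult_neg_pow2_sketch (int a)
{
  return a * -8;	/* becomes: b = a << 3;  res = -b */
}
#endif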
/* Detect a signed division by a constant that wouldn't be
   otherwise vectorized:

   type a_t, b_t;

   S1 a_t = b_t / N;

   where type 'type' is an integral type and N is a constant.

   Similarly handle modulo by a constant:

   S4 a_t = b_t % N;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins,
     i.e. the division stmt.  If N is a power of two constant and the
     type is signed, S1 is replaced by:
     S3  y_t = b_t < 0 ? N - 1 : 0;
     S2  x_t = b_t + y_t;
     S1' a_t = x_t >> log2 (N);

     If N is a power of two constant and the type is signed, S4 is
     replaced by (where *_T temporaries have unsigned type):
     S9  y_T = b_t < 0 ? -1U : 0U;
     S8  z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
     S7  z_t = (type) z_T;
     S6  w_t = b_t + z_t;
     S5  x_t = w_t & (N - 1);
     S4' a_t = x_t - z_t;

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the division
     S1 or modulo S4 stmt.  */
static gimple *
vect_recog_divmod_pattern (vec<gimple *> *stmts,
			   tree *type_in, tree *type_out)
{
  gimple *last_stmt = stmts->pop ();
  tree oprnd0, oprnd1, vectype, itype, cond;
  gimple *pattern_stmt, *def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  optab optab;
  tree q;
  int dummy_int, prec;
  stmt_vec_info def_stmt_vinfo;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case TRUNC_DIV_EXPR:
    case TRUNC_MOD_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  itype = TREE_TYPE (oprnd0);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TREE_CODE (oprnd1) != INTEGER_CST
      || TREE_CODE (itype) != INTEGER_TYPE
      || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
    return NULL;

  vectype = get_vectype_for_scalar_type (itype);
  if (vectype == NULL_TREE)
    return NULL;

  /* If the target can handle vectorized division or modulo natively,
     don't attempt to optimize this.  */
  optab = optab_for_tree_code (rhs_code, vectype, optab_default);
  if (optab != unknown_optab)
    {
      machine_mode vec_mode = TYPE_MODE (vectype);
      int icode = (int) optab_handler (optab, vec_mode);
      if (icode != CODE_FOR_nothing)
	return NULL;
    }

  prec = TYPE_PRECISION (itype);
  if (integer_pow2p (oprnd1))
    {
      if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
	return NULL;

      /* Pattern detected.  */
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_recog_divmod_pattern: detected:\n");

      cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
		     build_int_cst (itype, 0));
      if (rhs_code == TRUNC_DIV_EXPR)
	{
	  tree var = vect_recog_temp_ssa_var (itype, NULL);
	  tree shift;
	  def_stmt
	    = gimple_build_assign (var, COND_EXPR, cond,
				   fold_build2 (MINUS_EXPR, itype, oprnd1,
						build_int_cst (itype, 1)),
				   build_int_cst (itype, 0));
	  new_pattern_def_seq (stmt_vinfo, def_stmt);
	  var = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt
	    = gimple_build_assign (var, PLUS_EXPR, oprnd0,
				   gimple_assign_lhs (def_stmt));
	  append_pattern_def_seq (stmt_vinfo, def_stmt);

	  shift = build_int_cst (itype, tree_log2 (oprnd1));
	  pattern_stmt
	    = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
				   RSHIFT_EXPR, var, shift);
	}
      else
	{
	  tree signmask;
	  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
	  if (compare_tree_int (oprnd1, 2) == 0)
	    {
	      signmask = vect_recog_temp_ssa_var (itype, NULL);
	      def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
					      build_int_cst (itype, 1),
					      build_int_cst (itype, 0));
	      append_pattern_def_seq (stmt_vinfo, def_stmt);
	    }
	  else
	    {
	      tree utype
		= build_nonstandard_integer_type (prec, 1);
	      tree vecutype = get_vectype_for_scalar_type (utype);
	      tree shift
		= build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
					- tree_log2 (oprnd1));
	      tree var = vect_recog_temp_ssa_var (utype, NULL);

	      def_stmt = gimple_build_assign (var, COND_EXPR, cond,
					      build_int_cst (utype, -1),
					      build_int_cst (utype, 0));
	      def_stmt_vinfo
		= new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
	      set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
	      STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
	      append_pattern_def_seq (stmt_vinfo, def_stmt);
	      var = vect_recog_temp_ssa_var (utype, NULL);
	      def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
					      gimple_assign_lhs (def_stmt),
					      shift);
	      def_stmt_vinfo
		= new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
	      set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
	      STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
	      append_pattern_def_seq (stmt_vinfo, def_stmt);
	      signmask = vect_recog_temp_ssa_var (itype, NULL);
	      def_stmt
		= gimple_build_assign (signmask, NOP_EXPR, var);
	      append_pattern_def_seq (stmt_vinfo, def_stmt);
	    }
	  def_stmt
	    = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
				   PLUS_EXPR, oprnd0, signmask);
	  append_pattern_def_seq (stmt_vinfo, def_stmt);
	  def_stmt
	    = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
				   BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
				   fold_build2 (MINUS_EXPR, itype, oprnd1,
						build_int_cst (itype, 1)));
	  append_pattern_def_seq (stmt_vinfo, def_stmt);

	  pattern_stmt
	    = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
				   MINUS_EXPR, gimple_assign_lhs (def_stmt),
				   signmask);
	}

      if (dump_enabled_p ())
	dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
			      0);

      stmts->safe_push (last_stmt);

      *type_in = vectype;
      *type_out = vectype;
      return pattern_stmt;
    }
  if (prec > HOST_BITS_PER_WIDE_INT
      || integer_zerop (oprnd1))
    return NULL;

  if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
    return NULL;

  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;

  if (TYPE_UNSIGNED (itype))
    {
      unsigned HOST_WIDE_INT mh, ml;
      int pre_shift, post_shift;
      unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
				  & GET_MODE_MASK (TYPE_MODE (itype)));
      tree t1, t2, t3, t4;

      if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
	/* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0.  */
	return NULL;

      /* Find a suitable multiplier and right shift count
	 instead of multiplying with D.  */
      mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);

      /* If the suggested multiplier is more than SIZE bits, we can do better
	 for even divisors, using an initial right shift.  */
      if (mh != 0 && (d & 1) == 0)
	{
	  pre_shift = floor_log2 (d & -d);
	  mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
				  &ml, &post_shift, &dummy_int);
	  gcc_assert (!mh);
	}
      else
	pre_shift = 0;

      if (mh != 0)
	{
	  if (post_shift - 1 >= prec)
	    return NULL;

	  /* t1 = oprnd0 h* ml;
	     t2 = oprnd0 - t1;
	     t3 = t2 >> 1;
	     t4 = t1 + t3;
	     q = t4 >> (post_shift - 1);  */
	  t1 = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
					  build_int_cst (itype, ml));
	  append_pattern_def_seq (stmt_vinfo, def_stmt);

	  t2 = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt
	    = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
	  append_pattern_def_seq (stmt_vinfo, def_stmt);

	  t3 = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt
	    = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
	  append_pattern_def_seq (stmt_vinfo, def_stmt);

	  t4 = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt
	    = gimple_build_assign (t4, PLUS_EXPR, t1, t3);

	  if (post_shift != 1)
	    {
	      append_pattern_def_seq (stmt_vinfo, def_stmt);

	      q = vect_recog_temp_ssa_var (itype, NULL);
	      pattern_stmt
		= gimple_build_assign (q, RSHIFT_EXPR, t4,
				       build_int_cst (itype, post_shift - 1));
	    }
	  else
	    {
	      q = t4;
	      pattern_stmt = def_stmt;
	    }
	}
      else
	{
	  if (pre_shift >= prec || post_shift >= prec)
	    return NULL;

	  /* t1 = oprnd0 >> pre_shift;
	     t2 = t1 h* ml;
	     q = t2 >> post_shift;  */
	  if (pre_shift)
	    {
	      t1 = vect_recog_temp_ssa_var (itype, NULL);
	      def_stmt
		= gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
				       build_int_cst (NULL, pre_shift));
	      append_pattern_def_seq (stmt_vinfo, def_stmt);
	    }
	  else
	    t1 = oprnd0;

	  t2 = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
					  build_int_cst (itype, ml));

	  if (post_shift)
	    {
	      append_pattern_def_seq (stmt_vinfo, def_stmt);

	      q = vect_recog_temp_ssa_var (itype, NULL);
	      def_stmt
		= gimple_build_assign (q, RSHIFT_EXPR, t2,
				       build_int_cst (itype, post_shift));
	    }
	  else
	    q = t2;

	  pattern_stmt = def_stmt;
	}
    }
  else
    {
      unsigned HOST_WIDE_INT ml;
      int post_shift;
      HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
      unsigned HOST_WIDE_INT abs_d;
      bool add = false;
      tree t1, t2, t3, t4;

      /* Give up for -1.  */
      if (d == -1)
	return NULL;

      /* Since d might be INT_MIN, we have to cast to
	 unsigned HOST_WIDE_INT before negating to avoid
	 undefined signed overflow.  */
      abs_d = (d >= 0
	       ? (unsigned HOST_WIDE_INT) d
	       : - (unsigned HOST_WIDE_INT) d);

      /* n rem d = n rem -d */
      if (rhs_code == TRUNC_MOD_EXPR && d < 0)
	{
	  d = abs_d;
	  oprnd1 = build_int_cst (itype, abs_d);
	}
      else if (HOST_BITS_PER_WIDE_INT >= prec
	       && abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
	/* This case is not handled correctly below.  */
	return NULL;

      choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
      if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
	{
	  add = true;
	  ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
	}
      if (post_shift >= prec)
	return NULL;

      /* t1 = oprnd0 h* ml;  */
      t1 = vect_recog_temp_ssa_var (itype, NULL);
      def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
				      build_int_cst (itype, ml));

      if (add)
	{
	  /* t2 = t1 + oprnd0;  */
	  append_pattern_def_seq (stmt_vinfo, def_stmt);
	  t2 = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
	}
      else
	t2 = t1;

      if (post_shift)
	{
	  /* t3 = t2 >> post_shift;  */
	  append_pattern_def_seq (stmt_vinfo, def_stmt);
	  t3 = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
					  build_int_cst (itype, post_shift));
	}
      else
	t3 = t2;

      wide_int oprnd0_min, oprnd0_max;
      int msb = 1;
      if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
	{
	  if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
	    msb = 0;
	  else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
	    msb = -1;
	}

      if (msb == 0 && d >= 0)
	{
	  /* q = t3;  */
	  q = t3;
	  pattern_stmt = def_stmt;
	}
      else
	{
	  /* t4 = oprnd0 >> (prec - 1);
	     or if we know from VRP that oprnd0 >= 0
	     t4 = 0;
	     or if we know from VRP that oprnd0 < 0
	     t4 = -1;  */
	  append_pattern_def_seq (stmt_vinfo, def_stmt);
	  t4 = vect_recog_temp_ssa_var (itype, NULL);
	  if (msb != 1)
	    def_stmt = gimple_build_assign (t4, INTEGER_CST,
					    build_int_cst (itype, msb));
	  else
	    def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
					    build_int_cst (itype, prec - 1));
	  append_pattern_def_seq (stmt_vinfo, def_stmt);

	  /* q = t3 - t4;  or q = t4 - t3;  */
	  q = vect_recog_temp_ssa_var (itype, NULL);
	  pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
					      d < 0 ? t3 : t4);
	}
    }

  if (rhs_code == TRUNC_MOD_EXPR)
    {
      tree r, t1;

      /* We divided.  Now finish by:
	 t1 = q * oprnd1;
	 r = oprnd0 - t1;  */
      append_pattern_def_seq (stmt_vinfo, pattern_stmt);

      t1 = vect_recog_temp_ssa_var (itype, NULL);
      def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
      append_pattern_def_seq (stmt_vinfo, def_stmt);

      r = vect_recog_temp_ssa_var (itype, NULL);
      pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
    }

  /* Pattern detected.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_recog_divmod_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  stmts->safe_push (last_stmt);

  *type_in = vectype;
  *type_out = vectype;
  return pattern_stmt;
}
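
/* Illustrative sketch only: the scalar expansions described in the comment
   before vect_recog_divmod_pattern, for N == 8, 32-bit int with 8-bit
   bytes, and assuming an arithmetic right shift of negative values.  */
#if 0
static int
sdiv8_sketch (int b)
{
  int y = b < 0 ? 7 : 0;	/* S3:  y_t = b_t < 0 ? N - 1 : 0 */
  int x = b + y;		/* S2:  x_t = b_t + y_t */
  return x >> 3;		/* S1': a_t = x_t >> log2 (N) */
}

static int
smod8_sketch (int b)
{
  unsigned int y = b < 0 ? -1U : 0U;			/* S9 */
  unsigned int z = y >> (sizeof (int) * 8 - 3);		/* S8 */
  int zt = (int) z;					/* S7 */
  int w = b + zt;					/* S6 */
  int x = w & 7;					/* S5 */
  return x - zt;					/* S4' */
}
#endif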
/* Function vect_recog_mixed_size_cond_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE a_T, b_T, c_T;
   loop:
     S1  a_T = x_t CMP y_t ? b_T : c_T;

   where type 'TYPE' is an integral type which has different size
   from 'type'.  b_T and c_T are either constants (and if 'TYPE' is wider
   than 'type', the constants need to fit into an integer type
   with the same width as 'type') or results of conversion from 'type'.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the pattern.
	Additionally a def_stmt is added.

	a_it = x_t CMP y_t ? b_it : c_it;
	a_T = (TYPE) a_it;  */
static gimple *
vect_recog_mixed_size_cond_pattern (vec<gimple *> *stmts, tree *type_in,
				    tree *type_out)
{
  gimple *last_stmt = (*stmts)[0];
  tree cond_expr, then_clause, else_clause;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
  tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
  gimple *pattern_stmt, *def_stmt;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
  gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
  bool promotion;
  tree comp_scalar_type;

  if (!is_gimple_assign (last_stmt)
      || gimple_assign_rhs_code (last_stmt) != COND_EXPR
      || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;

  cond_expr = gimple_assign_rhs1 (last_stmt);
  then_clause = gimple_assign_rhs2 (last_stmt);
  else_clause = gimple_assign_rhs3 (last_stmt);

  if (!COMPARISON_CLASS_P (cond_expr))
    return NULL;

  comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
  comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
  if (comp_vectype == NULL_TREE)
    return NULL;

  type = gimple_expr_type (last_stmt);
  if (types_compatible_p (type, comp_scalar_type)
      || ((TREE_CODE (then_clause) != INTEGER_CST
	   || TREE_CODE (else_clause) != INTEGER_CST)
	  && !INTEGRAL_TYPE_P (comp_scalar_type))
      || !INTEGRAL_TYPE_P (type))
    return NULL;

  if ((TREE_CODE (then_clause) != INTEGER_CST
       && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
			      &def_stmt0, &promotion))
      || (TREE_CODE (else_clause) != INTEGER_CST
	  && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
				 &def_stmt1, &promotion)))
    return NULL;

  if (orig_type0 && orig_type1
      && !types_compatible_p (orig_type0, orig_type1))
    return NULL;

  if (orig_type0)
    {
      if (!types_compatible_p (orig_type0, comp_scalar_type))
	return NULL;
      then_clause = gimple_assign_rhs1 (def_stmt0);
      itype = orig_type0;
    }

  if (orig_type1)
    {
      if (!types_compatible_p (orig_type1, comp_scalar_type))
	return NULL;
      else_clause = gimple_assign_rhs1 (def_stmt1);
      itype = orig_type1;
    }

  HOST_WIDE_INT cmp_mode_size
    = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));

  if (GET_MODE_BITSIZE (TYPE_MODE (type)) == cmp_mode_size)
    return NULL;

  vectype = get_vectype_for_scalar_type (type);
  if (vectype == NULL_TREE)
    return NULL;

  if (expand_vec_cond_expr_p (vectype, comp_vectype))
    return NULL;

  if (itype == NULL_TREE)
    itype = build_nonstandard_integer_type (cmp_mode_size,
					    TYPE_UNSIGNED (type));

  if (itype == NULL_TREE
      || GET_MODE_BITSIZE (TYPE_MODE (itype)) != cmp_mode_size)
    return NULL;

  vecitype = get_vectype_for_scalar_type (itype);
  if (vecitype == NULL_TREE)
    return NULL;

  if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
    return NULL;

  if (GET_MODE_BITSIZE (TYPE_MODE (type)) > cmp_mode_size)
    {
      if ((TREE_CODE (then_clause) == INTEGER_CST
	   && !int_fits_type_p (then_clause, itype))
	  || (TREE_CODE (else_clause) == INTEGER_CST
	      && !int_fits_type_p (else_clause, itype)))
	return NULL;
    }

  def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
				  COND_EXPR, unshare_expr (cond_expr),
				  fold_convert (itype, then_clause),
				  fold_convert (itype, else_clause));
  pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
				      NOP_EXPR, gimple_assign_lhs (def_stmt));

  new_pattern_def_seq (stmt_vinfo, def_stmt);
  def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
  set_vinfo_for_stmt (def_stmt, def_stmt_info);
  STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
  *type_in = vecitype;
  *type_out = vectype;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_recog_mixed_size_cond_pattern: detected:\n");

  return pattern_stmt;
}
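
/* Illustrative sketch only: a mixed-size COND_EXPR of the shape the pattern
   above rewrites.  'short' results are selected by an 'int' comparison, so
   the pattern performs the selection in an int-sized integer type and then
   narrows the result (names and constants are hypothetical).  */
#if 0
void
mixed_cond_sketch (int *x, int *y, short *out, int n)
{
  for (int i = 0; i < n; i++)
    /* S1: a_T = x_t CMP y_t ? b_T : c_T becomes, conceptually,
       a_it = x[i] < y[i] ? 5 : 12;   -- COND_EXPR in the comparison width
       out[i] = (short) a_it;         -- NOP_EXPR down to TYPE  */
    out[i] = x[i] < y[i] ? 5 : 12;
}
#endif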
/* Helper function of vect_recog_bool_pattern.  Called recursively, return
   true if bool VAR can be optimized that way.  */

static bool
check_bool_pattern (tree var, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  gimple *def_stmt;
  enum vect_def_type dt;
  tree def, rhs1;
  enum tree_code rhs_code;

  if (!vect_is_simple_use (var, NULL, loop_vinfo, bb_vinfo, &def_stmt, &def,
			   &dt))
    return false;

  if (dt != vect_internal_def)
    return false;

  if (!is_gimple_assign (def_stmt))
    return false;

  if (!has_single_use (def))
    return false;

  rhs1 = gimple_assign_rhs1 (def_stmt);
  rhs_code = gimple_assign_rhs_code (def_stmt);
  switch (rhs_code)
    {
    case SSA_NAME:
      return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);

    CASE_CONVERT:
      if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
	   || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
	  && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
	return false;
      return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);

    case BIT_NOT_EXPR:
      return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);

    case BIT_AND_EXPR:
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
      if (!check_bool_pattern (rhs1, loop_vinfo, bb_vinfo))
	return false;
      return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo,
				 bb_vinfo);

    default:
      if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
	{
	  tree vecitype, comp_vectype;

	  /* If the comparison can throw, then is_gimple_condexpr will be
	     false and we can't make a COND_EXPR/VEC_COND_EXPR out of it.  */
	  if (stmt_could_throw_p (def_stmt))
	    return false;

	  comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
	  if (comp_vectype == NULL_TREE)
	    return false;

	  if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
	    {
	      machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
	      tree itype
		= build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
	      vecitype = get_vectype_for_scalar_type (itype);
	      if (vecitype == NULL_TREE)
		return false;
	    }
	  else
	    vecitype = comp_vectype;
	  return expand_vec_cond_expr_p (vecitype, comp_vectype);
	}
      return false;
    }
}
/* Helper function of adjust_bool_pattern.  Add a cast to TYPE to a previous
   stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
   to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT.  */

static tree
adjust_bool_pattern_cast (tree type, tree var)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
  gimple *cast_stmt, *pattern_stmt;

  gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
  pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
  new_pattern_def_seq (stmt_vinfo, pattern_stmt);
  cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
				   NOP_EXPR, gimple_assign_lhs (pattern_stmt));
  STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
  return gimple_assign_lhs (cast_stmt);
}
/* Helper function of vect_recog_bool_pattern.  Do the actual transformations,
   recursively.  VAR is an SSA_NAME that should be transformed from bool
   to a wider integer type, OUT_TYPE is the desired final integer type of
   the whole pattern, TRUEVAL should be NULL unless optimizing
   BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
   in the then_clause, STMTS is where statements with added pattern stmts
   should be pushed to.  */

static tree
adjust_bool_pattern (tree var, tree out_type, tree trueval,
		     vec<gimple *> *stmts)
{
  gimple *stmt = SSA_NAME_DEF_STMT (var);
  enum tree_code rhs_code, def_rhs_code;
  tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
  location_t loc;
  gimple *pattern_stmt, *def_stmt;

  rhs1 = gimple_assign_rhs1 (stmt);
  rhs2 = gimple_assign_rhs2 (stmt);
  rhs_code = gimple_assign_rhs_code (stmt);
  loc = gimple_location (stmt);
  switch (rhs_code)
    {
    case SSA_NAME:
    CASE_CONVERT:
      irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
      itype = TREE_TYPE (irhs1);
      pattern_stmt
	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
			       SSA_NAME, irhs1);
      break;

    case BIT_NOT_EXPR:
      irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
      itype = TREE_TYPE (irhs1);
      pattern_stmt
	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
			       BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
      break;

    case BIT_AND_EXPR:
      /* Try to optimize x = y & (a < b ? 1 : 0); into
	 x = (a < b ? y : 0);

	 E.g. for:
	   S1  a_b = x1 CMP1 y1;
	   S2  b_b = x2 CMP2 y2;
	   S3  c_b = a_b & b_b;
	   S4  d_T = (TYPE) c_b;

	 we would normally emit:

	   S1'  a_T = x1 CMP1 y1 ? 1 : 0;
	   S2'  b_T = x2 CMP2 y2 ? 1 : 0;
	   S3'  c_T = a_T & b_T;
	   S4'  d_T = c_T;

	 but we can save one stmt by using the
	 result of one of the COND_EXPRs in the other COND_EXPR and leave
	 BIT_AND_EXPR stmt out:

	   S1'  a_T = x1 CMP1 y1 ? 1 : 0;
	   S3'  c_T = x2 CMP2 y2 ? a_T : 0;
	   S4'  d_T = c_T;

	 At least when VEC_COND_EXPR is implemented using masks
	 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
	 computes the comparison masks and ands it, in one case with
	 all ones vector, in the other case with a vector register.
	 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
	 often more expensive.  */
      def_stmt = SSA_NAME_DEF_STMT (rhs2);
      def_rhs_code = gimple_assign_rhs_code (def_stmt);
      if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
	{
	  tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
	  irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
	  if (TYPE_PRECISION (TREE_TYPE (irhs1))
	      == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
	    {
	      gimple *tstmt;
	      stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
	      irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
	      tstmt = stmts->pop ();
	      gcc_assert (tstmt == def_stmt);
	      stmts->quick_push (stmt);
	      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
		= STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
	      gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
	      STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
	      return irhs2;
	    }
	  else
	    irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
	  goto and_ior_xor;
	}
      def_stmt = SSA_NAME_DEF_STMT (rhs1);
      def_rhs_code = gimple_assign_rhs_code (def_stmt);
      if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
	{
	  tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
	  irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
	  if (TYPE_PRECISION (TREE_TYPE (irhs2))
	      == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
	    {
	      gimple *tstmt;
	      stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
	      irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
	      tstmt = stmts->pop ();
	      gcc_assert (tstmt == def_stmt);
	      stmts->quick_push (stmt);
	      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
		= STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
	      gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
	      STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
	      return irhs1;
	    }
	  else
	    irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
	  goto and_ior_xor;
	}
      /* FALLTHRU */
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
      irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
      irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
    and_ior_xor:
      if (TYPE_PRECISION (TREE_TYPE (irhs1))
	  != TYPE_PRECISION (TREE_TYPE (irhs2)))
	{
	  int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
	  int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
	  int out_prec = TYPE_PRECISION (out_type);
	  if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
	    irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
	  else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
	    irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
	  else
	    {
	      irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
	      irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
	    }
	}
      itype = TREE_TYPE (irhs1);
      pattern_stmt
	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
			       rhs_code, irhs1, irhs2);
      break;

    default:
      gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
      if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
	  || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
	  || (TYPE_PRECISION (TREE_TYPE (rhs1))
	      != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
	{
	  machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
	  itype
	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
	}
      else
	itype = TREE_TYPE (rhs1);
      cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
      if (trueval == NULL_TREE)
	trueval = build_int_cst (itype, 1);
      else
	gcc_checking_assert (useless_type_conversion_p (itype,
							TREE_TYPE (trueval)));
      pattern_stmt
	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
			       COND_EXPR, cond_expr, trueval,
			       build_int_cst (itype, 0));
      break;
    }

  stmts->safe_push (stmt);
  gimple_set_location (pattern_stmt, loc);
  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
  return gimple_assign_lhs (pattern_stmt);
}
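
/* Illustrative sketch only: the saving described in the BIT_AND_EXPR case
   above, written as scalar code.  Instead of materializing both comparisons
   as 0/1 values and ANDing them, one comparison result is reused as the
   then-value of the other COND_EXPR.  */
#if 0
static int
bool_and_sketch (int x1, int y1, int x2, int y2)
{
  int a_T = x1 < y1 ? 1 : 0;	/* S1' */
  int c_T = x2 < y2 ? a_T : 0;	/* S3': replaces b_T = ...; c_T = a_T & b_T */
  return c_T;			/* S4' */
}
#endif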
/* Function vect_recog_bool_pattern

   Try to find a pattern like the following:

     bool a_b, b_b, c_b, d_b, e_b;
     TYPE f_T;
   loop:
     S1  a_b = x1 CMP1 y1;
     S2  b_b = x2 CMP2 y2;
     S3  c_b = a_b & b_b;
     S4  d_b = x3 CMP3 y3;
     S5  e_b = c_b | d_b;
     S6  f_T = (TYPE) e_b;

   where type 'TYPE' is an integral type.  Or a similar pattern
   ending in

     S6  f_Y = e_b ? r_Y : s_Y;

   as results from if-conversion of a complex condition.

   Input:

   * LAST_STMT: A stmt at the end from which the pattern
		search begins, i.e. cast of a bool to
		an integer type.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the pattern.

	Assuming size of TYPE is the same as size of all comparisons
	(otherwise some casts would be added where needed), for the above
	sequence we create related pattern stmts:
	S1'  a_T = x1 CMP1 y1 ? 1 : 0;
	S3'  c_T = x2 CMP2 y2 ? a_T : 0;
	S4'  d_T = x3 CMP3 y3 ? 1 : 0;
	S5'  e_T = c_T | d_T;
	S6'  f_T = e_T;

	Instead of the above S3' we could emit:
	S2'  b_T = x2 CMP2 y2 ? 1 : 0;
	S3'  c_T = a_T & b_T;
	but the above is more efficient.  */
static gimple *
vect_recog_bool_pattern (vec<gimple *> *stmts, tree *type_in,
			 tree *type_out)
{
  gimple *last_stmt = stmts->pop ();
  enum tree_code rhs_code;
  tree var, lhs, rhs, vectype;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  gimple *pattern_stmt;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  var = gimple_assign_rhs1 (last_stmt);
  lhs = gimple_assign_lhs (last_stmt);

  if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
       || !TYPE_UNSIGNED (TREE_TYPE (var)))
      && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  if (CONVERT_EXPR_CODE_P (rhs_code))
    {
      if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
	  || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
	return NULL;
      vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
      if (vectype == NULL_TREE)
	return NULL;

      if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
	return NULL;

      rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
      if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
	pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
      else
	pattern_stmt
	  = gimple_build_assign (lhs, NOP_EXPR, rhs);
      *type_out = vectype;
      *type_in = vectype;
      stmts->safe_push (last_stmt);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_recog_bool_pattern: detected:\n");

      return pattern_stmt;
    }
  else if (rhs_code == COND_EXPR
	   && TREE_CODE (var) == SSA_NAME)
    {
      vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
      if (vectype == NULL_TREE)
	return NULL;

      /* Build a scalar type for the boolean result that when
	 vectorized matches the vector type of the result in
	 size and number of elements.  */
      unsigned prec
	= wi::udiv_trunc (TYPE_SIZE (vectype),
			  TYPE_VECTOR_SUBPARTS (vectype)).to_uhwi ();
      tree type
	= build_nonstandard_integer_type (prec,
					  TYPE_UNSIGNED (TREE_TYPE (var)));
      if (get_vectype_for_scalar_type (type) == NULL_TREE)
	return NULL;

      if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
	return NULL;

      rhs = adjust_bool_pattern (var, type, NULL_TREE, stmts);
      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
      pattern_stmt
	= gimple_build_assign (lhs, COND_EXPR,
			       build2 (NE_EXPR, boolean_type_node,
				       rhs, build_int_cst (type, 0)),
			       gimple_assign_rhs2 (last_stmt),
			       gimple_assign_rhs3 (last_stmt));
      *type_out = vectype;
      *type_in = vectype;
      stmts->safe_push (last_stmt);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_recog_bool_pattern: detected:\n");

      return pattern_stmt;
    }
  else if (rhs_code == SSA_NAME
	   && STMT_VINFO_DATA_REF (stmt_vinfo))
    {
      stmt_vec_info pattern_stmt_info;
      vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      gcc_assert (vectype != NULL_TREE);
      if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
	return NULL;
      if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
	return NULL;

      rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
      lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
      if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
	{
	  tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
	  gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
	  new_pattern_def_seq (stmt_vinfo, cast_stmt);
	  rhs = rhs2;
	}
      pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
      pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
					     bb_vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
      STMT_VINFO_DATA_REF (pattern_stmt_info)
	= STMT_VINFO_DATA_REF (stmt_vinfo);
      STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
	= STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
      STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
      STMT_VINFO_DR_OFFSET (pattern_stmt_info)
	= STMT_VINFO_DR_OFFSET (stmt_vinfo);
      STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
      STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
	= STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
      DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
      *type_out = vectype;
      *type_in = vectype;
      stmts->safe_push (last_stmt);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_recog_bool_pattern: detected:\n");
      return pattern_stmt;
    }
  else
    return NULL;
}
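
/* Illustrative sketch only: the bool chain from the comment before
   vect_recog_bool_pattern and the integer form the pattern creates for it,
   taking TYPE as int and all comparisons int-sized.  */
#if 0
static int
bool_chain_sketch (int x1, int y1, int x2, int y2, int x3, int y3)
{
  /* Original: a_b = x1<y1; b_b = x2<y2; c_b = a_b & b_b;
	       d_b = x3<y3; e_b = c_b | d_b; f_T = (int) e_b;  */
  int a_T = x1 < y1 ? 1 : 0;	/* S1' */
  int c_T = x2 < y2 ? a_T : 0;	/* S3' */
  int d_T = x3 < y3 ? 1 : 0;	/* S4' */
  int e_T = c_T | d_T;		/* S5' */
  return e_T;			/* S6': f_T = e_T */
}
#endif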
/* Mark statements that are involved in a pattern.  */

static inline void
vect_mark_pattern_stmts (gimple *orig_stmt, gimple *pattern_stmt,
			 tree pattern_vectype)
{
  stmt_vec_info pattern_stmt_info, def_stmt_info;
  stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (orig_stmt_info);
  gimple *def_stmt;

  pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
  if (pattern_stmt_info == NULL)
    {
      pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
					     bb_vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
    }
  gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));

  STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
  STMT_VINFO_DEF_TYPE (pattern_stmt_info)
    = STMT_VINFO_DEF_TYPE (orig_stmt_info);
  STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
  STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
  STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
  STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
    = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
  if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
    {
      gimple_stmt_iterator si;
      for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
	   !gsi_end_p (si); gsi_next (&si))
	{
	  def_stmt = gsi_stmt (si);
	  def_stmt_info = vinfo_for_stmt (def_stmt);
	  if (def_stmt_info == NULL)
	    {
	      def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo,
						 bb_vinfo);
	      set_vinfo_for_stmt (def_stmt, def_stmt_info);
	    }
	  gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
	  STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
	  STMT_VINFO_DEF_TYPE (def_stmt_info) = vect_internal_def;
	  if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
	    STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
	}
    }
}
/* Function vect_pattern_recog_1

   Input:
   PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
	computation pattern.
   STMT: A stmt from which the pattern search should start.

   If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
   expression that computes the same functionality and can be used to
   replace the sequence of stmts that are involved in the pattern.

   Output:
   This function checks if the expression returned by PATTERN_RECOG_FUNC is
   supported in vector form by the target.  We use 'TYPE_IN' to obtain the
   relevant vector type.  If 'TYPE_IN' is already a vector type, then this
   indicates that target support had already been checked by
   PATTERN_RECOG_FUNC.  If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC,
   we check that it fits to the available target pattern.

   This function also does some bookkeeping, as explained in the documentation
   for vect_recog_pattern.  */
static void
vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
		      gimple_stmt_iterator si,
		      vec<gimple *> *stmts_to_replace)
{
  gimple *stmt = gsi_stmt (si), *pattern_stmt;
  stmt_vec_info stmt_info;
  loop_vec_info loop_vinfo;
  tree pattern_vectype;
  tree type_in, type_out;
  enum tree_code code;
  int i;
  gimple *next;

  stmts_to_replace->truncate (0);
  stmts_to_replace->quick_push (stmt);
  pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
  if (!pattern_stmt)
    return;

  stmt = stmts_to_replace->last ();
  stmt_info = vinfo_for_stmt (stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);

  if (VECTOR_MODE_P (TYPE_MODE (type_in)))
    {
      /* No need to check target support (already checked by the pattern
	 recognition function).  */
      pattern_vectype = type_out ? type_out : type_in;
    }
  else
    {
      machine_mode vec_mode;
      enum insn_code icode;
      optab optab;

      /* Check target support  */
      type_in = get_vectype_for_scalar_type (type_in);
      if (!type_in)
	return;
      if (type_out)
	type_out = get_vectype_for_scalar_type (type_out);
      else
	type_out = type_in;
      if (!type_out)
	return;
      pattern_vectype = type_out;

      if (is_gimple_assign (pattern_stmt))
	code = gimple_assign_rhs_code (pattern_stmt);
      else
	{
	  gcc_assert (is_gimple_call (pattern_stmt));
	  code = CALL_EXPR;
	}

      optab = optab_for_tree_code (code, type_in, optab_default);
      vec_mode = TYPE_MODE (type_in);
      if (!optab
	  || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
	  || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
	return;
    }

  /* Found a vectorizable pattern.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "pattern recognized: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
    }

  /* Mark the stmts that are involved in the pattern.  */
  vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);

  /* Patterns cannot be vectorized using SLP, because they change the order of
     computation.  */
  if (loop_vinfo)
    FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
      if (next == stmt)
	LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);

  /* It is possible that additional pattern stmts are created and inserted in
     STMTS_TO_REPLACE.  We create a stmt_info for each of them, and mark the
     relevant statements.  */
  for (i = 0; stmts_to_replace->iterate (i, &stmt)
	      && (unsigned) i < (stmts_to_replace->length () - 1);
       i++)
    {
      stmt_info = vinfo_for_stmt (stmt);
      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "additional pattern stmt: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
	}

      vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
    }
}
/* Function vect_pattern_recog

   Input:
   LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
	computation idioms.

   Output - for each computation idiom that is detected we create a new stmt
	that provides the same functionality and that can be vectorized.  We
	also record some information in the struct_stmt_info of the relevant
	stmts, as explained below:

   At the entry to this function we have the following stmts, with the
   following initial value in the STMT_VINFO fields:

         stmt                     in_pattern_p  related_stmt    vec_stmt
         S1: a_i = ....                 -       -               -
         S2: a_2 = ..use(a_i)..         -       -               -
         S3: a_1 = ..use(a_2)..         -       -               -
         S4: a_0 = ..use(a_1)..         -       -               -
         S5: ... = ..use(a_0)..         -       -               -

   Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
   represented by a single stmt.  We then:
   - create a new stmt S6 equivalent to the pattern (the stmt is not
     inserted into the code)
   - fill in the STMT_VINFO fields as follows:

                                  in_pattern_p  related_stmt    vec_stmt
         S1: a_i = ....                 -       -               -
         S2: a_2 = ..use(a_i)..         -       -               -
         S3: a_1 = ..use(a_2)..         -       -               -
         S4: a_0 = ..use(a_1)..         true    S6              -
           '---> S6: a_new = ....       -       S4              -
         S5: ... = ..use(a_0)..         -       -               -

   (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
   to each other through the RELATED_STMT field).

   S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
   of S4 because it will replace all its uses.  Stmts {S1,S2,S3} will
   remain irrelevant unless used by stmts other than S4.

   If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
   (because they are marked as irrelevant).  It will vectorize S6, and record
   a pointer to the new vector stmt VS6 from S6 (as usual).
   S4 will be skipped, and S5 will be vectorized as usual:

                                  in_pattern_p  related_stmt    vec_stmt
         S1: a_i = ....                 -       -               -
         S2: a_2 = ..use(a_i)..         -       -               -
         S3: a_1 = ..use(a_2)..         -       -               -
       > VS6: va_new = ....             -       -               -
         S4: a_0 = ..use(a_1)..         true    S6              VS6
           '---> S6: a_new = ....       -       S4              VS6
       > VS5: ... = ..vuse(va_new)..    -       -               -
         S5: ... = ..use(a_0)..         -       -               -

   DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
   elsewhere), and we'll end up with:

        VS6: va_new = ....
        VS5: ... = ..vuse(va_new)..

   In case of more than one pattern statements, e.g., widen-mult with
   intermediate type:

     S2  a_T = (TYPE) a_t;
           '--> S3: a_it = (interm_type) a_t;
     S4  prod_T = a_T * CONST;
           '--> S5: prod_T' = a_it w* CONST;

   there may be other users of a_T outside the pattern.  In that case S2 will
   be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
   and vectorized.  The vector stmt VS2 will be recorded in S2, and VS3 will
   be recorded in S3.  */
void
vect_pattern_recog (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  struct loop *loop;
  basic_block *bbs;
  unsigned int nbbs;
  gimple_stmt_iterator si;
  unsigned int i, j;
  vect_recog_func_ptr vect_recog_func;
  auto_vec<gimple *, 1> stmts_to_replace;
  gimple *stmt;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "=== vect_pattern_recog ===\n");

  if (loop_vinfo)
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      bbs = LOOP_VINFO_BBS (loop_vinfo);
      nbbs = loop->num_nodes;
    }
  else
    {
      bbs = &BB_VINFO_BB (bb_vinfo);
      nbbs = 1;
    }

  /* Scan through the loop stmts, applying the pattern recognition
     functions starting at each stmt visited:  */
  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  if (bb_vinfo && (stmt = gsi_stmt (si))
	      && vinfo_for_stmt (stmt)
	      && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
	    continue;

	  /* Scan over all generic vect_recog_xxx_pattern functions.  */
	  for (j = 0; j < NUM_PATTERNS; j++)
	    {
	      vect_recog_func = vect_vect_recog_func_ptrs[j];
	      vect_pattern_recog_1 (vect_recog_func, si,