1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2015 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
30 #include "fold-const.h"
31 #include "stor-layout.h"
33 #include "dominance.h"
34 #include "gimple-pretty-print.h"
35 #include "internal-fn.h"
38 #include "gimple-iterator.h"
41 #include "insn-config.h"
50 #include "insn-codes.h"
53 #include "tree-data-ref.h"
54 #include "tree-vectorizer.h"
55 #include "recog.h" /* FIXME: for insn_data */
56 #include "diagnostic-core.h"
60 /* Pattern recognition functions */
61 static gimple
vect_recog_widen_sum_pattern (vec
<gimple
> *, tree
*,
63 static gimple
vect_recog_widen_mult_pattern (vec
<gimple
> *, tree
*,
65 static gimple
vect_recog_dot_prod_pattern (vec
<gimple
> *, tree
*,
67 static gimple
vect_recog_sad_pattern (vec
<gimple
> *, tree
*,
69 static gimple
vect_recog_pow_pattern (vec
<gimple
> *, tree
*, tree
*);
70 static gimple
vect_recog_over_widening_pattern (vec
<gimple
> *, tree
*,
72 static gimple
vect_recog_widen_shift_pattern (vec
<gimple
> *,
74 static gimple
vect_recog_rotate_pattern (vec
<gimple
> *, tree
*, tree
*);
75 static gimple
vect_recog_vector_vector_shift_pattern (vec
<gimple
> *,
77 static gimple
vect_recog_divmod_pattern (vec
<gimple
> *,
80 static gimple
vect_recog_mult_pattern (vec
<gimple
> *,
83 static gimple
vect_recog_mixed_size_cond_pattern (vec
<gimple
> *,
85 static gimple
vect_recog_bool_pattern (vec
<gimple
> *, tree
*, tree
*);
/* Dispatch table of the pattern-recognition callbacks declared above;
   the vectorizer tries each entry in order on every candidate stmt.
   NOTE(review): this extraction has dropped interior lines elsewhere in
   the file (original numbering jumps) -- verify the entry count still
   matches NUM_PATTERNS against upstream tree-vect-patterns.c.  */
86 static vect_recog_func_ptr vect_vect_recog_func_ptrs
[NUM_PATTERNS
] = {
87 vect_recog_widen_mult_pattern
,
88 vect_recog_widen_sum_pattern
,
89 vect_recog_dot_prod_pattern
,
90 vect_recog_sad_pattern
,
91 vect_recog_pow_pattern
,
92 vect_recog_widen_shift_pattern
,
93 vect_recog_over_widening_pattern
,
94 vect_recog_rotate_pattern
,
95 vect_recog_vector_vector_shift_pattern
,
96 vect_recog_divmod_pattern
,
97 vect_recog_mult_pattern
,
98 vect_recog_mixed_size_cond_pattern
,
99 vect_recog_bool_pattern
};
/* Append STMT to the pattern-definition statement sequence recorded in
   STMT_INFO (STMT_VINFO_PATTERN_DEF_SEQ), without updating operands.
   NOTE(review): the closing lines of this definition (second call
   argument, braces) are missing from this extraction.  */
102 append_pattern_def_seq (stmt_vec_info stmt_info
, gimple stmt
)
104 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
),
/* Discard any existing pattern-definition sequence of STMT_INFO and
   start a new one containing just STMT (delegates to
   append_pattern_def_seq after clearing the sequence).  */
109 new_pattern_def_seq (stmt_vec_info stmt_info
, gimple stmt
)
111 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
) = NULL
;
112 append_pattern_def_seq (stmt_info
, stmt
);
115 /* Check whether STMT2 is in the same loop or basic block as STMT1.
116 Which of the two applies depends on whether we're currently doing
117 loop-based or basic-block-based vectorization, as determined by
118 the vinfo_for_stmt for STMT1 (which must be defined).
120 If this returns true, vinfo_for_stmt for STMT2 is guaranteed
121 to be defined as well. */
/* Predicate body: in the loop-vectorization case STMT2's basic block
   must lie inside the loop recorded in STMT1's vinfo; in the
   basic-block case STMT2 must be in the same block and not a PHI.
   NOTE(review): interior lines (the loop/bb dispatch, early returns,
   final return) are missing from this extraction -- compare upstream.  */
124 vect_same_loop_or_bb_p (gimple stmt1
, gimple stmt2
)
126 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt1
);
127 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
128 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
/* A statement without a basic block cannot be in either scope.  */
130 if (!gimple_bb (stmt2
))
135 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
136 if (!flow_bb_inside_loop_p (loop
, gimple_bb (stmt2
)))
141 if (gimple_bb (stmt2
) != BB_VINFO_BB (bb_vinfo
)
142 || gimple_code (stmt2
) == GIMPLE_PHI
)
/* On success STMT2 is guaranteed to have a vinfo (see header comment).  */
146 gcc_assert (vinfo_for_stmt (stmt2
));
150 /* If the LHS of DEF_STMT has a single use, and that statement is
151 in the same loop or basic block, return it. */
/* Look up the single immediate use of DEF_STMT's assignment LHS and
   require it to satisfy vect_same_loop_or_bb_p.  NOTE(review): the
   declarations of use_p/use_stmt and the return statements are missing
   from this extraction.  */
154 vect_single_imm_use (gimple def_stmt
)
156 tree lhs
= gimple_assign_lhs (def_stmt
)
;
160 if (!single_imm_use (lhs
, &use_p
, &use_stmt
))
163 if (!vect_same_loop_or_bb_p (def_stmt
, use_stmt
))
169 /* Check whether NAME, an ssa-name used in USE_STMT,
170 is a result of a type promotion, such that:
171 DEF_STMT: NAME = NOP (name0)
172 If CHECK_SIGN is TRUE, check that either both types are signed or both are
/* Recognize NAME (used in USE_STMT) as the result of an integral
   NOP/CONVERT conversion.  On success *ORIG_TYPE is the source type and
   *DEF_STMT the conversion statement; *PROMOTION presumably records
   whether the conversion widens (the TYPE_PRECISION >= 2x check below)
   -- TODO confirm, the assignments to *promotion are not visible in
   this extraction.  */
176 type_conversion_p (tree name
, gimple use_stmt
, bool check_sign
,
177 tree
*orig_type
, gimple
*def_stmt
, bool *promotion
)
181 loop_vec_info loop_vinfo
;
182 stmt_vec_info stmt_vinfo
;
183 tree type
= TREE_TYPE (name
);
185 enum vect_def_type dt
;
187 bb_vec_info bb_vinfo
;
189 stmt_vinfo
= vinfo_for_stmt (use_stmt
);
190 loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
191 bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
/* NAME must be a "simple use" the vectorizer understands ...  */
192 if (!vect_is_simple_use (name
, use_stmt
, loop_vinfo
, bb_vinfo
, def_stmt
,
/* ... defined internally, externally, or by a constant.  */
196 if (dt
!= vect_internal_def
197 && dt
!= vect_external_def
&& dt
!= vect_constant_def
)
/* The defining statement must be a NOP/CONVERT assignment.  */
203 if (!is_gimple_assign (*def_stmt
))
206 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt
)))
209 oprnd0
= gimple_assign_rhs1 (*def_stmt
);
211 *orig_type
= TREE_TYPE (oprnd0
);
/* Both types must be integral; matching signedness only if CHECK_SIGN.  */
212 if (!INTEGRAL_TYPE_P (type
) || !INTEGRAL_TYPE_P (*orig_type
)
213 || ((TYPE_UNSIGNED (type
) != TYPE_UNSIGNED (*orig_type
)) && check_sign
))
216 if (TYPE_PRECISION (type
) >= (TYPE_PRECISION (*orig_type
) * 2))
/* Finally the converted operand must itself be a simple use.  */
221 if (!vect_is_simple_use (oprnd0
, *def_stmt
, loop_vinfo
,
222 bb_vinfo
, &dummy_gimple
, &dummy
, &dt
))
228 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
229 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
/* Return a fresh temporary SSA name of TYPE (base name "patt") whose
   defining statement is STMT; per the header comment above, callers may
   pass STMT == NULL and set SSA_NAME_DEF_STMT themselves.  */
232 vect_recog_temp_ssa_var (tree type
, gimple stmt
)
234 return make_temp_ssa_name (type
, stmt
, "patt");
237 /* Function vect_recog_dot_prod_pattern
239 Try to find the following pattern:
245 sum_0 = phi <init, sum_1>
248 S3 x_T = (TYPE1) x_t;
249 S4 y_T = (TYPE1) y_t;
251 [S6 prod = (TYPE2) prod; #optional]
252 S7 sum_1 = prod + sum_0;
254 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
255 same size of 'TYPE1' or bigger. This is a special case of a reduction
260 * STMTS: Contains a stmt from which the pattern search begins. In the
261 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
266 * TYPE_IN: The type of the input arguments to the pattern.
268 * TYPE_OUT: The type of the output of this pattern.
270 * Return value: A new stmt that will be used to replace the sequence of
271 stmts that constitute the pattern. In this case it will be:
272 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
274 Note: The dot-prod idiom is a widening reduction pattern that is
275 vectorized without preserving all the intermediate results. It
276 produces only N/2 (widened) results (by summing up pairs of
277 intermediate results) rather than all N results. Therefore, we
278 cannot allow this pattern when we want to get all the results and in
279 the correct order (as is the case when this computation is in an
280 inner-loop nested in an outer-loop that us being vectorized). */
/* Recognize the widening dot-product reduction idiom described in the
   header comment above and build a replacement
   DOT_PROD_EXPR <oprnd00, oprnd01, oprnd1> statement.
   NOTE(review): many interior lines (returns, braces, else-arms, some
   declarations) are missing from this extraction -- compare against
   upstream tree-vect-patterns.c before relying on control flow.  */
283 vect_recog_dot_prod_pattern (vec
<gimple
> *stmts
, tree
*type_in
,
286 gimple stmt
, last_stmt
= (*stmts
)[0];
288 tree oprnd00
, oprnd01
;
289 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
290 tree type
, half_type
;
293 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
301 loop
= LOOP_VINFO_LOOP (loop_info
);
303 /* We don't allow changing the order of the computation in the inner-loop
304 when doing outer-loop vectorization. */
305 if (loop
&& nested_in_vect_loop_p (loop
, last_stmt
))
308 if (!is_gimple_assign (last_stmt
))
311 type
= gimple_expr_type (last_stmt
);
313 /* Look for the following pattern
317 DDPROD = (TYPE2) DPROD;
318 sum_1 = DDPROD + sum_0;
320 - DX is double the size of X
321 - DY is double the size of Y
322 - DX, DY, DPROD all have the same type
323 - sum is the same size of DPROD or bigger
324 - sum has been recognized as a reduction variable.
326 This is equivalent to:
327 DPROD = X w* Y; #widen mult
328 sum_1 = DPROD w+ sum_0; #widen summation
330 DPROD = X w* Y; #widen mult
331 sum_1 = DPROD + sum_0; #summation
334 /* Starting from LAST_STMT, follow the defs of its uses in search
335 of the above pattern. */
337 if (gimple_assign_rhs_code (last_stmt
) != PLUS_EXPR
)
340 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
342 /* Has been detected as widening-summation? */
344 stmt
= STMT_VINFO_RELATED_STMT (stmt_vinfo
);
345 type
= gimple_expr_type (stmt
);
346 if (gimple_assign_rhs_code (stmt
) != WIDEN_SUM_EXPR
)
348 oprnd0
= gimple_assign_rhs1 (stmt
);
349 oprnd1
= gimple_assign_rhs2 (stmt
);
350 half_type
= TREE_TYPE (oprnd0
);
356 oprnd0
= gimple_assign_rhs1 (last_stmt
);
357 oprnd1
= gimple_assign_rhs2 (last_stmt
);
358 if (!types_compatible_p (TREE_TYPE (oprnd0
), type
)
359 || !types_compatible_p (TREE_TYPE (oprnd1
), type
))
363 if (type_conversion_p (oprnd0
, stmt
, true, &half_type
, &def_stmt
,
368 oprnd0
= gimple_assign_rhs1 (stmt
);
374 /* So far so good. Since last_stmt was detected as a (summation) reduction,
375 we know that oprnd1 is the reduction variable (defined by a loop-header
376 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
377 Left to check that oprnd0 is defined by a (widen_)mult_expr */
378 if (TREE_CODE (oprnd0
) != SSA_NAME
)
381 prod_type
= half_type
;
382 stmt
= SSA_NAME_DEF_STMT (oprnd0
);
384 /* It could not be the dot_prod pattern if the stmt is outside the loop. */
385 if (!gimple_bb (stmt
) || !flow_bb_inside_loop_p (loop
, gimple_bb (stmt
)))
388 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
389 inside the loop (in case we are analyzing an outer-loop). */
390 if (!is_gimple_assign (stmt
))
392 stmt_vinfo
= vinfo_for_stmt (stmt
);
393 gcc_assert (stmt_vinfo
);
394 if (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_internal_def
)
396 if (gimple_assign_rhs_code (stmt
) != MULT_EXPR
)
398 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
400 /* Has been detected as a widening multiplication? */
402 stmt
= STMT_VINFO_RELATED_STMT (stmt_vinfo
);
403 if (gimple_assign_rhs_code (stmt
) != WIDEN_MULT_EXPR
)
405 stmt_vinfo
= vinfo_for_stmt (stmt
);
406 gcc_assert (stmt_vinfo
);
407 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_internal_def
);
408 oprnd00
= gimple_assign_rhs1 (stmt
);
409 oprnd01
= gimple_assign_rhs2 (stmt
);
/* Inherit the widen-mult's helper-stmt sequence for the new pattern.  */
410 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (last_stmt
))
411 = STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
);
415 tree half_type0
, half_type1
;
419 oprnd0
= gimple_assign_rhs1 (stmt
);
420 oprnd1
= gimple_assign_rhs2 (stmt
);
421 if (!types_compatible_p (TREE_TYPE (oprnd0
), prod_type
)
422 || !types_compatible_p (TREE_TYPE (oprnd1
), prod_type
))
424 if (!type_conversion_p (oprnd0
, stmt
, true, &half_type0
, &def_stmt
,
428 oprnd00
= gimple_assign_rhs1 (def_stmt
);
429 if (!type_conversion_p (oprnd1
, stmt
, true, &half_type1
, &def_stmt
,
433 oprnd01
= gimple_assign_rhs1 (def_stmt
);
434 if (!types_compatible_p (half_type0
, half_type1
))
436 if (TYPE_PRECISION (prod_type
) != TYPE_PRECISION (half_type0
) * 2)
440 half_type
= TREE_TYPE (oprnd00
);
441 *type_in
= half_type
;
444 /* Pattern detected. Create a stmt to be used to replace the pattern: */
445 var
= vect_recog_temp_ssa_var (type
, NULL
);
446 pattern_stmt
= gimple_build_assign (var
, DOT_PROD_EXPR
,
447 oprnd00
, oprnd01
, oprnd1
);
449 if (dump_enabled_p ())
451 dump_printf_loc (MSG_NOTE
, vect_location
,
452 "vect_recog_dot_prod_pattern: detected: ");
453 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
454 dump_printf (MSG_NOTE
, "\n");
461 /* Function vect_recog_sad_pattern
463 Try to find the following Sum of Absolute Difference (SAD) pattern:
466 signed TYPE1 diff, abs_diff;
469 sum_0 = phi <init, sum_1>
472 S3 x_T = (TYPE1) x_t;
473 S4 y_T = (TYPE1) y_t;
475 S6 abs_diff = ABS_EXPR <diff>;
476 [S7 abs_diff = (TYPE2) abs_diff; #optional]
477 S8 sum_1 = abs_diff + sum_0;
479 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
480 same size of 'TYPE1' or bigger. This is a special case of a reduction
485 * STMTS: Contains a stmt from which the pattern search begins. In the
486 example, when this function is called with S8, the pattern
487 {S3,S4,S5,S6,S7,S8} will be detected.
491 * TYPE_IN: The type of the input arguments to the pattern.
493 * TYPE_OUT: The type of the output of this pattern.
495 * Return value: A new stmt that will be used to replace the sequence of
496 stmts that constitute the pattern. In this case it will be:
497 SAD_EXPR <x_t, y_t, sum_0>
/* Recognize the sum-of-absolute-differences reduction idiom described
   in the header comment above and build a replacement
   SAD_EXPR <sad_oprnd0, sad_oprnd1, plus_oprnd1> statement.
   NOTE(review): interior lines (returns, braces, else-arms, some
   declarations) are missing from this extraction -- compare against
   upstream tree-vect-patterns.c before relying on control flow.  */
501 vect_recog_sad_pattern (vec
<gimple
> *stmts
, tree
*type_in
,
504 gimple last_stmt
= (*stmts
)[0];
505 tree sad_oprnd0
, sad_oprnd1
;
506 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
508 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
515 loop
= LOOP_VINFO_LOOP (loop_info
);
517 /* We don't allow changing the order of the computation in the inner-loop
518 when doing outer-loop vectorization. */
519 if (loop
&& nested_in_vect_loop_p (loop
, last_stmt
))
522 if (!is_gimple_assign (last_stmt
))
525 tree sum_type
= gimple_expr_type (last_stmt
);
527 /* Look for the following pattern
531 DAD = ABS_EXPR <DDIFF>;
532 DDPROD = (TYPE2) DPROD;
535 - DX is at least double the size of X
536 - DY is at least double the size of Y
537 - DX, DY, DDIFF, DAD all have the same type
538 - sum is the same size of DAD or bigger
539 - sum has been recognized as a reduction variable.
541 This is equivalent to:
542 DDIFF = X w- Y; #widen sub
543 DAD = ABS_EXPR <DDIFF>;
544 sum_1 = DAD w+ sum_0; #widen summation
546 DDIFF = X w- Y; #widen sub
547 DAD = ABS_EXPR <DDIFF>;
548 sum_1 = DAD + sum_0; #summation
551 /* Starting from LAST_STMT, follow the defs of its uses in search
552 of the above pattern. */
554 if (gimple_assign_rhs_code (last_stmt
) != PLUS_EXPR
)
557 tree plus_oprnd0
, plus_oprnd1
;
559 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
561 /* Has been detected as widening-summation? */
563 gimple stmt
= STMT_VINFO_RELATED_STMT (stmt_vinfo
);
564 sum_type
= gimple_expr_type (stmt
);
565 if (gimple_assign_rhs_code (stmt
) != WIDEN_SUM_EXPR
)
567 plus_oprnd0
= gimple_assign_rhs1 (stmt
);
568 plus_oprnd1
= gimple_assign_rhs2 (stmt
);
569 half_type
= TREE_TYPE (plus_oprnd0
);
575 plus_oprnd0
= gimple_assign_rhs1 (last_stmt
);
576 plus_oprnd1
= gimple_assign_rhs2 (last_stmt
);
577 if (!types_compatible_p (TREE_TYPE (plus_oprnd0
), sum_type
)
578 || !types_compatible_p (TREE_TYPE (plus_oprnd1
), sum_type
))
581 /* The type conversion could be promotion, demotion,
582 or just signed -> unsigned. */
583 if (type_conversion_p (plus_oprnd0
, last_stmt
, false,
584 &half_type
, &def_stmt
, &promotion
))
585 plus_oprnd0
= gimple_assign_rhs1 (def_stmt
);
587 half_type
= sum_type
;
590 /* So far so good. Since last_stmt was detected as a (summation) reduction,
591 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
592 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
593 Then check that plus_oprnd0 is defined by an abs_expr. */
595 if (TREE_CODE (plus_oprnd0
) != SSA_NAME
)
598 tree abs_type
= half_type
;
599 gimple abs_stmt
= SSA_NAME_DEF_STMT (plus_oprnd0
);
601 /* It could not be the sad pattern if the abs_stmt is outside the loop. */
602 if (!gimple_bb (abs_stmt
) || !flow_bb_inside_loop_p (loop
, gimple_bb (abs_stmt
)))
605 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
606 inside the loop (in case we are analyzing an outer-loop). */
607 if (!is_gimple_assign (abs_stmt
))
610 stmt_vec_info abs_stmt_vinfo
= vinfo_for_stmt (abs_stmt
);
611 gcc_assert (abs_stmt_vinfo
);
612 if (STMT_VINFO_DEF_TYPE (abs_stmt_vinfo
) != vect_internal_def
)
614 if (gimple_assign_rhs_code (abs_stmt
) != ABS_EXPR
)
617 tree abs_oprnd
= gimple_assign_rhs1 (abs_stmt
);
618 if (!types_compatible_p (TREE_TYPE (abs_oprnd
), abs_type
))
/* ABS of an unsigned value would be a no-op; reject it.  */
620 if (TYPE_UNSIGNED (abs_type
))
623 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
625 if (TREE_CODE (abs_oprnd
) != SSA_NAME
)
628 gimple diff_stmt
= SSA_NAME_DEF_STMT (abs_oprnd
);
630 /* It could not be the sad pattern if the diff_stmt is outside the loop. */
631 if (!gimple_bb (diff_stmt
)
632 || !flow_bb_inside_loop_p (loop
, gimple_bb (diff_stmt
)))
635 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
636 inside the loop (in case we are analyzing an outer-loop). */
637 if (!is_gimple_assign (diff_stmt
))
640 stmt_vec_info diff_stmt_vinfo
= vinfo_for_stmt (diff_stmt
);
641 gcc_assert (diff_stmt_vinfo
);
642 if (STMT_VINFO_DEF_TYPE (diff_stmt_vinfo
) != vect_internal_def
)
644 if (gimple_assign_rhs_code (diff_stmt
) != MINUS_EXPR
)
647 tree half_type0
, half_type1
;
650 tree minus_oprnd0
= gimple_assign_rhs1 (diff_stmt
);
651 tree minus_oprnd1
= gimple_assign_rhs2 (diff_stmt
);
653 if (!types_compatible_p (TREE_TYPE (minus_oprnd0
), abs_type
)
654 || !types_compatible_p (TREE_TYPE (minus_oprnd1
), abs_type
))
656 if (!type_conversion_p (minus_oprnd0
, diff_stmt
, false,
657 &half_type0
, &def_stmt
, &promotion
)
660 sad_oprnd0
= gimple_assign_rhs1 (def_stmt
);
662 if (!type_conversion_p (minus_oprnd1
, diff_stmt
, false,
663 &half_type1
, &def_stmt
, &promotion
)
666 sad_oprnd1
= gimple_assign_rhs1 (def_stmt
);
668 if (!types_compatible_p (half_type0
, half_type1
))
/* Both the abs and the sum must be wide enough to hold the widened
   difference without overflow.  */
670 if (TYPE_PRECISION (abs_type
) < TYPE_PRECISION (half_type0
) * 2
671 || TYPE_PRECISION (sum_type
) < TYPE_PRECISION (half_type0
) * 2)
674 *type_in
= TREE_TYPE (sad_oprnd0
);
675 *type_out
= sum_type
;
677 /* Pattern detected. Create a stmt to be used to replace the pattern: */
678 tree var
= vect_recog_temp_ssa_var (sum_type
, NULL
);
679 gimple pattern_stmt
= gimple_build_assign (var
, SAD_EXPR
, sad_oprnd0
,
680 sad_oprnd1
, plus_oprnd1
);
682 if (dump_enabled_p ())
684 dump_printf_loc (MSG_NOTE
, vect_location
,
685 "vect_recog_sad_pattern: detected: ");
686 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
687 dump_printf (MSG_NOTE
, "\n");
694 /* Handle widening operation by a constant. At the moment we support MULT_EXPR
697 For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
698 we check that CONST_OPRND is less or equal to the size of HALF_TYPE.
700 Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
701 HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
702 that satisfies the above restrictions, we can perform a widening opeartion
703 from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
704 with a_it = (interm_type) a_t; Store such operation in *WSTMT. */
/* Per the header comment above: handle a widening MULT_EXPR/LSHIFT_EXPR
   by constant CONST_OPRND.  Either CONST_OPRND fits *HALF_TYPE
   directly, or (when TYPE is >= 4x *HALF_TYPE) an intermediate type is
   built, a conversion stmt is stored in *WSTMT, and *HALF_TYPE is
   updated.  NOTE(review): returns and braces are missing from this
   extraction -- compare upstream before relying on control flow.  */
707 vect_handle_widen_op_by_const (gimple stmt
, enum tree_code code
,
708 tree const_oprnd
, tree
*oprnd
,
709 gimple
*wstmt
, tree type
,
710 tree
*half_type
, gimple def_stmt
)
712 tree new_type
, new_oprnd
;
/* Only multiplication and left shift are supported.  */
714 if (code
!= MULT_EXPR
&& code
!= LSHIFT_EXPR
)
717 if (((code
== MULT_EXPR
&& int_fits_type_p (const_oprnd
, *half_type
))
718 || (code
== LSHIFT_EXPR
719 && compare_tree_int (const_oprnd
, TYPE_PRECISION (*half_type
))
721 && TYPE_PRECISION (type
) == (TYPE_PRECISION (*half_type
) * 2))
723 /* CONST_OPRND is a constant of HALF_TYPE. */
724 *oprnd
= gimple_assign_rhs1 (def_stmt
);
728 if (TYPE_PRECISION (type
) < (TYPE_PRECISION (*half_type
) * 4))
731 if (!vect_same_loop_or_bb_p (stmt
, def_stmt
))
734 /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
735 a type 2 times bigger than HALF_TYPE. */
736 new_type
= build_nonstandard_integer_type (TYPE_PRECISION (type
) / 2,
737 TYPE_UNSIGNED (type
));
738 if ((code
== MULT_EXPR
&& !int_fits_type_p (const_oprnd
, new_type
))
739 || (code
== LSHIFT_EXPR
740 && compare_tree_int (const_oprnd
, TYPE_PRECISION (new_type
)) == 1))
743 /* Use NEW_TYPE for widening operation and create a_T = (NEW_TYPE) a_t; */
744 *oprnd
= gimple_assign_rhs1 (def_stmt
);
745 new_oprnd
= make_ssa_name (new_type
);
746 *wstmt
= gimple_build_assign (new_oprnd
, NOP_EXPR
, *oprnd
);
749 *half_type
= new_type
;
754 /* Function vect_recog_widen_mult_pattern
756 Try to find the following pattern:
760 TYPE a_T, b_T, prod_T;
766 S5 prod_T = a_T * b_T;
768 where type 'TYPE' is at least double the size of type 'type1' and 'type2'.
770 Also detect unsigned cases:
774 unsigned TYPE u_prod_T;
775 TYPE a_T, b_T, prod_T;
781 S5 prod_T = a_T * b_T;
782 S6 u_prod_T = (unsigned TYPE) prod_T;
784 and multiplication by constants:
791 S5 prod_T = a_T * CONST;
793 A special case of multiplication by constants is when 'TYPE' is 4 times
794 bigger than 'type', but CONST fits an intermediate type 2 times smaller
795 than 'TYPE'. In that case we create an additional pattern stmt for S3
796 to create a variable of the intermediate type, and perform widen-mult
797 on the intermediate type as well:
801 TYPE a_T, prod_T, prod_T';
805 '--> a_it = (interm_type) a_t;
806 S5 prod_T = a_T * CONST;
807 '--> prod_T' = a_it w* CONST;
811 * STMTS: Contains a stmt from which the pattern search begins. In the
812 example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
813 is detected. In case of unsigned widen-mult, the original stmt (S5) is
814 replaced with S6 in STMTS. In case of multiplication by a constant
815 of an intermediate type (the last case above), STMTS also contains S3
816 (inserted before S5).
820 * TYPE_IN: The type of the input arguments to the pattern.
822 * TYPE_OUT: The type of the output of this pattern.
824 * Return value: A new stmt that will be used to replace the sequence of
825 stmts that constitute the pattern. In this case it will be:
826 WIDEN_MULT <a_t, b_t>
827 If the result of WIDEN_MULT needs to be converted to a larger type, the
828 returned stmt will be this type conversion stmt.
/* Recognize the widening-multiplication idiom described in the header
   comment above and build a WIDEN_MULT_EXPR replacement (plus, when
   ITYPE != TYPE, a trailing conversion stmt that becomes the returned
   pattern statement).  NOTE(review): interior lines (returns, braces,
   else-arms, several declarations such as oprnd0/oprnd1/itype) are
   missing from this extraction -- compare upstream tree-vect-patterns.c
   before relying on control flow.  */
832 vect_recog_widen_mult_pattern (vec
<gimple
> *stmts
,
833 tree
*type_in
, tree
*type_out
)
835 gimple last_stmt
= stmts
->pop ();
836 gimple def_stmt0
, def_stmt1
;
838 tree type
, half_type0
, half_type1
;
839 gimple new_stmt
= NULL
, pattern_stmt
= NULL
;
840 tree vectype
, vecitype
;
842 enum tree_code dummy_code
;
848 if (!is_gimple_assign (last_stmt
))
851 type
= gimple_expr_type (last_stmt
);
853 /* Starting from LAST_STMT, follow the defs of its uses in search
854 of the above pattern. */
856 if (gimple_assign_rhs_code (last_stmt
) != MULT_EXPR
)
859 oprnd0
= gimple_assign_rhs1 (last_stmt
);
860 oprnd1
= gimple_assign_rhs2 (last_stmt
);
861 if (!types_compatible_p (TREE_TYPE (oprnd0
), type
)
862 || !types_compatible_p (TREE_TYPE (oprnd1
), type
))
865 /* Check argument 0. */
866 if (!type_conversion_p (oprnd0
, last_stmt
, false, &half_type0
, &def_stmt0
,
870 /* Check argument 1. */
871 op1_ok
= type_conversion_p (oprnd1
, last_stmt
, false, &half_type1
,
872 &def_stmt1
, &promotion
);
874 if (op1_ok
&& promotion
)
876 oprnd0
= gimple_assign_rhs1 (def_stmt0
);
877 oprnd1
= gimple_assign_rhs1 (def_stmt1
);
/* Operand 1 may instead be an integer constant handled via
   vect_handle_widen_op_by_const.  */
881 if (TREE_CODE (oprnd1
) == INTEGER_CST
882 && TREE_CODE (half_type0
) == INTEGER_TYPE
883 && vect_handle_widen_op_by_const (last_stmt
, MULT_EXPR
, oprnd1
,
884 &oprnd0
, &new_stmt
, type
,
885 &half_type0
, def_stmt0
))
887 half_type1
= half_type0
;
888 oprnd1
= fold_convert (half_type1
, oprnd1
);
894 /* If the two arguments have different sizes, convert the one with
895 the smaller type into the larger type. */
896 if (TYPE_PRECISION (half_type0
) != TYPE_PRECISION (half_type1
))
898 /* If we already used up the single-stmt slot give up. */
903 gimple def_stmt
= NULL
;
905 if (TYPE_PRECISION (half_type0
) < TYPE_PRECISION (half_type1
))
907 def_stmt
= def_stmt0
;
908 half_type0
= half_type1
;
913 def_stmt
= def_stmt1
;
914 half_type1
= half_type0
;
918 tree old_oprnd
= gimple_assign_rhs1 (def_stmt
);
919 tree new_oprnd
= make_ssa_name (half_type0
);
920 new_stmt
= gimple_build_assign (new_oprnd
, NOP_EXPR
, old_oprnd
);
924 /* Handle unsigned case. Look for
925 S6 u_prod_T = (unsigned TYPE) prod_T;
926 Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */
927 if (TYPE_UNSIGNED (type
) != TYPE_UNSIGNED (half_type0
))
933 if (TYPE_UNSIGNED (type
) == TYPE_UNSIGNED (half_type1
))
936 use_stmt
= vect_single_imm_use (last_stmt
);
937 if (!use_stmt
|| !is_gimple_assign (use_stmt
)
938 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt
)))
941 use_lhs
= gimple_assign_lhs (use_stmt
);
942 use_type
= TREE_TYPE (use_lhs
);
943 if (!INTEGRAL_TYPE_P (use_type
)
944 || (TYPE_UNSIGNED (type
) == TYPE_UNSIGNED (use_type
))
945 || (TYPE_PRECISION (type
) != TYPE_PRECISION (use_type
)))
/* Replace the original stmt with the sign-converting use (S6).  */
949 last_stmt
= use_stmt
;
952 if (!types_compatible_p (half_type0
, half_type1
))
955 /* If TYPE is more than twice larger than HALF_TYPE, we use WIDEN_MULT
956 to get an intermediate result of type ITYPE. In this case we need
957 to build a statement to convert this intermediate result to type TYPE. */
959 if (TYPE_PRECISION (type
) > TYPE_PRECISION (half_type0
) * 2)
960 itype
= build_nonstandard_integer_type
961 (GET_MODE_BITSIZE (TYPE_MODE (half_type0
)) * 2,
962 TYPE_UNSIGNED (type
));
964 /* Pattern detected. */
965 if (dump_enabled_p ())
966 dump_printf_loc (MSG_NOTE
, vect_location
,
967 "vect_recog_widen_mult_pattern: detected:\n");
969 /* Check target support */
970 vectype
= get_vectype_for_scalar_type (half_type0
);
971 vecitype
= get_vectype_for_scalar_type (itype
);
974 || !supportable_widening_operation (WIDEN_MULT_EXPR
, last_stmt
,
976 &dummy_code
, &dummy_code
,
977 &dummy_int
, &dummy_vec
))
981 *type_out
= get_vectype_for_scalar_type (type
);
983 /* Pattern supported. Create a stmt to be used to replace the pattern: */
984 var
= vect_recog_temp_ssa_var (itype
, NULL
);
985 pattern_stmt
= gimple_build_assign (var
, WIDEN_MULT_EXPR
, oprnd0
, oprnd1
);
987 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
988 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
989 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
990 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
) = NULL
;
992 /* If the original two operands have different sizes, we may need to convert
993 the smaller one into the larget type. If this is the case, at this point
994 the new stmt is already built. */
997 append_pattern_def_seq (stmt_vinfo
, new_stmt
);
998 stmt_vec_info new_stmt_info
999 = new_stmt_vec_info (new_stmt
, loop_vinfo
, bb_vinfo
);
1000 set_vinfo_for_stmt (new_stmt
, new_stmt_info
);
1001 STMT_VINFO_VECTYPE (new_stmt_info
) = vectype
;
1004 /* If ITYPE is not TYPE, we need to build a type convertion stmt to convert
1005 the result of the widen-mult operation into type TYPE. */
1008 append_pattern_def_seq (stmt_vinfo
, pattern_stmt
);
1009 stmt_vec_info pattern_stmt_info
1010 = new_stmt_vec_info (pattern_stmt
, loop_vinfo
, bb_vinfo
);
1011 set_vinfo_for_stmt (pattern_stmt
, pattern_stmt_info
);
1012 STMT_VINFO_VECTYPE (pattern_stmt_info
) = vecitype
;
1013 pattern_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (type
, NULL
),
1015 gimple_assign_lhs (pattern_stmt
));
1018 if (dump_enabled_p ())
1019 dump_gimple_stmt_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, pattern_stmt
, 0);
/* Push back the (possibly replaced) original stmt for the caller.  */
1021 stmts
->safe_push (last_stmt
);
1022 return pattern_stmt
;
1026 /* Function vect_recog_pow_pattern
1028 Try to find the following pattern:
1032 with POW being one of pow, powf, powi, powif and N being
1037 * LAST_STMT: A stmt from which the pattern search begins.
1041 * TYPE_IN: The type of the input arguments to the pattern.
1043 * TYPE_OUT: The type of the output of this pattern.
1045 * Return value: A new stmt that will be used to replace the sequence of
1046 stmts that constitute the pattern. In this case it will be:
/* Recognize pow/powi builtin calls with constant exponent, per the
   header comment above: exponent 2 becomes a plain MULT_EXPR (squaring)
   and exponent 0.5 becomes a BUILT_IN_SQRT call when the target can
   vectorize it.  NOTE(review): interior lines (other switch cases,
   returns, braces) are missing from this extraction -- compare
   upstream tree-vect-patterns.c before relying on control flow.  */
1053 vect_recog_pow_pattern (vec
<gimple
> *stmts
, tree
*type_in
,
1056 gimple last_stmt
= (*stmts
)[0];
1057 tree fn
, base
, exp
= NULL
;
/* Only calls with a used result are candidates.  */
1061 if (!is_gimple_call (last_stmt
) || gimple_call_lhs (last_stmt
) == NULL
)
1064 fn
= gimple_call_fndecl (last_stmt
);
1065 if (fn
== NULL_TREE
|| DECL_BUILT_IN_CLASS (fn
) != BUILT_IN_NORMAL
)
1068 switch (DECL_FUNCTION_CODE (fn
))
1070 case BUILT_IN_POWIF
:
1074 base
= gimple_call_arg (last_stmt
, 0);
1075 exp
= gimple_call_arg (last_stmt
, 1);
1076 if (TREE_CODE (exp
) != REAL_CST
1077 && TREE_CODE (exp
) != INTEGER_CST
)
1085 /* We now have a pow or powi builtin function call with a constant
1088 *type_out
= NULL_TREE
;
1090 /* Catch squaring. */
1091 if ((tree_fits_shwi_p (exp
)
1092 && tree_to_shwi (exp
) == 2)
1093 || (TREE_CODE (exp
) == REAL_CST
1094 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp
), dconst2
)))
1096 *type_in
= TREE_TYPE (base
);
1098 var
= vect_recog_temp_ssa_var (TREE_TYPE (base
), NULL
);
1099 stmt
= gimple_build_assign (var
, MULT_EXPR
, base
, base
);
1103 /* Catch square root. */
1104 if (TREE_CODE (exp
) == REAL_CST
1105 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp
), dconsthalf
))
1107 tree newfn
= mathfn_built_in (TREE_TYPE (base
), BUILT_IN_SQRT
);
1108 *type_in
= get_vectype_for_scalar_type (TREE_TYPE (base
));
1111 gcall
*stmt
= gimple_build_call (newfn
, 1, base
);
1112 if (vectorizable_function (stmt
, *type_in
, *type_in
)
1115 var
= vect_recog_temp_ssa_var (TREE_TYPE (base
), stmt
);
1116 gimple_call_set_lhs (stmt
, var
);
1126 /* Function vect_recog_widen_sum_pattern
1128 Try to find the following pattern:
1131 TYPE x_T, sum = init;
1133 sum_0 = phi <init, sum_1>
1135 S2 x_T = (TYPE) x_t;
1136 S3 sum_1 = x_T + sum_0;
1138 where type 'TYPE' is at least double the size of type 'type', i.e - we're
1139 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1140 a special case of a reduction computation.
1144 * LAST_STMT: A stmt from which the pattern search begins. In the example,
1145 when this function is called with S3, the pattern {S2,S3} will be detected.
1149 * TYPE_IN: The type of the input arguments to the pattern.
1151 * TYPE_OUT: The type of the output of this pattern.
1153 * Return value: A new stmt that will be used to replace the sequence of
1154 stmts that constitute the pattern. In this case it will be:
1155 WIDEN_SUM <x_t, sum_0>
1157 Note: The widening-sum idiom is a widening reduction pattern that is
1158 vectorized without preserving all the intermediate results. It
1159 produces only N/2 (widened) results (by summing up pairs of
1160 intermediate results) rather than all N results. Therefore, we
1161 cannot allow this pattern when we want to get all the results and in
1162 the correct order (as is the case when this computation is in an
1163 inner-loop nested in an outer-loop that us being vectorized). */
/* Recognize the widening-summation reduction idiom described in the
   header comment above and build a replacement
   WIDEN_SUM_EXPR <oprnd0, oprnd1> statement.  NOTE(review): interior
   lines (returns, braces) are missing from this extraction -- compare
   upstream tree-vect-patterns.c before relying on control flow.  */
1166 vect_recog_widen_sum_pattern (vec
<gimple
> *stmts
, tree
*type_in
,
1169 gimple stmt
, last_stmt
= (*stmts
)[0];
1170 tree oprnd0
, oprnd1
;
1171 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
1172 tree type
, half_type
;
1173 gimple pattern_stmt
;
1174 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1182 loop
= LOOP_VINFO_LOOP (loop_info
);
1184 /* We don't allow changing the order of the computation in the inner-loop
1185 when doing outer-loop vectorization. */
1186 if (loop
&& nested_in_vect_loop_p (loop
, last_stmt
))
1189 if (!is_gimple_assign (last_stmt
))
1192 type
= gimple_expr_type (last_stmt
);
1194 /* Look for the following pattern
1197 In which DX is at least double the size of X, and sum_1 has been
1198 recognized as a reduction variable.
1201 /* Starting from LAST_STMT, follow the defs of its uses in search
1202 of the above pattern. */
1204 if (gimple_assign_rhs_code (last_stmt
) != PLUS_EXPR
)
1207 oprnd0
= gimple_assign_rhs1 (last_stmt
);
1208 oprnd1
= gimple_assign_rhs2 (last_stmt
);
1209 if (!types_compatible_p (TREE_TYPE (oprnd0
), type
)
1210 || !types_compatible_p (TREE_TYPE (oprnd1
), type
))
1213 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1214 we know that oprnd1 is the reduction variable (defined by a loop-header
1215 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1216 Left to check that oprnd0 is defined by a cast from type 'type' to type
1219 if (!type_conversion_p (oprnd0
, last_stmt
, true, &half_type
, &stmt
,
1224 oprnd0
= gimple_assign_rhs1 (stmt
);
1225 *type_in
= half_type
;
1228 /* Pattern detected. Create a stmt to be used to replace the pattern: */
1229 var
= vect_recog_temp_ssa_var (type
, NULL
);
1230 pattern_stmt
= gimple_build_assign (var
, WIDEN_SUM_EXPR
, oprnd0
, oprnd1
);
1232 if (dump_enabled_p ())
1234 dump_printf_loc (MSG_NOTE
, vect_location
,
1235 "vect_recog_widen_sum_pattern: detected: ");
1236 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
1237 dump_printf (MSG_NOTE
, "\n");
1240 return pattern_stmt
;
1244 /* Return TRUE if the operation in STMT can be performed on a smaller type.
1247 STMT - a statement to check.
1248 DEF - we support operations with two operands, one of which is constant.
1249 The other operand can be defined by a demotion operation, or by a
1250 previous statement in a sequence of over-promoted operations. In the
1251 later case DEF is used to replace that operand. (It is defined by a
1252 pattern statement we created for the previous statement in the
1256 NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not
1257 NULL, it's the type of DEF.
1258 STMTS - additional pattern statements. If a pattern statement (type
1259 conversion) is created in this function, its original statement is
1263 OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
1264 operands to use in the new pattern statement for STMT (will be created
1265 in vect_recog_over_widening_pattern ()).
1266 NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
1267 statements for STMT: the first one is a type promotion and the second
1268 one is the operation itself. We return the type promotion statement
1269 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
1270 the second pattern statement. */
/* Check whether the (shift / bitwise) operation in STMT, whose second
   operand is an INTEGER_CST, can be performed in a type narrower than
   its current one; on success report the narrow type and the operands
   to use, possibly creating a promotion stmt in *NEW_DEF_STMT.
   NOTE(review): mangled extraction — original lines are split across
   physical lines and some lines are missing; restore from upstream.  */
1273 vect_operation_fits_smaller_type (gimple stmt
, tree def
, tree
*new_type
,
1274 tree
*op0
, tree
*op1
, gimple
*new_def_stmt
,
1277 enum tree_code code
;
1278 tree const_oprnd
, oprnd
;
1279 tree interm_type
= NULL_TREE
, half_type
, new_oprnd
, type
;
1280 gimple def_stmt
, new_stmt
;
1286 *new_def_stmt
= NULL
;
1288 if (!is_gimple_assign (stmt
))
/* Only shifts and bitwise AND/OR/XOR are candidates here.  */
1291 code
= gimple_assign_rhs_code (stmt
);
1292 if (code
!= LSHIFT_EXPR
&& code
!= RSHIFT_EXPR
1293 && code
!= BIT_IOR_EXPR
&& code
!= BIT_XOR_EXPR
&& code
!= BIT_AND_EXPR
)
1296 oprnd
= gimple_assign_rhs1 (stmt
);
1297 const_oprnd
= gimple_assign_rhs2 (stmt
);
1298 type
= gimple_expr_type (stmt
);
1300 if (TREE_CODE (oprnd
) != SSA_NAME
1301 || TREE_CODE (const_oprnd
) != INTEGER_CST
)
1304 /* If oprnd has other uses besides that in stmt we cannot mark it
1305 as being part of a pattern only. */
1306 if (!has_single_use (oprnd
))
1309 /* If we are in the middle of a sequence, we use DEF from a previous
1310 statement. Otherwise, OPRND has to be a result of type promotion. */
1313 half_type
= *new_type
;
1319 if (!type_conversion_p (oprnd
, stmt
, false, &half_type
, &def_stmt
,
1322 || !vect_same_loop_or_bb_p (stmt
, def_stmt
))
1326 /* Can we perform the operation on a smaller type? */
1332 if (!int_fits_type_p (const_oprnd
, half_type
))
1334 /* HALF_TYPE is not enough. Try a bigger type if possible. */
1335 if (TYPE_PRECISION (type
) < (TYPE_PRECISION (half_type
) * 4))
1338 interm_type
= build_nonstandard_integer_type (
1339 TYPE_PRECISION (half_type
) * 2, TYPE_UNSIGNED (type
));
1340 if (!int_fits_type_p (const_oprnd
, interm_type
))
1347 /* Try intermediate type - HALF_TYPE is not enough for sure. */
1348 if (TYPE_PRECISION (type
) < (TYPE_PRECISION (half_type
) * 4))
1351 /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
1352 (e.g., if the original value was char, the shift amount is at most 8
1353 if we want to use short). */
1354 if (compare_tree_int (const_oprnd
, TYPE_PRECISION (half_type
)) == 1)
1357 interm_type
= build_nonstandard_integer_type (
1358 TYPE_PRECISION (half_type
) * 2, TYPE_UNSIGNED (type
));
1360 if (!vect_supportable_shift (code
, interm_type
))
1366 if (vect_supportable_shift (code
, half_type
))
1369 /* Try intermediate type - HALF_TYPE is not supported. */
1370 if (TYPE_PRECISION (type
) < (TYPE_PRECISION (half_type
) * 4))
1373 interm_type
= build_nonstandard_integer_type (
1374 TYPE_PRECISION (half_type
) * 2, TYPE_UNSIGNED (type
));
1376 if (!vect_supportable_shift (code
, interm_type
))
1385 /* There are four possible cases:
1386 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
1387 the first statement in the sequence)
1388 a. The original, HALF_TYPE, is not enough - we replace the promotion
1389 from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
1390 b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
1392 2. OPRND is defined by a pattern statement we created.
1393 a. Its type is not sufficient for the operation, we create a new stmt:
1394 a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store
1395 this statement in NEW_DEF_STMT, and it is later put in
1396 STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
1397 b. OPRND is good to use in the new statement. */
1402 /* Replace the original type conversion HALF_TYPE->TYPE with
1403 HALF_TYPE->INTERM_TYPE. */
1404 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
)))
1406 new_stmt
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
));
1407 /* Check if the already created pattern stmt is what we need. */
1408 if (!is_gimple_assign (new_stmt
)
1409 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt
))
1410 || TREE_TYPE (gimple_assign_lhs (new_stmt
)) != interm_type
)
1413 stmts
->safe_push (def_stmt
);
1414 oprnd
= gimple_assign_lhs (new_stmt
);
1418 /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */
1419 oprnd
= gimple_assign_rhs1 (def_stmt
);
1420 new_oprnd
= make_ssa_name (interm_type
);
1421 new_stmt
= gimple_build_assign (new_oprnd
, NOP_EXPR
, oprnd
);
1422 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
)) = new_stmt
;
1423 stmts
->safe_push (def_stmt
);
1429 /* Retrieve the operand before the type promotion. */
1430 oprnd
= gimple_assign_rhs1 (def_stmt
);
1437 /* Create a type conversion HALF_TYPE->INTERM_TYPE. */
1438 new_oprnd
= make_ssa_name (interm_type
);
1439 new_stmt
= gimple_build_assign (new_oprnd
, NOP_EXPR
, oprnd
);
1441 *new_def_stmt
= new_stmt
;
1444 /* Otherwise, OPRND is already set. */
/* Report the chosen narrow type and convert the constant operand into it.  */
1448 *new_type
= interm_type
;
1450 *new_type
= half_type
;
1453 *op1
= fold_convert (*new_type
, const_oprnd
);
1459 /* Try to find a statement or a sequence of statements that can be performed
1463 TYPE x_T, res0_T, res1_T;
1466 S2 x_T = (TYPE) x_t;
1467 S3 res0_T = op (x_T, C0);
1468 S4 res1_T = op (res0_T, C1);
1469 S5 ... = () res1_T; - type demotion
1471 where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
1473 Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
1474 be 'type' or some intermediate type. For now, we expect S5 to be a type
1475 demotion operation. We also check that S3 and S4 have only one use. */
/* Detect a sequence of over-promoted operations (computed in a type wider
   than needed) that ends in a type demotion, and create pattern statements
   performing the work in a narrower type.
   NOTE(review): mangled extraction — original lines are split across
   physical lines and some lines are missing; restore from upstream.  */
1478 vect_recog_over_widening_pattern (vec
<gimple
> *stmts
,
1479 tree
*type_in
, tree
*type_out
)
1481 gimple stmt
= stmts
->pop ();
1482 gimple pattern_stmt
= NULL
, new_def_stmt
, prev_stmt
= NULL
, use_stmt
= NULL
;
1483 tree op0
, op1
, vectype
= NULL_TREE
, use_lhs
, use_type
;
1484 tree var
= NULL_TREE
, new_type
= NULL_TREE
, new_oprnd
;
1491 if (!vinfo_for_stmt (stmt
)
1492 || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt
)))
/* Ask whether STMT's operation fits a narrower type; on success the new
   operands and (possibly) a promotion stmt come back through the outs.  */
1495 new_def_stmt
= NULL
;
1496 if (!vect_operation_fits_smaller_type (stmt
, var
, &new_type
,
1497 &op0
, &op1
, &new_def_stmt
,
1506 /* STMT can be performed on a smaller type. Check its uses. */
1507 use_stmt
= vect_single_imm_use (stmt
);
1508 if (!use_stmt
|| !is_gimple_assign (use_stmt
))
1511 /* Create pattern statement for STMT. */
1512 vectype
= get_vectype_for_scalar_type (new_type
);
1516 /* We want to collect all the statements for which we create pattern
1517 statetments, except for the case when the last statement in the
1518 sequence doesn't have a corresponding pattern statement. In such
1519 case we associate the last pattern statement with the last statement
1520 in the sequence. Therefore, we only add the original statement to
1521 the list if we know that it is not the last. */
1523 stmts
->safe_push (prev_stmt
);
1525 var
= vect_recog_temp_ssa_var (new_type
, NULL
);
1527 = gimple_build_assign (var
, gimple_assign_rhs_code (stmt
), op0
, op1
);
1528 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt
)) = pattern_stmt
;
1529 new_pattern_def_seq (vinfo_for_stmt (stmt
), new_def_stmt
);
1531 if (dump_enabled_p ())
1533 dump_printf_loc (MSG_NOTE
, vect_location
,
1534 "created pattern stmt: ");
1535 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
1536 dump_printf (MSG_NOTE
, "\n");
1539 type
= gimple_expr_type (stmt
);
1546 /* We got a sequence. We expect it to end with a type demotion operation.
1547 Otherwise, we quit (for now). There are three possible cases: the
1548 conversion is to NEW_TYPE (we don't do anything), the conversion is to
1549 a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
1550 NEW_TYPE differs (we create a new conversion statement). */
1551 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt
)))
1553 use_lhs
= gimple_assign_lhs (use_stmt
);
1554 use_type
= TREE_TYPE (use_lhs
);
1555 /* Support only type demotion or signedess change. */
1556 if (!INTEGRAL_TYPE_P (use_type
)
1557 || TYPE_PRECISION (type
) <= TYPE_PRECISION (use_type
))
1560 /* Check that NEW_TYPE is not bigger than the conversion result. */
1561 if (TYPE_PRECISION (new_type
) > TYPE_PRECISION (use_type
))
1564 if (TYPE_UNSIGNED (new_type
) != TYPE_UNSIGNED (use_type
)
1565 || TYPE_PRECISION (new_type
) != TYPE_PRECISION (use_type
))
1567 /* Create NEW_TYPE->USE_TYPE conversion. */
1568 new_oprnd
= make_ssa_name (use_type
);
1569 pattern_stmt
= gimple_build_assign (new_oprnd
, NOP_EXPR
, var
);
1570 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt
)) = pattern_stmt
;
1572 *type_in
= get_vectype_for_scalar_type (new_type
);
1573 *type_out
= get_vectype_for_scalar_type (use_type
);
1575 /* We created a pattern statement for the last statement in the
1576 sequence, so we don't need to associate it with the pattern
1577 statement created for PREV_STMT. Therefore, we add PREV_STMT
1578 to the list in order to mark it later in vect_pattern_recog_1. */
1580 stmts
->safe_push (prev_stmt
);
/* Reuse PREV_STMT's pattern-def sequence for the demotion stmt.  */
1585 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt
))
1586 = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt
));
1589 *type_out
= NULL_TREE
;
1592 stmts
->safe_push (use_stmt
);
1595 /* TODO: support general case, create a conversion to the correct type. */
1598 /* Pattern detected. */
1599 if (dump_enabled_p ())
1601 dump_printf_loc (MSG_NOTE
, vect_location
,
1602 "vect_recog_over_widening_pattern: detected: ");
1603 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
1604 dump_printf (MSG_NOTE
, "\n");
1607 return pattern_stmt
;
1610 /* Detect widening shift pattern:
1616 S2 a_T = (TYPE) a_t;
1617 S3 res_T = a_T << CONST;
1619 where type 'TYPE' is at least double the size of type 'type'.
1621 Also detect cases where the shift result is immediately converted
1622 to another type 'result_type' that is no larger in size than 'TYPE'.
1623 In those cases we perform a widen-shift that directly results in
1624 'result_type', to avoid a possible over-widening situation:
1628 result_type res_result;
1631 S2 a_T = (TYPE) a_t;
1632 S3 res_T = a_T << CONST;
1633 S4 res_result = (result_type) res_T;
1634 '--> res_result' = a_t w<< CONST;
1636 And a case when 'TYPE' is 4 times bigger than 'type'. In that case we
1637 create an additional pattern stmt for S2 to create a variable of an
1638 intermediate type, and perform widen-shift on the intermediate type:
1642 TYPE a_T, res_T, res_T';
1645 S2 a_T = (TYPE) a_t;
1646 '--> a_it = (interm_type) a_t;
1647 S3 res_T = a_T << CONST;
1648 '--> res_T' = a_it <<* CONST;
1652 * STMTS: Contains a stmt from which the pattern search begins.
1653 In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
1654 in STMTS. When an intermediate type is used and a pattern statement is
1655 created for S2, we also put S2 here (before S3).
1659 * TYPE_IN: The type of the input arguments to the pattern.
1661 * TYPE_OUT: The type of the output of this pattern.
1663 * Return value: A new stmt that will be used to replace the sequence of
1664 stmts that constitute the pattern. In this case it will be:
1665 WIDEN_LSHIFT_EXPR <a_t, CONST>. */
/* Detect a widening left-shift: res = (TYPE) a_t << CONST where the first
   operand was promoted from a narrower type, and replace the sequence with
   a WIDEN_LSHIFT_EXPR pattern statement if the target supports it.
   NOTE(review): mangled extraction — original lines are split across
   physical lines and some lines are missing; restore from upstream.  */
1668 vect_recog_widen_shift_pattern (vec
<gimple
> *stmts
,
1669 tree
*type_in
, tree
*type_out
)
1671 gimple last_stmt
= stmts
->pop ();
1673 tree oprnd0
, oprnd1
;
1674 tree type
, half_type0
;
1675 gimple pattern_stmt
;
1676 tree vectype
, vectype_out
= NULL_TREE
;
1678 enum tree_code dummy_code
;
1680 vec
<tree
> dummy_vec
;
1684 if (!is_gimple_assign (last_stmt
) || !vinfo_for_stmt (last_stmt
))
1687 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt
)))
1690 if (gimple_assign_rhs_code (last_stmt
) != LSHIFT_EXPR
)
1693 oprnd0
= gimple_assign_rhs1 (last_stmt
);
1694 oprnd1
= gimple_assign_rhs2 (last_stmt
);
1695 if (TREE_CODE (oprnd0
) != SSA_NAME
|| TREE_CODE (oprnd1
) != INTEGER_CST
)
1698 /* Check operand 0: it has to be defined by a type promotion. */
1699 if (!type_conversion_p (oprnd0
, last_stmt
, false, &half_type0
, &def_stmt0
,
1704 /* Check operand 1: has to be positive. We check that it fits the type
1705 in vect_handle_widen_op_by_const (). */
1706 if (tree_int_cst_compare (oprnd1
, size_zero_node
) <= 0)
1709 oprnd0
= gimple_assign_rhs1 (def_stmt0
);
1710 type
= gimple_expr_type (last_stmt
);
1712 /* Check for subsequent conversion to another type. */
1713 use_stmt
= vect_single_imm_use (last_stmt
);
1714 if (use_stmt
&& is_gimple_assign (use_stmt
)
1715 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt
))
1716 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt
)))
1718 tree use_lhs
= gimple_assign_lhs (use_stmt
);
1719 tree use_type
= TREE_TYPE (use_lhs
);
1721 if (INTEGRAL_TYPE_P (use_type
)
1722 && TYPE_PRECISION (use_type
) <= TYPE_PRECISION (type
))
1724 last_stmt
= use_stmt
;
1729 /* Check if this a widening operation. */
1730 gimple wstmt
= NULL
;
1731 if (!vect_handle_widen_op_by_const (last_stmt
, LSHIFT_EXPR
, oprnd1
,
1733 type
, &half_type0
, def_stmt0
))
1736 /* Pattern detected. */
1737 if (dump_enabled_p ())
1738 dump_printf_loc (MSG_NOTE
, vect_location
,
1739 "vect_recog_widen_shift_pattern: detected:\n");
1741 /* Check target support. */
1742 vectype
= get_vectype_for_scalar_type (half_type0
);
1743 vectype_out
= get_vectype_for_scalar_type (type
);
1747 || !supportable_widening_operation (WIDEN_LSHIFT_EXPR
, last_stmt
,
1748 vectype_out
, vectype
,
1749 &dummy_code
, &dummy_code
,
1750 &dummy_int
, &dummy_vec
))
1754 *type_out
= vectype_out
;
1756 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1757 var
= vect_recog_temp_ssa_var (type
, NULL
);
1759 gimple_build_assign (var
, WIDEN_LSHIFT_EXPR
, oprnd0
, oprnd1
);
/* Register WSTMT as a pattern-def stmt with its own stmt_vec_info so the
   vectorizer can process it like any other pattern statement.  */
1762 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
1763 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1764 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
1765 new_pattern_def_seq (stmt_vinfo
, wstmt
);
1766 stmt_vec_info new_stmt_info
1767 = new_stmt_vec_info (wstmt
, loop_vinfo
, bb_vinfo
);
1768 set_vinfo_for_stmt (wstmt
, new_stmt_info
);
1769 STMT_VINFO_VECTYPE (new_stmt_info
) = vectype
;
1772 if (dump_enabled_p ())
1773 dump_gimple_stmt_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, pattern_stmt
, 0);
1775 stmts
->safe_push (last_stmt
);
1776 return pattern_stmt
;
1779 /* Detect a rotate pattern wouldn't be otherwise vectorized:
1783 S0 a_t = b_t r<< c_t;
1787 * STMTS: Contains a stmt from which the pattern search begins,
1788 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1792 S2 e_t = d_t & (B - 1);
1793 S3 f_t = b_t << c_t;
1794 S4 g_t = b_t >> e_t;
1797 where B is element bitsize of type.
1801 * TYPE_IN: The type of the input arguments to the pattern.
1803 * TYPE_OUT: The type of the output of this pattern.
1805 * Return value: A new stmt that will be used to replace the rotate
/* Detect a rotate (a r<< b / a r>> b) that the target cannot vectorize
   directly and lower it to a pattern of two shifts plus a BIT_IOR_EXPR,
   when vector shifts are supported.
   NOTE(review): mangled extraction — original lines are split across
   physical lines and some lines are missing; restore from upstream.  */
1809 vect_recog_rotate_pattern (vec
<gimple
> *stmts
, tree
*type_in
, tree
*type_out
)
1811 gimple last_stmt
= stmts
->pop ();
1812 tree oprnd0
, oprnd1
, lhs
, var
, var1
, var2
, vectype
, type
, stype
, def
, def2
;
1813 gimple pattern_stmt
, def_stmt
;
1814 enum tree_code rhs_code
;
1815 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
1816 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1817 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
1818 enum vect_def_type dt
;
1819 optab optab1
, optab2
;
1820 edge ext_def
= NULL
;
1822 if (!is_gimple_assign (last_stmt
))
1825 rhs_code
= gimple_assign_rhs_code (last_stmt
);
1835 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
1838 lhs
= gimple_assign_lhs (last_stmt
);
1839 oprnd0
= gimple_assign_rhs1 (last_stmt
);
1840 type
= TREE_TYPE (oprnd0
);
1841 oprnd1
= gimple_assign_rhs2 (last_stmt
);
1842 if (TREE_CODE (oprnd0
) != SSA_NAME
1843 || TYPE_PRECISION (TREE_TYPE (lhs
)) != TYPE_PRECISION (type
)
1844 || !INTEGRAL_TYPE_P (type
)
1845 || !TYPE_UNSIGNED (type
))
1848 if (!vect_is_simple_use (oprnd1
, last_stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
1852 if (dt
!= vect_internal_def
1853 && dt
!= vect_constant_def
1854 && dt
!= vect_external_def
)
1857 vectype
= get_vectype_for_scalar_type (type
);
1858 if (vectype
== NULL_TREE
)
1861 /* If vector/vector or vector/scalar rotate is supported by the target,
1862 don't do anything here. */
1863 optab1
= optab_for_tree_code (rhs_code
, vectype
, optab_vector
);
1865 && optab_handler (optab1
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
1868 if (bb_vinfo
!= NULL
|| dt
!= vect_internal_def
)
1870 optab2
= optab_for_tree_code (rhs_code
, vectype
, optab_scalar
);
1872 && optab_handler (optab2
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
1876 /* If vector/vector or vector/scalar shifts aren't supported by the target,
1877 don't do anything here either. */
1878 optab1
= optab_for_tree_code (LSHIFT_EXPR
, vectype
, optab_vector
);
1879 optab2
= optab_for_tree_code (RSHIFT_EXPR
, vectype
, optab_vector
);
1881 || optab_handler (optab1
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
1883 || optab_handler (optab2
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
1885 if (bb_vinfo
== NULL
&& dt
== vect_internal_def
)
1887 optab1
= optab_for_tree_code (LSHIFT_EXPR
, vectype
, optab_scalar
);
1888 optab2
= optab_for_tree_code (RSHIFT_EXPR
, vectype
, optab_scalar
);
1890 || optab_handler (optab1
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
1892 || optab_handler (optab2
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
1897 *type_out
= vectype
;
1898 if (*type_in
== NULL_TREE
)
/* For a loop-external shift amount, remember the preheader edge so the
   helper stmts below can be inserted there (outside the loop).  */
1901 if (dt
== vect_external_def
1902 && TREE_CODE (oprnd1
) == SSA_NAME
1905 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1906 ext_def
= loop_preheader_edge (loop
);
1907 if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1
))
1909 basic_block bb
= gimple_bb (SSA_NAME_DEF_STMT (oprnd1
));
1911 || !dominated_by_p (CDI_DOMINATORS
, ext_def
->dest
, bb
))
1917 if (TREE_CODE (oprnd1
) == INTEGER_CST
1918 || TYPE_MODE (TREE_TYPE (oprnd1
)) == TYPE_MODE (type
))
1920 else if (def_stmt
&& gimple_assign_cast_p (def_stmt
))
1922 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
1923 if (TYPE_MODE (TREE_TYPE (rhs1
)) == TYPE_MODE (type
)
1924 && TYPE_PRECISION (TREE_TYPE (rhs1
))
1925 == TYPE_PRECISION (type
))
1929 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
) = NULL
;
1930 if (def
== NULL_TREE
)
1932 def
= vect_recog_temp_ssa_var (type
, NULL
);
1933 def_stmt
= gimple_build_assign (def
, NOP_EXPR
, oprnd1
);
1937 = gsi_insert_on_edge_immediate (ext_def
, def_stmt
);
1938 gcc_assert (!new_bb
);
1941 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
1943 stype
= TREE_TYPE (def
);
/* Constant shift amount: compute the complementary amount directly.  */
1945 if (TREE_CODE (def
) == INTEGER_CST
)
1947 if (!tree_fits_uhwi_p (def
)
1948 || tree_to_uhwi (def
) >= GET_MODE_PRECISION (TYPE_MODE (type
))
1949 || integer_zerop (def
))
1951 def2
= build_int_cst (stype
,
1952 GET_MODE_PRECISION (TYPE_MODE (type
))
1953 - tree_to_uhwi (def
));
/* Variable shift amount: build def2 = (-def) & (prec - 1) as helper
   pattern stmts (or on the preheader edge when possible).  */
1957 tree vecstype
= get_vectype_for_scalar_type (stype
);
1958 stmt_vec_info def_stmt_vinfo
;
1960 if (vecstype
== NULL_TREE
)
1962 def2
= vect_recog_temp_ssa_var (stype
, NULL
);
1963 def_stmt
= gimple_build_assign (def2
, NEGATE_EXPR
, def
);
1967 = gsi_insert_on_edge_immediate (ext_def
, def_stmt
);
1968 gcc_assert (!new_bb
);
1972 def_stmt_vinfo
= new_stmt_vec_info (def_stmt
, loop_vinfo
, bb_vinfo
);
1973 set_vinfo_for_stmt (def_stmt
, def_stmt_vinfo
);
1974 STMT_VINFO_VECTYPE (def_stmt_vinfo
) = vecstype
;
1975 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
1978 def2
= vect_recog_temp_ssa_var (stype
, NULL
);
1980 = build_int_cst (stype
, GET_MODE_PRECISION (TYPE_MODE (stype
)) - 1);
1981 def_stmt
= gimple_build_assign (def2
, BIT_AND_EXPR
,
1982 gimple_assign_lhs (def_stmt
), mask
);
1986 = gsi_insert_on_edge_immediate (ext_def
, def_stmt
);
1987 gcc_assert (!new_bb
);
1991 def_stmt_vinfo
= new_stmt_vec_info (def_stmt
, loop_vinfo
, bb_vinfo
);
1992 set_vinfo_for_stmt (def_stmt
, def_stmt_vinfo
);
1993 STMT_VINFO_VECTYPE (def_stmt_vinfo
) = vecstype
;
1994 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
/* Build the two shifts whose IOR realizes the rotate.  */
1998 var1
= vect_recog_temp_ssa_var (type
, NULL
);
1999 def_stmt
= gimple_build_assign (var1
, rhs_code
== LROTATE_EXPR
2000 ? LSHIFT_EXPR
: RSHIFT_EXPR
,
2002 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2004 var2
= vect_recog_temp_ssa_var (type
, NULL
);
2005 def_stmt
= gimple_build_assign (var2
, rhs_code
== LROTATE_EXPR
2006 ? RSHIFT_EXPR
: LSHIFT_EXPR
,
2008 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2010 /* Pattern detected. */
2011 if (dump_enabled_p ())
2012 dump_printf_loc (MSG_NOTE
, vect_location
,
2013 "vect_recog_rotate_pattern: detected:\n");
2015 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2016 var
= vect_recog_temp_ssa_var (type
, NULL
);
2017 pattern_stmt
= gimple_build_assign (var
, BIT_IOR_EXPR
, var1
, var2
);
2019 if (dump_enabled_p ())
2020 dump_gimple_stmt_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, pattern_stmt
, 0);
2022 stmts
->safe_push (last_stmt
);
2023 return pattern_stmt
;
2026 /* Detect a vector by vector shift pattern that wouldn't be otherwise
2034 S3 res_T = b_T op a_t;
2036 where type 'TYPE' is a type with different size than 'type',
2037 and op is <<, >> or rotate.
2042 TYPE b_T, c_T, res_T;
2045 S1 a_t = (type) c_T;
2047 S3 res_T = b_T op a_t;
2051 * STMTS: Contains a stmt from which the pattern search begins,
2052 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2053 with a shift/rotate which has same type on both operands, in the
2054 second case just b_T op c_T, in the first case with added cast
2055 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2059 * TYPE_IN: The type of the input arguments to the pattern.
2061 * TYPE_OUT: The type of the output of this pattern.
2063 * Return value: A new stmt that will be used to replace the shift/rotate
/* Detect a shift/rotate whose two operands have different modes (the shift
   count was defined in a different-size type) and rewrite it so both
   operands have the same type, inserting a cast pattern stmt if needed.
   NOTE(review): mangled extraction — original lines are split across
   physical lines and some lines are missing; restore from upstream.  */
2067 vect_recog_vector_vector_shift_pattern (vec
<gimple
> *stmts
,
2068 tree
*type_in
, tree
*type_out
)
2070 gimple last_stmt
= stmts
->pop ();
2071 tree oprnd0
, oprnd1
, lhs
, var
;
2072 gimple pattern_stmt
, def_stmt
;
2073 enum tree_code rhs_code
;
2074 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
2075 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
2076 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
2077 enum vect_def_type dt
;
2080 if (!is_gimple_assign (last_stmt
))
2083 rhs_code
= gimple_assign_rhs_code (last_stmt
);
2095 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
2098 lhs
= gimple_assign_lhs (last_stmt
);
2099 oprnd0
= gimple_assign_rhs1 (last_stmt
);
2100 oprnd1
= gimple_assign_rhs2 (last_stmt
);
2101 if (TREE_CODE (oprnd0
) != SSA_NAME
2102 || TREE_CODE (oprnd1
) != SSA_NAME
2103 || TYPE_MODE (TREE_TYPE (oprnd0
)) == TYPE_MODE (TREE_TYPE (oprnd1
))
2104 || TYPE_PRECISION (TREE_TYPE (oprnd1
))
2105 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1
)))
2106 || TYPE_PRECISION (TREE_TYPE (lhs
))
2107 != TYPE_PRECISION (TREE_TYPE (oprnd0
)))
2110 if (!vect_is_simple_use (oprnd1
, last_stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
2114 if (dt
!= vect_internal_def
)
2117 *type_in
= get_vectype_for_scalar_type (TREE_TYPE (oprnd0
));
2118 *type_out
= *type_in
;
2119 if (*type_in
== NULL_TREE
)
/* If the shift count already came from a same-mode cast, reuse its
   source; otherwise synthesize a NOP_EXPR cast as a pattern-def stmt.  */
2123 if (gimple_assign_cast_p (def_stmt
))
2125 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
2126 if (TYPE_MODE (TREE_TYPE (rhs1
)) == TYPE_MODE (TREE_TYPE (oprnd0
))
2127 && TYPE_PRECISION (TREE_TYPE (rhs1
))
2128 == TYPE_PRECISION (TREE_TYPE (oprnd0
)))
2132 if (def
== NULL_TREE
)
2134 def
= vect_recog_temp_ssa_var (TREE_TYPE (oprnd0
), NULL
);
2135 def_stmt
= gimple_build_assign (def
, NOP_EXPR
, oprnd1
);
2136 new_pattern_def_seq (stmt_vinfo
, def_stmt
);
2139 /* Pattern detected. */
2140 if (dump_enabled_p ())
2141 dump_printf_loc (MSG_NOTE
, vect_location
,
2142 "vect_recog_vector_vector_shift_pattern: detected:\n");
2144 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2145 var
= vect_recog_temp_ssa_var (TREE_TYPE (oprnd0
), NULL
);
2146 pattern_stmt
= gimple_build_assign (var
, rhs_code
, oprnd0
, def
);
2148 if (dump_enabled_p ())
2149 dump_gimple_stmt_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, pattern_stmt
, 0);
2151 stmts
->safe_push (last_stmt
);
2152 return pattern_stmt
;
2155 /* Detect multiplication by constant which are postive or negatives of power 2,
2156 and convert them to shift patterns.
2158 Mult with constants that are postive power of two.
2165 Mult with constants that are negative power of two.
2170 STMTS: Contains a stmt from which the pattern search begins,
2171 i.e. the mult stmt. Convert the mult operation to LSHIFT if
2172 constant operand is a power of 2.
2174 S1': b_t = a_t << log2 (n)
2176 Convert the mult operation to LSHIFT and followed by a NEGATE
2177 if constant operand is a negative power of 2.
2178 type a_t, b_t, res_T;
2179 S2': b_t = a_t << log2 (n)
2180 S3': res_T = - (b_t)
2184 * TYPE_IN: The type of the input arguments to the pattern.
2186 * TYPE_OUT: The type of the output of this pattern.
2188 * Return value: A new stmt that will be used to replace the multiplication
/* Detect multiplication by a constant that is a (positive or negative)
   power of two and lower it to a left shift (plus a negate for the
   negative case) when the target lacks a native vector multiply.
   NOTE(review): mangled extraction — original lines are split across
   physical lines and some lines are missing; restore from upstream.  */
2192 vect_recog_mult_pattern (vec
<gimple
> *stmts
,
2193 tree
*type_in
, tree
*type_out
)
2195 gimple last_stmt
= stmts
->pop ();
2196 tree oprnd0
, oprnd1
, vectype
, itype
;
2197 gimple pattern_stmt
, def_stmt
;
2199 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
2200 int power2_val
, power2_neg_val
;
2203 if (!is_gimple_assign (last_stmt
))
2206 if (gimple_assign_rhs_code (last_stmt
) != MULT_EXPR
)
2209 oprnd0
= gimple_assign_rhs1 (last_stmt
);
2210 oprnd1
= gimple_assign_rhs2 (last_stmt
);
2211 itype
= TREE_TYPE (oprnd0
);
2213 if (TREE_CODE (oprnd0
) != SSA_NAME
2214 || TREE_CODE (oprnd1
) != INTEGER_CST
2215 || !INTEGRAL_TYPE_P (itype
)
2216 || TYPE_PRECISION (itype
) != GET_MODE_PRECISION (TYPE_MODE (itype
)))
2219 vectype
= get_vectype_for_scalar_type (itype
);
2220 if (vectype
== NULL_TREE
)
2223 /* If the target can handle vectorized multiplication natively,
2224 don't attempt to optimize this. */
2225 optab
= optab_for_tree_code (MULT_EXPR
, vectype
, optab_default
);
2226 if (optab
!= unknown_optab
)
2228 machine_mode vec_mode
= TYPE_MODE (vectype
);
2229 int icode
= (int) optab_handler (optab
, vec_mode
);
2230 if (icode
!= CODE_FOR_nothing
)
2234 /* If target cannot handle vector left shift then we cannot
2235 optimize and bail out. */
2236 optab
= optab_for_tree_code (LSHIFT_EXPR
, vectype
, optab_vector
);
2238 || optab_handler (optab
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
/* exact_log2 returns -1 when the constant is not a power of two.  */
2241 power2_val
= wi::exact_log2 (oprnd1
);
2242 power2_neg_val
= wi::exact_log2 (wi::neg (oprnd1
));
2244 /* Handle constant operands that are postive or negative powers of 2. */
2245 if (power2_val
!= -1)
2247 shift
= build_int_cst (itype
, power2_val
);
2249 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2250 LSHIFT_EXPR
, oprnd0
, shift
);
2252 else if (power2_neg_val
!= -1)
2254 /* If the target cannot handle vector NEGATE then we cannot
2255 do the optimization. */
2256 optab
= optab_for_tree_code (NEGATE_EXPR
, vectype
, optab_vector
);
2258 || optab_handler (optab
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
2261 shift
= build_int_cst (itype
, power2_neg_val
);
2263 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2264 LSHIFT_EXPR
, oprnd0
, shift
);
2265 new_pattern_def_seq (stmt_vinfo
, def_stmt
);
2267 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2268 NEGATE_EXPR
, gimple_assign_lhs (def_stmt
));
2273 /* Pattern detected. */
2274 if (dump_enabled_p ())
2275 dump_printf_loc (MSG_NOTE
, vect_location
,
2276 "vect_recog_mult_pattern: detected:\n");
2278 if (dump_enabled_p ())
2279 dump_gimple_stmt_loc (MSG_NOTE
, vect_location
, TDF_SLIM
,
2282 stmts
->safe_push (last_stmt
);
2284 *type_out
= vectype
;
2286 return pattern_stmt
;
2289 /* Detect a signed division by a constant that wouldn't be
2290 otherwise vectorized:
2296 where type 'type' is an integral type and N is a constant.
2298 Similarly handle modulo by a constant:
2304 * STMTS: Contains a stmt from which the pattern search begins,
2305 i.e. the division stmt. S1 is replaced by if N is a power
2306 of two constant and type is signed:
2307 S3 y_t = b_t < 0 ? N - 1 : 0;
2309 S1' a_t = x_t >> log2 (N);
2311 S4 is replaced if N is a power of two constant and
2312 type is signed by (where *_T temporaries have unsigned type):
2313 S9 y_T = b_t < 0 ? -1U : 0U;
2314 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2315 S7 z_t = (type) z_T;
2317 S5 x_t = w_t & (N - 1);
2318 S4' a_t = x_t - z_t;
2322 * TYPE_IN: The type of the input arguments to the pattern.
2324 * TYPE_OUT: The type of the output of this pattern.
2326 * Return value: A new stmt that will be used to replace the division
2327 S1 or modulo S4 stmt. */
2330 vect_recog_divmod_pattern (vec
<gimple
> *stmts
,
2331 tree
*type_in
, tree
*type_out
)
2333 gimple last_stmt
= stmts
->pop ();
2334 tree oprnd0
, oprnd1
, vectype
, itype
, cond
;
2335 gimple pattern_stmt
, def_stmt
;
2336 enum tree_code rhs_code
;
2337 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
2338 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
2339 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
2342 int dummy_int
, prec
;
2343 stmt_vec_info def_stmt_vinfo
;
2345 if (!is_gimple_assign (last_stmt
))
2348 rhs_code
= gimple_assign_rhs_code (last_stmt
);
2351 case TRUNC_DIV_EXPR
:
2352 case TRUNC_MOD_EXPR
:
2358 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
2361 oprnd0
= gimple_assign_rhs1 (last_stmt
);
2362 oprnd1
= gimple_assign_rhs2 (last_stmt
);
2363 itype
= TREE_TYPE (oprnd0
);
2364 if (TREE_CODE (oprnd0
) != SSA_NAME
2365 || TREE_CODE (oprnd1
) != INTEGER_CST
2366 || TREE_CODE (itype
) != INTEGER_TYPE
2367 || TYPE_PRECISION (itype
) != GET_MODE_PRECISION (TYPE_MODE (itype
)))
2370 vectype
= get_vectype_for_scalar_type (itype
);
2371 if (vectype
== NULL_TREE
)
2374 /* If the target can handle vectorized division or modulo natively,
2375 don't attempt to optimize this. */
2376 optab
= optab_for_tree_code (rhs_code
, vectype
, optab_default
);
2377 if (optab
!= unknown_optab
)
2379 machine_mode vec_mode
= TYPE_MODE (vectype
);
2380 int icode
= (int) optab_handler (optab
, vec_mode
);
2381 if (icode
!= CODE_FOR_nothing
)
2385 prec
= TYPE_PRECISION (itype
);
2386 if (integer_pow2p (oprnd1
))
2388 if (TYPE_UNSIGNED (itype
) || tree_int_cst_sgn (oprnd1
) != 1)
2391 /* Pattern detected. */
2392 if (dump_enabled_p ())
2393 dump_printf_loc (MSG_NOTE
, vect_location
,
2394 "vect_recog_divmod_pattern: detected:\n");
2396 cond
= build2 (LT_EXPR
, boolean_type_node
, oprnd0
,
2397 build_int_cst (itype
, 0));
2398 if (rhs_code
== TRUNC_DIV_EXPR
)
2400 tree var
= vect_recog_temp_ssa_var (itype
, NULL
);
2403 = gimple_build_assign (var
, COND_EXPR
, cond
,
2404 fold_build2 (MINUS_EXPR
, itype
, oprnd1
,
2405 build_int_cst (itype
, 1)),
2406 build_int_cst (itype
, 0));
2407 new_pattern_def_seq (stmt_vinfo
, def_stmt
);
2408 var
= vect_recog_temp_ssa_var (itype
, NULL
);
2410 = gimple_build_assign (var
, PLUS_EXPR
, oprnd0
,
2411 gimple_assign_lhs (def_stmt
));
2412 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2414 shift
= build_int_cst (itype
, tree_log2 (oprnd1
));
2416 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2417 RSHIFT_EXPR
, var
, shift
);
2422 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
) = NULL
;
2423 if (compare_tree_int (oprnd1
, 2) == 0)
2425 signmask
= vect_recog_temp_ssa_var (itype
, NULL
);
2426 def_stmt
= gimple_build_assign (signmask
, COND_EXPR
, cond
,
2427 build_int_cst (itype
, 1),
2428 build_int_cst (itype
, 0));
2429 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2434 = build_nonstandard_integer_type (prec
, 1);
2435 tree vecutype
= get_vectype_for_scalar_type (utype
);
2437 = build_int_cst (utype
, GET_MODE_BITSIZE (TYPE_MODE (itype
))
2438 - tree_log2 (oprnd1
));
2439 tree var
= vect_recog_temp_ssa_var (utype
, NULL
);
2441 def_stmt
= gimple_build_assign (var
, COND_EXPR
, cond
,
2442 build_int_cst (utype
, -1),
2443 build_int_cst (utype
, 0));
2445 = new_stmt_vec_info (def_stmt
, loop_vinfo
, bb_vinfo
);
2446 set_vinfo_for_stmt (def_stmt
, def_stmt_vinfo
);
2447 STMT_VINFO_VECTYPE (def_stmt_vinfo
) = vecutype
;
2448 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2449 var
= vect_recog_temp_ssa_var (utype
, NULL
);
2450 def_stmt
= gimple_build_assign (var
, RSHIFT_EXPR
,
2451 gimple_assign_lhs (def_stmt
),
2454 = new_stmt_vec_info (def_stmt
, loop_vinfo
, bb_vinfo
);
2455 set_vinfo_for_stmt (def_stmt
, def_stmt_vinfo
);
2456 STMT_VINFO_VECTYPE (def_stmt_vinfo
) = vecutype
;
2457 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2458 signmask
= vect_recog_temp_ssa_var (itype
, NULL
);
2460 = gimple_build_assign (signmask
, NOP_EXPR
, var
);
2461 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2464 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2465 PLUS_EXPR
, oprnd0
, signmask
);
2466 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2468 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2469 BIT_AND_EXPR
, gimple_assign_lhs (def_stmt
),
2470 fold_build2 (MINUS_EXPR
, itype
, oprnd1
,
2471 build_int_cst (itype
, 1)));
2472 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2475 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2476 MINUS_EXPR
, gimple_assign_lhs (def_stmt
),
2480 if (dump_enabled_p ())
2481 dump_gimple_stmt_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, pattern_stmt
,
2484 stmts
->safe_push (last_stmt
);
2487 *type_out
= vectype
;
2488 return pattern_stmt
;
2491 if (prec
> HOST_BITS_PER_WIDE_INT
2492 || integer_zerop (oprnd1
))
2495 if (!can_mult_highpart_p (TYPE_MODE (vectype
), TYPE_UNSIGNED (itype
)))
2498 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
) = NULL
;
2500 if (TYPE_UNSIGNED (itype
))
2502 unsigned HOST_WIDE_INT mh
, ml
;
2503 int pre_shift
, post_shift
;
2504 unsigned HOST_WIDE_INT d
= (TREE_INT_CST_LOW (oprnd1
)
2505 & GET_MODE_MASK (TYPE_MODE (itype
)));
2506 tree t1
, t2
, t3
, t4
;
2508 if (d
>= ((unsigned HOST_WIDE_INT
) 1 << (prec
- 1)))
2509 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2512 /* Find a suitable multiplier and right shift count
2513 instead of multiplying with D. */
2514 mh
= choose_multiplier (d
, prec
, prec
, &ml
, &post_shift
, &dummy_int
);
2516 /* If the suggested multiplier is more than SIZE bits, we can do better
2517 for even divisors, using an initial right shift. */
2518 if (mh
!= 0 && (d
& 1) == 0)
2520 pre_shift
= floor_log2 (d
& -d
);
2521 mh
= choose_multiplier (d
>> pre_shift
, prec
, prec
- pre_shift
,
2522 &ml
, &post_shift
, &dummy_int
);
2530 if (post_shift
- 1 >= prec
)
2533 /* t1 = oprnd0 h* ml;
2537 q = t4 >> (post_shift - 1); */
2538 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
2539 def_stmt
= gimple_build_assign (t1
, MULT_HIGHPART_EXPR
, oprnd0
,
2540 build_int_cst (itype
, ml
));
2541 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2543 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
2545 = gimple_build_assign (t2
, MINUS_EXPR
, oprnd0
, t1
);
2546 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2548 t3
= vect_recog_temp_ssa_var (itype
, NULL
);
2550 = gimple_build_assign (t3
, RSHIFT_EXPR
, t2
, integer_one_node
);
2551 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2553 t4
= vect_recog_temp_ssa_var (itype
, NULL
);
2555 = gimple_build_assign (t4
, PLUS_EXPR
, t1
, t3
);
2557 if (post_shift
!= 1)
2559 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2561 q
= vect_recog_temp_ssa_var (itype
, NULL
);
2563 = gimple_build_assign (q
, RSHIFT_EXPR
, t4
,
2564 build_int_cst (itype
, post_shift
- 1));
2569 pattern_stmt
= def_stmt
;
2574 if (pre_shift
>= prec
|| post_shift
>= prec
)
2577 /* t1 = oprnd0 >> pre_shift;
2579 q = t2 >> post_shift; */
2582 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
2584 = gimple_build_assign (t1
, RSHIFT_EXPR
, oprnd0
,
2585 build_int_cst (NULL
, pre_shift
));
2586 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2591 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
2592 def_stmt
= gimple_build_assign (t2
, MULT_HIGHPART_EXPR
, t1
,
2593 build_int_cst (itype
, ml
));
2597 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2599 q
= vect_recog_temp_ssa_var (itype
, NULL
);
2601 = gimple_build_assign (q
, RSHIFT_EXPR
, t2
,
2602 build_int_cst (itype
, post_shift
));
2607 pattern_stmt
= def_stmt
;
2612 unsigned HOST_WIDE_INT ml
;
2614 HOST_WIDE_INT d
= TREE_INT_CST_LOW (oprnd1
);
2615 unsigned HOST_WIDE_INT abs_d
;
2617 tree t1
, t2
, t3
, t4
;
2619 /* Give up for -1. */
2623 /* Since d might be INT_MIN, we have to cast to
2624 unsigned HOST_WIDE_INT before negating to avoid
2625 undefined signed overflow. */
2627 ? (unsigned HOST_WIDE_INT
) d
2628 : - (unsigned HOST_WIDE_INT
) d
);
2630 /* n rem d = n rem -d */
2631 if (rhs_code
== TRUNC_MOD_EXPR
&& d
< 0)
2634 oprnd1
= build_int_cst (itype
, abs_d
);
2636 else if (HOST_BITS_PER_WIDE_INT
>= prec
2637 && abs_d
== (unsigned HOST_WIDE_INT
) 1 << (prec
- 1))
2638 /* This case is not handled correctly below. */
2641 choose_multiplier (abs_d
, prec
, prec
- 1, &ml
, &post_shift
, &dummy_int
);
2642 if (ml
>= (unsigned HOST_WIDE_INT
) 1 << (prec
- 1))
2645 ml
|= (~(unsigned HOST_WIDE_INT
) 0) << (prec
- 1);
2647 if (post_shift
>= prec
)
2650 /* t1 = oprnd0 h* ml; */
2651 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
2652 def_stmt
= gimple_build_assign (t1
, MULT_HIGHPART_EXPR
, oprnd0
,
2653 build_int_cst (itype
, ml
));
2657 /* t2 = t1 + oprnd0; */
2658 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2659 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
2660 def_stmt
= gimple_build_assign (t2
, PLUS_EXPR
, t1
, oprnd0
);
2667 /* t3 = t2 >> post_shift; */
2668 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2669 t3
= vect_recog_temp_ssa_var (itype
, NULL
);
2670 def_stmt
= gimple_build_assign (t3
, RSHIFT_EXPR
, t2
,
2671 build_int_cst (itype
, post_shift
));
2676 wide_int oprnd0_min
, oprnd0_max
;
2678 if (get_range_info (oprnd0
, &oprnd0_min
, &oprnd0_max
) == VR_RANGE
)
2680 if (!wi::neg_p (oprnd0_min
, TYPE_SIGN (itype
)))
2682 else if (wi::neg_p (oprnd0_max
, TYPE_SIGN (itype
)))
2686 if (msb
== 0 && d
>= 0)
2690 pattern_stmt
= def_stmt
;
2694 /* t4 = oprnd0 >> (prec - 1);
2695 or if we know from VRP that oprnd0 >= 0
2697 or if we know from VRP that oprnd0 < 0
2699 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2700 t4
= vect_recog_temp_ssa_var (itype
, NULL
);
2702 def_stmt
= gimple_build_assign (t4
, INTEGER_CST
,
2703 build_int_cst (itype
, msb
));
2705 def_stmt
= gimple_build_assign (t4
, RSHIFT_EXPR
, oprnd0
,
2706 build_int_cst (itype
, prec
- 1));
2707 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2709 /* q = t3 - t4; or q = t4 - t3; */
2710 q
= vect_recog_temp_ssa_var (itype
, NULL
);
2711 pattern_stmt
= gimple_build_assign (q
, MINUS_EXPR
, d
< 0 ? t4
: t3
,
2716 if (rhs_code
== TRUNC_MOD_EXPR
)
2720 /* We divided. Now finish by:
2723 append_pattern_def_seq (stmt_vinfo
, pattern_stmt
);
2725 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
2726 def_stmt
= gimple_build_assign (t1
, MULT_EXPR
, q
, oprnd1
);
2727 append_pattern_def_seq (stmt_vinfo
, def_stmt
);
2729 r
= vect_recog_temp_ssa_var (itype
, NULL
);
2730 pattern_stmt
= gimple_build_assign (r
, MINUS_EXPR
, oprnd0
, t1
);
2733 /* Pattern detected. */
2734 if (dump_enabled_p ())
2736 dump_printf_loc (MSG_NOTE
, vect_location
,
2737 "vect_recog_divmod_pattern: detected: ");
2738 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_stmt
, 0);
2739 dump_printf (MSG_NOTE
, "\n");
2742 stmts
->safe_push (last_stmt
);
2745 *type_out
= vectype
;
2746 return pattern_stmt
;
2749 /* Function vect_recog_mixed_size_cond_pattern
2751 Try to find the following pattern:
2756 S1 a_T = x_t CMP y_t ? b_T : c_T;
2758 where type 'TYPE' is an integral type which has different size
2759 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
2760 than 'type', the constants need to fit into an integer type
2761 with the same width as 'type') or results of conversion from 'type'.
2765 * LAST_STMT: A stmt from which the pattern search begins.
2769 * TYPE_IN: The type of the input arguments to the pattern.
2771 * TYPE_OUT: The type of the output of this pattern.
2773 * Return value: A new stmt that will be used to replace the pattern.
2774 Additionally a def_stmt is added.
2776 a_it = x_t CMP y_t ? b_it : c_it;
2777 a_T = (TYPE) a_it; */
2780 vect_recog_mixed_size_cond_pattern (vec
<gimple
> *stmts
, tree
*type_in
,
2783 gimple last_stmt
= (*stmts
)[0];
2784 tree cond_expr
, then_clause
, else_clause
;
2785 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
), def_stmt_info
;
2786 tree type
, vectype
, comp_vectype
, itype
= NULL_TREE
, vecitype
;
2787 gimple pattern_stmt
, def_stmt
;
2788 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
2789 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
2790 tree orig_type0
= NULL_TREE
, orig_type1
= NULL_TREE
;
2791 gimple def_stmt0
= NULL
, def_stmt1
= NULL
;
2793 tree comp_scalar_type
;
2795 if (!is_gimple_assign (last_stmt
)
2796 || gimple_assign_rhs_code (last_stmt
) != COND_EXPR
2797 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_internal_def
)
2800 cond_expr
= gimple_assign_rhs1 (last_stmt
);
2801 then_clause
= gimple_assign_rhs2 (last_stmt
);
2802 else_clause
= gimple_assign_rhs3 (last_stmt
);
2804 if (!COMPARISON_CLASS_P (cond_expr
))
2807 comp_scalar_type
= TREE_TYPE (TREE_OPERAND (cond_expr
, 0));
2808 comp_vectype
= get_vectype_for_scalar_type (comp_scalar_type
);
2809 if (comp_vectype
== NULL_TREE
)
2812 type
= gimple_expr_type (last_stmt
);
2813 if (types_compatible_p (type
, comp_scalar_type
)
2814 || ((TREE_CODE (then_clause
) != INTEGER_CST
2815 || TREE_CODE (else_clause
) != INTEGER_CST
)
2816 && !INTEGRAL_TYPE_P (comp_scalar_type
))
2817 || !INTEGRAL_TYPE_P (type
))
2820 if ((TREE_CODE (then_clause
) != INTEGER_CST
2821 && !type_conversion_p (then_clause
, last_stmt
, false, &orig_type0
,
2822 &def_stmt0
, &promotion
))
2823 || (TREE_CODE (else_clause
) != INTEGER_CST
2824 && !type_conversion_p (else_clause
, last_stmt
, false, &orig_type1
,
2825 &def_stmt1
, &promotion
)))
2828 if (orig_type0
&& orig_type1
2829 && !types_compatible_p (orig_type0
, orig_type1
))
2834 if (!types_compatible_p (orig_type0
, comp_scalar_type
))
2836 then_clause
= gimple_assign_rhs1 (def_stmt0
);
2842 if (!types_compatible_p (orig_type1
, comp_scalar_type
))
2844 else_clause
= gimple_assign_rhs1 (def_stmt1
);
2849 HOST_WIDE_INT cmp_mode_size
2850 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype
));
2852 if (GET_MODE_BITSIZE (TYPE_MODE (type
)) == cmp_mode_size
)
2855 vectype
= get_vectype_for_scalar_type (type
);
2856 if (vectype
== NULL_TREE
)
2859 if (expand_vec_cond_expr_p (vectype
, comp_vectype
))
2862 if (itype
== NULL_TREE
)
2863 itype
= build_nonstandard_integer_type (cmp_mode_size
,
2864 TYPE_UNSIGNED (type
));
2866 if (itype
== NULL_TREE
2867 || GET_MODE_BITSIZE (TYPE_MODE (itype
)) != cmp_mode_size
)
2870 vecitype
= get_vectype_for_scalar_type (itype
);
2871 if (vecitype
== NULL_TREE
)
2874 if (!expand_vec_cond_expr_p (vecitype
, comp_vectype
))
2877 if (GET_MODE_BITSIZE (TYPE_MODE (type
)) > cmp_mode_size
)
2879 if ((TREE_CODE (then_clause
) == INTEGER_CST
2880 && !int_fits_type_p (then_clause
, itype
))
2881 || (TREE_CODE (else_clause
) == INTEGER_CST
2882 && !int_fits_type_p (else_clause
, itype
)))
2886 def_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
2887 COND_EXPR
, unshare_expr (cond_expr
),
2888 fold_convert (itype
, then_clause
),
2889 fold_convert (itype
, else_clause
));
2890 pattern_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (type
, NULL
),
2891 NOP_EXPR
, gimple_assign_lhs (def_stmt
));
2893 new_pattern_def_seq (stmt_vinfo
, def_stmt
);
2894 def_stmt_info
= new_stmt_vec_info (def_stmt
, loop_vinfo
, bb_vinfo
);
2895 set_vinfo_for_stmt (def_stmt
, def_stmt_info
);
2896 STMT_VINFO_VECTYPE (def_stmt_info
) = vecitype
;
2897 *type_in
= vecitype
;
2898 *type_out
= vectype
;
2900 if (dump_enabled_p ())
2901 dump_printf_loc (MSG_NOTE
, vect_location
,
2902 "vect_recog_mixed_size_cond_pattern: detected:\n");
2904 return pattern_stmt
;
2908 /* Helper function of vect_recog_bool_pattern. Called recursively, return
2909 true if bool VAR can be optimized that way. */
2912 check_bool_pattern (tree var
, loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
)
2915 enum vect_def_type dt
;
2917 enum tree_code rhs_code
;
2919 if (!vect_is_simple_use (var
, NULL
, loop_vinfo
, bb_vinfo
, &def_stmt
, &def
,
2923 if (dt
!= vect_internal_def
)
2926 if (!is_gimple_assign (def_stmt
))
2929 if (!has_single_use (def
))
2932 rhs1
= gimple_assign_rhs1 (def_stmt
);
2933 rhs_code
= gimple_assign_rhs_code (def_stmt
);
2937 return check_bool_pattern (rhs1
, loop_vinfo
, bb_vinfo
);
2940 if ((TYPE_PRECISION (TREE_TYPE (rhs1
)) != 1
2941 || !TYPE_UNSIGNED (TREE_TYPE (rhs1
)))
2942 && TREE_CODE (TREE_TYPE (rhs1
)) != BOOLEAN_TYPE
)
2944 return check_bool_pattern (rhs1
, loop_vinfo
, bb_vinfo
);
2947 return check_bool_pattern (rhs1
, loop_vinfo
, bb_vinfo
);
2952 if (!check_bool_pattern (rhs1
, loop_vinfo
, bb_vinfo
))
2954 return check_bool_pattern (gimple_assign_rhs2 (def_stmt
), loop_vinfo
,
2958 if (TREE_CODE_CLASS (rhs_code
) == tcc_comparison
)
2960 tree vecitype
, comp_vectype
;
2962 /* If the comparison can throw, then is_gimple_condexpr will be
2963 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
2964 if (stmt_could_throw_p (def_stmt
))
2967 comp_vectype
= get_vectype_for_scalar_type (TREE_TYPE (rhs1
));
2968 if (comp_vectype
== NULL_TREE
)
2971 if (TREE_CODE (TREE_TYPE (rhs1
)) != INTEGER_TYPE
)
2973 machine_mode mode
= TYPE_MODE (TREE_TYPE (rhs1
));
2975 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode
), 1);
2976 vecitype
= get_vectype_for_scalar_type (itype
);
2977 if (vecitype
== NULL_TREE
)
2981 vecitype
= comp_vectype
;
2982 return expand_vec_cond_expr_p (vecitype
, comp_vectype
);
2989 /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
2990 stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
2991 to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT. */
2994 adjust_bool_pattern_cast (tree type
, tree var
)
2996 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (SSA_NAME_DEF_STMT (var
));
2997 gimple cast_stmt
, pattern_stmt
;
2999 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
));
3000 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_vinfo
);
3001 new_pattern_def_seq (stmt_vinfo
, pattern_stmt
);
3002 cast_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (type
, NULL
),
3003 NOP_EXPR
, gimple_assign_lhs (pattern_stmt
));
3004 STMT_VINFO_RELATED_STMT (stmt_vinfo
) = cast_stmt
;
3005 return gimple_assign_lhs (cast_stmt
);
3009 /* Helper function of vect_recog_bool_pattern. Do the actual transformations,
3010 recursively. VAR is an SSA_NAME that should be transformed from bool
3011 to a wider integer type, OUT_TYPE is the desired final integer type of
3012 the whole pattern, TRUEVAL should be NULL unless optimizing
3013 BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
3014 in the then_clause, STMTS is where statements with added pattern stmts
3015 should be pushed to. */
3018 adjust_bool_pattern (tree var
, tree out_type
, tree trueval
,
3021 gimple stmt
= SSA_NAME_DEF_STMT (var
);
3022 enum tree_code rhs_code
, def_rhs_code
;
3023 tree itype
, cond_expr
, rhs1
, rhs2
, irhs1
, irhs2
;
3025 gimple pattern_stmt
, def_stmt
;
3027 rhs1
= gimple_assign_rhs1 (stmt
);
3028 rhs2
= gimple_assign_rhs2 (stmt
);
3029 rhs_code
= gimple_assign_rhs_code (stmt
);
3030 loc
= gimple_location (stmt
);
3035 irhs1
= adjust_bool_pattern (rhs1
, out_type
, NULL_TREE
, stmts
);
3036 itype
= TREE_TYPE (irhs1
);
3038 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
3043 irhs1
= adjust_bool_pattern (rhs1
, out_type
, NULL_TREE
, stmts
);
3044 itype
= TREE_TYPE (irhs1
);
3046 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
3047 BIT_XOR_EXPR
, irhs1
, build_int_cst (itype
, 1));
3051 /* Try to optimize x = y & (a < b ? 1 : 0); into
3052 x = (a < b ? y : 0);
3058 S1 a_b = x1 CMP1 y1;
3059 S2 b_b = x2 CMP2 y2;
3061 S4 d_T = (TYPE) c_b;
3063 we would normally emit:
3065 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3066 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3067 S3' c_T = a_T & b_T;
3070 but we can save one stmt by using the
3071 result of one of the COND_EXPRs in the other COND_EXPR and leave
3072 BIT_AND_EXPR stmt out:
3074 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3075 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3078 At least when VEC_COND_EXPR is implemented using masks
3079 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
3080 computes the comparison masks and ands it, in one case with
3081 all ones vector, in the other case with a vector register.
3082 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
3083 often more expensive. */
3084 def_stmt
= SSA_NAME_DEF_STMT (rhs2
);
3085 def_rhs_code
= gimple_assign_rhs_code (def_stmt
);
3086 if (TREE_CODE_CLASS (def_rhs_code
) == tcc_comparison
)
3088 tree def_rhs1
= gimple_assign_rhs1 (def_stmt
);
3089 irhs1
= adjust_bool_pattern (rhs1
, out_type
, NULL_TREE
, stmts
);
3090 if (TYPE_PRECISION (TREE_TYPE (irhs1
))
3091 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1
))))
3094 stmt_vec_info stmt_def_vinfo
= vinfo_for_stmt (def_stmt
);
3095 irhs2
= adjust_bool_pattern (rhs2
, out_type
, irhs1
, stmts
);
3096 tstmt
= stmts
->pop ();
3097 gcc_assert (tstmt
== def_stmt
);
3098 stmts
->quick_push (stmt
);
3099 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt
))
3100 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo
);
3101 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo
));
3102 STMT_VINFO_RELATED_STMT (stmt_def_vinfo
) = NULL
;
3106 irhs2
= adjust_bool_pattern (rhs2
, out_type
, NULL_TREE
, stmts
);
3109 def_stmt
= SSA_NAME_DEF_STMT (rhs1
);
3110 def_rhs_code
= gimple_assign_rhs_code (def_stmt
);
3111 if (TREE_CODE_CLASS (def_rhs_code
) == tcc_comparison
)
3113 tree def_rhs1
= gimple_assign_rhs1 (def_stmt
);
3114 irhs2
= adjust_bool_pattern (rhs2
, out_type
, NULL_TREE
, stmts
);
3115 if (TYPE_PRECISION (TREE_TYPE (irhs2
))
3116 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1
))))
3119 stmt_vec_info stmt_def_vinfo
= vinfo_for_stmt (def_stmt
);
3120 irhs1
= adjust_bool_pattern (rhs1
, out_type
, irhs2
, stmts
);
3121 tstmt
= stmts
->pop ();
3122 gcc_assert (tstmt
== def_stmt
);
3123 stmts
->quick_push (stmt
);
3124 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt
))
3125 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo
);
3126 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo
));
3127 STMT_VINFO_RELATED_STMT (stmt_def_vinfo
) = NULL
;
3131 irhs1
= adjust_bool_pattern (rhs1
, out_type
, NULL_TREE
, stmts
);
3137 irhs1
= adjust_bool_pattern (rhs1
, out_type
, NULL_TREE
, stmts
);
3138 irhs2
= adjust_bool_pattern (rhs2
, out_type
, NULL_TREE
, stmts
);
3140 if (TYPE_PRECISION (TREE_TYPE (irhs1
))
3141 != TYPE_PRECISION (TREE_TYPE (irhs2
)))
3143 int prec1
= TYPE_PRECISION (TREE_TYPE (irhs1
));
3144 int prec2
= TYPE_PRECISION (TREE_TYPE (irhs2
));
3145 int out_prec
= TYPE_PRECISION (out_type
);
3146 if (absu_hwi (out_prec
- prec1
) < absu_hwi (out_prec
- prec2
))
3147 irhs2
= adjust_bool_pattern_cast (TREE_TYPE (irhs1
), rhs2
);
3148 else if (absu_hwi (out_prec
- prec1
) > absu_hwi (out_prec
- prec2
))
3149 irhs1
= adjust_bool_pattern_cast (TREE_TYPE (irhs2
), rhs1
);
3152 irhs1
= adjust_bool_pattern_cast (out_type
, rhs1
);
3153 irhs2
= adjust_bool_pattern_cast (out_type
, rhs2
);
3156 itype
= TREE_TYPE (irhs1
);
3158 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
3159 rhs_code
, irhs1
, irhs2
);
3163 gcc_assert (TREE_CODE_CLASS (rhs_code
) == tcc_comparison
);
3164 if (TREE_CODE (TREE_TYPE (rhs1
)) != INTEGER_TYPE
3165 || !TYPE_UNSIGNED (TREE_TYPE (rhs1
))
3166 || (TYPE_PRECISION (TREE_TYPE (rhs1
))
3167 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1
)))))
3169 machine_mode mode
= TYPE_MODE (TREE_TYPE (rhs1
));
3171 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode
), 1);
3174 itype
= TREE_TYPE (rhs1
);
3175 cond_expr
= build2_loc (loc
, rhs_code
, itype
, rhs1
, rhs2
);
3176 if (trueval
== NULL_TREE
)
3177 trueval
= build_int_cst (itype
, 1);
3179 gcc_checking_assert (useless_type_conversion_p (itype
,
3180 TREE_TYPE (trueval
)));
3182 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
3183 COND_EXPR
, cond_expr
, trueval
,
3184 build_int_cst (itype
, 0));
3188 stmts
->safe_push (stmt
);
3189 gimple_set_location (pattern_stmt
, loc
);
3190 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt
)) = pattern_stmt
;
3191 return gimple_assign_lhs (pattern_stmt
);
3195 /* Function vect_recog_bool_pattern
3197 Try to find pattern like following:
3199 bool a_b, b_b, c_b, d_b, e_b;
3202 S1 a_b = x1 CMP1 y1;
3203 S2 b_b = x2 CMP2 y2;
3205 S4 d_b = x3 CMP3 y3;
3207 S6 f_T = (TYPE) e_b;
3209 where type 'TYPE' is an integral type. Or a similar pattern
3212 S6 f_Y = e_b ? r_Y : s_Y;
3214 as results from if-conversion of a complex condition.
3218 * LAST_STMT: A stmt at the end from which the pattern
3219 search begins, i.e. cast of a bool to
3224 * TYPE_IN: The type of the input arguments to the pattern.
3226 * TYPE_OUT: The type of the output of this pattern.
3228 * Return value: A new stmt that will be used to replace the pattern.
3230 Assuming size of TYPE is the same as size of all comparisons
3231 (otherwise some casts would be added where needed), the above
3232 sequence we create related pattern stmts:
3233 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3234 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3235 S4' d_T = x3 CMP3 y3 ? 1 : 0;
3236 S5' e_T = c_T | d_T;
3239 Instead of the above S3' we could emit:
3240 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3241 S3' c_T = a_T | b_T;
3242 but the above is more efficient. */
3245 vect_recog_bool_pattern (vec
<gimple
> *stmts
, tree
*type_in
,
3248 gimple last_stmt
= stmts
->pop ();
3249 enum tree_code rhs_code
;
3250 tree var
, lhs
, rhs
, vectype
;
3251 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (last_stmt
);
3252 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
3253 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
3254 gimple pattern_stmt
;
3256 if (!is_gimple_assign (last_stmt
))
3259 var
= gimple_assign_rhs1 (last_stmt
);
3260 lhs
= gimple_assign_lhs (last_stmt
);
3262 if ((TYPE_PRECISION (TREE_TYPE (var
)) != 1
3263 || !TYPE_UNSIGNED (TREE_TYPE (var
)))
3264 && TREE_CODE (TREE_TYPE (var
)) != BOOLEAN_TYPE
)
3267 rhs_code
= gimple_assign_rhs_code (last_stmt
);
3268 if (CONVERT_EXPR_CODE_P (rhs_code
))
3270 if (TREE_CODE (TREE_TYPE (lhs
)) != INTEGER_TYPE
3271 || TYPE_PRECISION (TREE_TYPE (lhs
)) == 1)
3273 vectype
= get_vectype_for_scalar_type (TREE_TYPE (lhs
));
3274 if (vectype
== NULL_TREE
)
3277 if (!check_bool_pattern (var
, loop_vinfo
, bb_vinfo
))
3280 rhs
= adjust_bool_pattern (var
, TREE_TYPE (lhs
), NULL_TREE
, stmts
);
3281 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
3282 if (useless_type_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (rhs
)))
3283 pattern_stmt
= gimple_build_assign (lhs
, SSA_NAME
, rhs
);
3286 = gimple_build_assign (lhs
, NOP_EXPR
, rhs
);
3287 *type_out
= vectype
;
3289 stmts
->safe_push (last_stmt
);
3290 if (dump_enabled_p ())
3291 dump_printf_loc (MSG_NOTE
, vect_location
,
3292 "vect_recog_bool_pattern: detected:\n");
3294 return pattern_stmt
;
3296 else if (rhs_code
== COND_EXPR
3297 && TREE_CODE (var
) == SSA_NAME
)
3299 vectype
= get_vectype_for_scalar_type (TREE_TYPE (lhs
));
3300 if (vectype
== NULL_TREE
)
3303 /* Build a scalar type for the boolean result that when
3304 vectorized matches the vector type of the result in
3305 size and number of elements. */
3307 = wi::udiv_trunc (TYPE_SIZE (vectype
),
3308 TYPE_VECTOR_SUBPARTS (vectype
)).to_uhwi ();
3310 = build_nonstandard_integer_type (prec
,
3311 TYPE_UNSIGNED (TREE_TYPE (var
)));
3312 if (get_vectype_for_scalar_type (type
) == NULL_TREE
)
3315 if (!check_bool_pattern (var
, loop_vinfo
, bb_vinfo
))
3318 rhs
= adjust_bool_pattern (var
, type
, NULL_TREE
, stmts
);
3319 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
3321 = gimple_build_assign (lhs
, COND_EXPR
,
3322 build2 (NE_EXPR
, boolean_type_node
,
3323 rhs
, build_int_cst (type
, 0)),
3324 gimple_assign_rhs2 (last_stmt
),
3325 gimple_assign_rhs3 (last_stmt
));
3326 *type_out
= vectype
;
3328 stmts
->safe_push (last_stmt
);
3329 if (dump_enabled_p ())
3330 dump_printf_loc (MSG_NOTE
, vect_location
,
3331 "vect_recog_bool_pattern: detected:\n");
3333 return pattern_stmt
;
3335 else if (rhs_code
== SSA_NAME
3336 && STMT_VINFO_DATA_REF (stmt_vinfo
))
3338 stmt_vec_info pattern_stmt_info
;
3339 vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
3340 gcc_assert (vectype
!= NULL_TREE
);
3341 if (!VECTOR_MODE_P (TYPE_MODE (vectype
)))
3343 if (!check_bool_pattern (var
, loop_vinfo
, bb_vinfo
))
3346 rhs
= adjust_bool_pattern (var
, TREE_TYPE (vectype
), NULL_TREE
, stmts
);
3347 lhs
= build1 (VIEW_CONVERT_EXPR
, TREE_TYPE (vectype
), lhs
);
3348 if (!useless_type_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (rhs
)))
3350 tree rhs2
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
3351 gimple cast_stmt
= gimple_build_assign (rhs2
, NOP_EXPR
, rhs
);
3352 new_pattern_def_seq (stmt_vinfo
, cast_stmt
);
3355 pattern_stmt
= gimple_build_assign (lhs
, SSA_NAME
, rhs
);
3356 pattern_stmt_info
= new_stmt_vec_info (pattern_stmt
, loop_vinfo
,
3358 set_vinfo_for_stmt (pattern_stmt
, pattern_stmt_info
);
3359 STMT_VINFO_DATA_REF (pattern_stmt_info
)
3360 = STMT_VINFO_DATA_REF (stmt_vinfo
);
3361 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info
)
3362 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo
);
3363 STMT_VINFO_DR_INIT (pattern_stmt_info
) = STMT_VINFO_DR_INIT (stmt_vinfo
);
3364 STMT_VINFO_DR_OFFSET (pattern_stmt_info
)
3365 = STMT_VINFO_DR_OFFSET (stmt_vinfo
);
3366 STMT_VINFO_DR_STEP (pattern_stmt_info
) = STMT_VINFO_DR_STEP (stmt_vinfo
);
3367 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info
)
3368 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo
);
3369 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo
)) = pattern_stmt
;
3370 *type_out
= vectype
;
3372 stmts
->safe_push (last_stmt
);
3373 if (dump_enabled_p ())
3374 dump_printf_loc (MSG_NOTE
, vect_location
,
3375 "vect_recog_bool_pattern: detected:\n");
3376 return pattern_stmt
;
3383 /* Mark statements that are involved in a pattern. */
3386 vect_mark_pattern_stmts (gimple orig_stmt
, gimple pattern_stmt
,
3387 tree pattern_vectype
)
3389 stmt_vec_info pattern_stmt_info
, def_stmt_info
;
3390 stmt_vec_info orig_stmt_info
= vinfo_for_stmt (orig_stmt
);
3391 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (orig_stmt_info
);
3392 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (orig_stmt_info
);
3395 pattern_stmt_info
= vinfo_for_stmt (pattern_stmt
);
3396 if (pattern_stmt_info
== NULL
)
3398 pattern_stmt_info
= new_stmt_vec_info (pattern_stmt
, loop_vinfo
,
3400 set_vinfo_for_stmt (pattern_stmt
, pattern_stmt_info
);
3402 gimple_set_bb (pattern_stmt
, gimple_bb (orig_stmt
));
3404 STMT_VINFO_RELATED_STMT (pattern_stmt_info
) = orig_stmt
;
3405 STMT_VINFO_DEF_TYPE (pattern_stmt_info
)
3406 = STMT_VINFO_DEF_TYPE (orig_stmt_info
);
3407 STMT_VINFO_VECTYPE (pattern_stmt_info
) = pattern_vectype
;
3408 STMT_VINFO_IN_PATTERN_P (orig_stmt_info
) = true;
3409 STMT_VINFO_RELATED_STMT (orig_stmt_info
) = pattern_stmt
;
3410 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info
)
3411 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info
);
3412 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info
))
3414 gimple_stmt_iterator si
;
3415 for (si
= gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info
));
3416 !gsi_end_p (si
); gsi_next (&si
))
3418 def_stmt
= gsi_stmt (si
);
3419 def_stmt_info
= vinfo_for_stmt (def_stmt
);
3420 if (def_stmt_info
== NULL
)
3422 def_stmt_info
= new_stmt_vec_info (def_stmt
, loop_vinfo
,
3424 set_vinfo_for_stmt (def_stmt
, def_stmt_info
);
3426 gimple_set_bb (def_stmt
, gimple_bb (orig_stmt
));
3427 STMT_VINFO_RELATED_STMT (def_stmt_info
) = orig_stmt
;
3428 STMT_VINFO_DEF_TYPE (def_stmt_info
) = vect_internal_def
;
3429 if (STMT_VINFO_VECTYPE (def_stmt_info
) == NULL_TREE
)
3430 STMT_VINFO_VECTYPE (def_stmt_info
) = pattern_vectype
;
3435 /* Function vect_pattern_recog_1
3438 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
3439 computation pattern.
3440 STMT: A stmt from which the pattern search should start.
3442 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
3443 expression that computes the same functionality and can be used to
3444 replace the sequence of stmts that are involved in the pattern.
3447 This function checks if the expression returned by PATTERN_RECOG_FUNC is
3448 supported in vector form by the target. We use 'TYPE_IN' to obtain the
3449 relevant vector type. If 'TYPE_IN' is already a vector type, then this
3450 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
3451 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
3452 to the available target pattern.
3454 This function also does some bookkeeping, as explained in the documentation
3455 for vect_recog_pattern. */
static void
vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
		      gimple_stmt_iterator si,
		      vec<gimple> *stmts_to_replace)
{
  gimple stmt = gsi_stmt (si), pattern_stmt;
  stmt_vec_info stmt_info;
  loop_vec_info loop_vinfo;
  tree pattern_vectype;
  tree type_in, type_out;
  enum tree_code code;
  int i;
  gimple next;

  /* STMTS_TO_REPLACE is a scratch vector reused across calls: seed it with
     the statement at SI; the recognition function may push additional
     statements that participate in the pattern.  */
  stmts_to_replace->truncate (0);
  stmts_to_replace->quick_push (stmt);
  pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
  if (!pattern_stmt)
    return;

  /* The last statement pushed is the one the pattern statement replaces.  */
  stmt = stmts_to_replace->last ();
  stmt_info = vinfo_for_stmt (stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);

  if (VECTOR_MODE_P (TYPE_MODE (type_in)))
    {
      /* No need to check target support (already checked by the pattern
         recognition function).  */
      pattern_vectype = type_out ? type_out : type_in;
    }
  else
    {
      machine_mode vec_mode;
      enum insn_code icode;
      optab optab;

      /* Check target support: TYPE_IN/TYPE_OUT are still scalar types here,
	 so first map them to the corresponding vector types.  */
      type_in = get_vectype_for_scalar_type (type_in);
      if (!type_in)
	return;
      if (type_out)
	type_out = get_vectype_for_scalar_type (type_out);
      else
	type_out = type_in;
      if (!type_out)
	return;
      pattern_vectype = type_out;

      if (is_gimple_assign (pattern_stmt))
	code = gimple_assign_rhs_code (pattern_stmt);
      else
	{
	  gcc_assert (is_gimple_call (pattern_stmt));
	  code = CALL_EXPR;
	}

      /* The pattern is only usable if the target has an instruction for
	 CODE on TYPE_IN whose result mode matches TYPE_OUT.  */
      optab = optab_for_tree_code (code, type_in, optab_default);
      vec_mode = TYPE_MODE (type_in);
      if (!optab
	  || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
	  || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
	return;
    }

  /* Found a vectorizable pattern.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "pattern recognized: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
    }

  /* Mark the stmts that are involved in the pattern.  */
  vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);

  /* Patterns cannot be vectorized using SLP, because they change the order of
     computation.  Drop the replaced statement from the reduction list so the
     SLP analysis does not consider it.  */
  if (loop_vinfo)
    FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
      if (next == stmt)
	LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);

  /* It is possible that additional pattern stmts are created and inserted in
     STMTS_TO_REPLACE.  We create a stmt_info for each of them, and mark the
     relevant statements.  (The last element was already handled above, hence
     the length () - 1 bound.)  */
  for (i = 0; stmts_to_replace->iterate (i, &stmt)
	      && (unsigned) i < (stmts_to_replace->length () - 1);
       i++)
    {
      stmt_info = vinfo_for_stmt (stmt);
      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "additional pattern stmt: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
	}

      vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
    }
}
/* Function vect_pattern_recog

   Input:
   LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
        computation idioms.

   Output - for each computation idiom that is detected we create a new stmt
        that provides the same functionality and that can be vectorized.  We
        also record some information in the struct_stmt_info of the relevant
        stmts, as explained below:

   At the entry to this function we have the following stmts, with the
   following initial value in the STMT_VINFO fields:

         stmt                     in_pattern_p  related_stmt    vec_stmt
         S1: a_i = ....                 -       -               -
         S2: a_2 = ..use(a_i)..         -       -               -
         S3: a_1 = ..use(a_2)..         -       -               -
         S4: a_0 = ..use(a_1)..         -       -               -
         S5: ... = ..use(a_0)..         -       -               -

   Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
   represented by a single stmt.  We then:
   - create a new stmt S6 equivalent to the pattern (the stmt is not
     inserted into the code)
   - fill in the STMT_VINFO fields as follows:

                                  in_pattern_p  related_stmt    vec_stmt
         S1: a_i = ....                 -       -               -
         S2: a_2 = ..use(a_i)..         -       -               -
         S3: a_1 = ..use(a_2)..         -       -               -
         S4: a_0 = ..use(a_1)..         true    S6              -
           '---> S6: a_new = ....       -       S4              -
         S5: ... = ..use(a_0)..         -       -               -

   (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
   to each other through the RELATED_STMT field).

   S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
   of S4 because it will replace all its uses.  Stmts {S1,S2,S3} will
   remain irrelevant unless used by stmts other than S4.

   If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
   (because they are marked as irrelevant).  It will vectorize S6, and record
   a pointer to the new vector stmt VS6 from S6 (as usual).
   S4 will be skipped, and S5 will be vectorized as usual:

                                  in_pattern_p  related_stmt    vec_stmt
         S1: a_i = ....                 -       -               -
         S2: a_2 = ..use(a_i)..         -       -               -
         S3: a_1 = ..use(a_2)..         -       -               -
         > VS6: va_new = ....           -       -               -
         S4: a_0 = ..use(a_1)..         true    S6              VS6
           '---> S6: a_new = ....       -       S4              VS6
         > VS5: ... = ..vuse(va_new)..  -       -               -
         S5: ... = ..use(a_0)..         -       -               -

   DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
   elsewhere), and we'll end up with:

         VS6: va_new = ....
         VS5: ... = ..vuse(va_new)..

   In case of more than one pattern statements, e.g., widen-mult with
   intermediate type:

     S1  a_t = ;
     S2  a_T = (TYPE) a_t;
       '--> S3: a_it = (interm_type) a_t;
     S4  prod_T = a_T * CONST;
       '--> S5: prod_T' = a_it w* CONST;

   there may be other users of a_T outside the pattern.  In that case S2 will
   be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
   and vectorized.  The vector stmt VS2 will be recorded in S2, and VS3 will
   be recorded in S3.  */
3638 vect_pattern_recog (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
)
3643 gimple_stmt_iterator si
;
3645 vect_recog_func_ptr vect_recog_func
;
3646 auto_vec
<gimple
, 1> stmts_to_replace
;
3649 if (dump_enabled_p ())
3650 dump_printf_loc (MSG_NOTE
, vect_location
,
3651 "=== vect_pattern_recog ===\n");
3655 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3656 bbs
= LOOP_VINFO_BBS (loop_vinfo
);
3657 nbbs
= loop
->num_nodes
;
3661 bbs
= &BB_VINFO_BB (bb_vinfo
);
3665 /* Scan through the loop stmts, applying the pattern recognition
3666 functions starting at each stmt visited: */
3667 for (i
= 0; i
< nbbs
; i
++)
3669 basic_block bb
= bbs
[i
];
3670 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
3672 if (bb_vinfo
&& (stmt
= gsi_stmt (si
))
3673 && vinfo_for_stmt (stmt
)
3674 && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt
)))
3677 /* Scan over all generic vect_recog_xxx_pattern functions. */
3678 for (j
= 0; j
< NUM_PATTERNS
; j
++)
3680 vect_recog_func
= vect_vect_recog_func_ptrs
[j
];
3681 vect_pattern_recog_1 (vect_recog_func
, si
,