/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006-2015 Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "dominance.h"
#include "gimple-pretty-print.h"
#include "internal-fn.h"
#include "gimple-iterator.h"
#include "insn-config.h"
#include "insn-codes.h"
#include "tree-data-ref.h"
#include "tree-vectorizer.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "diagnostic-core.h"
/* Pattern recognition functions  */
static gimple vect_recog_widen_sum_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_widen_mult_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_dot_prod_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_sad_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_pow_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_over_widening_pattern (vec<gimple> *, tree *,
						tree *);
static gimple vect_recog_widen_shift_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_rotate_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_vector_vector_shift_pattern (vec<gimple> *, tree *,
						      tree *);
static gimple vect_recog_divmod_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_mult_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_mixed_size_cond_pattern (vec<gimple> *, tree *,
						  tree *);
static gimple vect_recog_bool_pattern (vec<gimple> *, tree *, tree *);
static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
	vect_recog_widen_mult_pattern,
	vect_recog_widen_sum_pattern,
	vect_recog_dot_prod_pattern,
	vect_recog_sad_pattern,
	vect_recog_pow_pattern,
	vect_recog_widen_shift_pattern,
	vect_recog_over_widening_pattern,
	vect_recog_rotate_pattern,
	vect_recog_vector_vector_shift_pattern,
	vect_recog_divmod_pattern,
	vect_recog_mult_pattern,
	vect_recog_mixed_size_cond_pattern,
	vect_recog_bool_pattern};
static inline void
append_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
				      stmt);
}

static inline void
new_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
  append_pattern_def_seq (stmt_info, stmt);
}
/* Check whether STMT2 is in the same loop or basic block as STMT1.
   Which of the two applies depends on whether we're currently doing
   loop-based or basic-block-based vectorization, as determined by
   the vinfo_for_stmt for STMT1 (which must be defined).

   If this returns true, vinfo_for_stmt for STMT2 is guaranteed
   to be defined as well.  */

static bool
vect_same_loop_or_bb_p (gimple stmt1, gimple stmt2)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);

  if (!gimple_bb (stmt2))
    return false;

  if (loop_vinfo)
    {
      struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt2)))
	return false;
    }
  else
    {
      if (gimple_bb (stmt2) != BB_VINFO_BB (bb_vinfo)
	  || gimple_code (stmt2) == GIMPLE_PHI)
	return false;
    }

  gcc_assert (vinfo_for_stmt (stmt2));
  return true;
}
/* If the LHS of DEF_STMT has a single use, and that statement is
   in the same loop or basic block, return it.  */

static gimple
vect_single_imm_use (gimple def_stmt)
{
  tree lhs = gimple_assign_lhs (def_stmt);
  use_operand_p use_p;
  gimple use_stmt;

  if (!single_imm_use (lhs, &use_p, &use_stmt))
    return NULL;

  if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
    return NULL;

  return use_stmt;
}
/* Check whether NAME, an ssa-name used in USE_STMT,
   is a result of a type promotion, such that:
     DEF_STMT: NAME = NOP (name0)
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.  */

static bool
type_conversion_p (tree name, gimple use_stmt, bool check_sign,
		   tree *orig_type, gimple *def_stmt, bool *promotion)
{
  tree dummy;
  gimple dummy_gimple;
  loop_vec_info loop_vinfo;
  stmt_vec_info stmt_vinfo;
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;
  tree def;
  bb_vec_info bb_vinfo;

  stmt_vinfo = vinfo_for_stmt (use_stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  if (!vect_is_simple_use (name, use_stmt, loop_vinfo, bb_vinfo, def_stmt,
			   &def, &dt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (!*def_stmt)
    return false;

  if (!is_gimple_assign (*def_stmt))
    return false;

  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *orig_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
    return false;

  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
    *promotion = true;
  else
    *promotion = false;

  if (!vect_is_simple_use (oprnd0, *def_stmt, loop_vinfo,
			   bb_vinfo, &dummy_gimple, &dummy, &dt))
    return false;

  return true;
}
/* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
   is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var.  */

static tree
vect_recog_temp_ssa_var (tree type, gimple stmt)
{
  return make_temp_ssa_name (type, stmt, "patt");
}
/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE1 prod;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
   same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */
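
/* For illustration (an assumed example, not part of the original comment):
   a concrete source loop matching this pattern is

     short x[N], y[N];
     int sum = 0;
     for (i = 0; i < N; i++)
       sum += (int) x[i] * (int) y[i];

   Here 'type' is short and TYPE1 == TYPE2 == int, so each vector iteration
   can be replaced by a single DOT_PROD_EXPR, which targets with a
   dot-product instruction (e.g. PMADDWD on x86/SSE2) implement directly.  */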
static gimple
vect_recog_dot_prod_pattern (vec<gimple> *stmts, tree *type_in,
			     tree *type_out)
{
  gimple stmt, last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  tree oprnd00, oprnd01;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  tree prod_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  if (loop && nested_in_vect_loop_p (loop, last_stmt))
    return NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DPROD = DX * DY;
          DDPROD = (TYPE2) DPROD;
          sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type
     - sum is the same size as DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (oprnd0);
    }
  else
    {
      gimple def_stmt;

      oprnd0 = gimple_assign_rhs1 (last_stmt);
      oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), type)
	  || !types_compatible_p (TREE_TYPE (oprnd1), type))
        return NULL;
      stmt = last_stmt;

      if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
			     &promotion)
	  && promotion)
        {
          stmt = def_stmt;
          oprnd0 = gimple_assign_rhs1 (stmt);
        }
      else
        half_type = type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr  */
  if (TREE_CODE (oprnd0) != SSA_NAME)
    return NULL;

  prod_type = half_type;
  stmt = SSA_NAME_DEF_STMT (oprnd0);

  /* It could not be the dot_prod pattern if the stmt is outside the loop.  */
  if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (stmt))
    return NULL;
  stmt_vinfo = vinfo_for_stmt (stmt);
  gcc_assert (stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
    return NULL;
  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as a widening multiplication?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
        return NULL;
      stmt_vinfo = vinfo_for_stmt (stmt);
      gcc_assert (stmt_vinfo);
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
      oprnd00 = gimple_assign_rhs1 (stmt);
      oprnd01 = gimple_assign_rhs2 (stmt);
      STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (last_stmt))
	  = STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
    }
  else
    {
      tree half_type0, half_type1;
      gimple def_stmt;
      tree oprnd0, oprnd1;

      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
          || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
        return NULL;
      if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
			      &promotion)
	  || !promotion)
        return NULL;
      oprnd00 = gimple_assign_rhs1 (def_stmt);
      if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
			      &promotion)
	  || !promotion)
        return NULL;
      oprnd01 = gimple_assign_rhs1 (def_stmt);
      if (!types_compatible_p (half_type0, half_type1))
        return NULL;
      if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
        return NULL;
    }

  half_type = TREE_TYPE (oprnd00);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
				      oprnd00, oprnd01, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_dot_prod_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  return pattern_stmt;
}
/* Function vect_recog_sad_pattern

   Try to find the following Sum of Absolute Difference (SAD) pattern:

     unsigned type x_t, y_t;
     signed TYPE1 diff, abs_diff;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  diff = x_T - y_T;
     S6  abs_diff = ABS_EXPR <diff>;
     [S7  abs_diff = (TYPE2) abs_diff;  #optional]
     S8  sum_1 = abs_diff + sum_0;

   where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is
   the same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S8, the pattern
   {S3,S4,S5,S6,S7,S8} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        SAD_EXPR <x_t, y_t, sum_0>
  */
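
/* For illustration (an assumed example, not part of the original comment):
   a concrete source loop matching this pattern is

     unsigned char x[N], y[N];
     int sum = 0;
     for (i = 0; i < N; i++)
       {
         int diff = (int) x[i] - (int) y[i];
         sum += abs (diff);
       }

   i.e. the core of motion-estimation kernels; targets with a SAD
   instruction (e.g. PSADBW on x86/SSE2) can implement SAD_EXPR directly.  */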
static gimple
vect_recog_sad_pattern (vec<gimple> *stmts, tree *type_in,
			tree *type_out)
{
  gimple last_stmt = (*stmts)[0];
  tree sad_oprnd0, sad_oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree half_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  if (loop && nested_in_vect_loop_p (loop, last_stmt))
    return NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  tree sum_type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DDIFF = DX - DY;
          DAD = ABS_EXPR <DDIFF>;
          [DAD = (TYPE2) DAD;  #optional]
          sum_1 = DAD + sum_0;
     In which
     - DX is at least double the size of X
     - DY is at least double the size of Y
     - DX, DY, DDIFF, DAD all have the same type
     - sum is the same size as DAD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD w+ sum_0;    #widen summation
     or
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD + sum_0;     #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  tree plus_oprnd0, plus_oprnd1;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      gimple stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      sum_type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      plus_oprnd0 = gimple_assign_rhs1 (stmt);
      plus_oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (plus_oprnd0);
    }
  else
    {
      gimple def_stmt;

      plus_oprnd0 = gimple_assign_rhs1 (last_stmt);
      plus_oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (plus_oprnd0), sum_type)
	  || !types_compatible_p (TREE_TYPE (plus_oprnd1), sum_type))
        return NULL;

      /* The type conversion could be promotion, demotion,
         or just signed -> unsigned.  */
      if (type_conversion_p (plus_oprnd0, last_stmt, false,
                             &half_type, &def_stmt, &promotion))
        plus_oprnd0 = gimple_assign_rhs1 (def_stmt);
      else
        half_type = sum_type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that plus_oprnd1 is the reduction variable (defined by a
     loop-header phi), and plus_oprnd0 is an ssa-name defined by a stmt in the
     loop body.  Then check that plus_oprnd0 is defined by an abs_expr.  */

  if (TREE_CODE (plus_oprnd0) != SSA_NAME)
    return NULL;

  tree abs_type = half_type;
  gimple abs_stmt = SSA_NAME_DEF_STMT (plus_oprnd0);

  /* It could not be the sad pattern if the abs_stmt is outside the loop.  */
  if (!gimple_bb (abs_stmt)
      || !flow_bb_inside_loop_p (loop, gimple_bb (abs_stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (abs_stmt))
    return NULL;

  stmt_vec_info abs_stmt_vinfo = vinfo_for_stmt (abs_stmt);
  gcc_assert (abs_stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (abs_stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR)
    return NULL;

  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
  if (!types_compatible_p (TREE_TYPE (abs_oprnd), abs_type))
    return NULL;
  if (TYPE_UNSIGNED (abs_type))
    return NULL;

  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */

  if (TREE_CODE (abs_oprnd) != SSA_NAME)
    return NULL;

  gimple diff_stmt = SSA_NAME_DEF_STMT (abs_oprnd);

  /* It could not be the sad pattern if the diff_stmt is outside the loop.  */
  if (!gimple_bb (diff_stmt)
      || !flow_bb_inside_loop_p (loop, gimple_bb (diff_stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (diff_stmt))
    return NULL;

  stmt_vec_info diff_stmt_vinfo = vinfo_for_stmt (diff_stmt);
  gcc_assert (diff_stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (diff_stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (diff_stmt) != MINUS_EXPR)
    return NULL;

  tree half_type0, half_type1;
  gimple def_stmt;

  tree minus_oprnd0 = gimple_assign_rhs1 (diff_stmt);
  tree minus_oprnd1 = gimple_assign_rhs2 (diff_stmt);

  if (!types_compatible_p (TREE_TYPE (minus_oprnd0), abs_type)
      || !types_compatible_p (TREE_TYPE (minus_oprnd1), abs_type))
    return NULL;
  if (!type_conversion_p (minus_oprnd0, diff_stmt, false,
                          &half_type0, &def_stmt, &promotion)
      || !promotion)
    return NULL;
  sad_oprnd0 = gimple_assign_rhs1 (def_stmt);

  if (!type_conversion_p (minus_oprnd1, diff_stmt, false,
                          &half_type1, &def_stmt, &promotion)
      || !promotion)
    return NULL;
  sad_oprnd1 = gimple_assign_rhs1 (def_stmt);

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;
  if (TYPE_PRECISION (abs_type) < TYPE_PRECISION (half_type0) * 2
      || TYPE_PRECISION (sum_type) < TYPE_PRECISION (half_type0) * 2)
    return NULL;

  *type_in = TREE_TYPE (sad_oprnd0);
  *type_out = sum_type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  tree var = vect_recog_temp_ssa_var (sum_type, NULL);
  gimple pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd0,
					     sad_oprnd1, plus_oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_recog_sad_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  return pattern_stmt;
}
/* Handle widening operation by a constant.  At the moment we support MULT_EXPR
   and LSHIFT_EXPR.

   For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
   we check that CONST_OPRND is less than or equal to the size of HALF_TYPE.

   Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
   HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
   that satisfies the above restrictions, we can perform a widening operation
   from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
   with a_it = (interm_type) a_t;  Store such operation in *WSTMT.  */
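
/* For illustration (an assumed example, not part of the original comment):
   with HALF_TYPE == signed char and TYPE == int, a constant like 300 does
   not fit HALF_TYPE, but it does fit the intermediate 16-bit type, so

     S3  a_T = (TYPE) a_t;
     S5  res_T = a_T * 300;

   is rewritten to use a_it = (short) a_t; together with a short -> int
   widening multiplication.  */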
static bool
vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
		               tree const_oprnd, tree *oprnd,
			       gimple *wstmt, tree type,
			       tree *half_type, gimple def_stmt)
{
  tree new_type, new_oprnd;

  if (code != MULT_EXPR && code != LSHIFT_EXPR)
    return false;

  if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
        || (code == LSHIFT_EXPR
            && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
	    	!= 1))
      && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
    {
      /* CONST_OPRND is a constant of HALF_TYPE.  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      return true;
    }

  if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
    return false;

  if (!vect_same_loop_or_bb_p (stmt, def_stmt))
    return false;

  /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
     a type 2 times bigger than HALF_TYPE.  */
  new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
                                             TYPE_UNSIGNED (type));
  if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
      || (code == LSHIFT_EXPR
          && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
    return false;

  /* Use NEW_TYPE for widening operation and create a_T = (NEW_TYPE) a_t;  */
  *oprnd = gimple_assign_rhs1 (def_stmt);
  new_oprnd = make_ssa_name (new_type);
  *wstmt = gimple_build_assign (new_oprnd, NOP_EXPR, *oprnd);
  *oprnd = new_oprnd;

  *half_type = new_type;
  return true;
}
/* Function vect_recog_widen_mult_pattern

   Try to find the following pattern:

     type1 a_t;
     type2 b_t;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;

   where type 'TYPE' is at least double the size of type 'type1' and 'type2'.

   Also detect unsigned cases:

     unsigned type1 a_t;
     unsigned type2 b_t;
     unsigned TYPE u_prod_T;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;
     S6  u_prod_T = (unsigned TYPE) prod_T;

   and multiplication by constants:

     type a_t;
     TYPE a_T, prod_T;

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
     S5  prod_T = a_T * CONST;

   A special case of multiplication by constants is when 'TYPE' is 4 times
   bigger than 'type', but CONST fits an intermediate type 2 times smaller
   than 'TYPE'.  In that case we create an additional pattern stmt for S3
   to create a variable of the intermediate type, and perform widen-mult
   on the intermediate type as well:

     type a_t;
     interm_type a_it;
     TYPE a_T, prod_T, prod_T';

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
           '--> a_it = (interm_type) a_t;
     S5  prod_T = a_T * CONST;
           '--> prod_T' = a_it w* CONST;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
   is detected.  In case of unsigned widen-mult, the original stmt (S5) is
   replaced with S6 in STMTS.  In case of multiplication by a constant
   of an intermediate type (the last case above), STMTS also contains S3
   (inserted before S5).

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_MULT <a_t, b_t>
   If the result of WIDEN_MULT needs to be converted to a larger type, the
   returned stmt will be this type conversion stmt.
*/
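
/* For illustration (an assumed example, not part of the original comment):
   the canonical source form of this pattern is

     short a[N], b[N];
     int c[N];
     for (i = 0; i < N; i++)
       c[i] = (int) a[i] * (int) b[i];

   which becomes WIDEN_MULT_EXPR <a[i], b[i]>, producing two int vectors
   (hi/lo halves) from each pair of short vectors.  */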
static gimple
vect_recog_widen_mult_pattern (vec<gimple> *stmts,
			       tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  gimple def_stmt0, def_stmt1;
  tree oprnd0, oprnd1;
  tree type, half_type0, half_type1;
  gimple new_stmt = NULL, pattern_stmt = NULL;
  tree vectype, vecitype;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  bool op1_ok;
  bool promotion;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* Check argument 0.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
                          &promotion)
      || !promotion)
    return NULL;
  /* Check argument 1.  */
  op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
                              &def_stmt1, &promotion);

  if (op1_ok && promotion)
    {
      oprnd0 = gimple_assign_rhs1 (def_stmt0);
      oprnd1 = gimple_assign_rhs1 (def_stmt1);
    }
  else
    {
      if (TREE_CODE (oprnd1) == INTEGER_CST
          && TREE_CODE (half_type0) == INTEGER_TYPE
          && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
		                            &oprnd0, &new_stmt, type,
					    &half_type0, def_stmt0))
	{
	  half_type1 = half_type0;
	  oprnd1 = fold_convert (half_type1, oprnd1);
	}
      else
        return NULL;
    }

  /* If the two arguments have different sizes, convert the one with
     the smaller type into the larger type.  */
  if (TYPE_PRECISION (half_type0) != TYPE_PRECISION (half_type1))
    {
      /* If we already used up the single-stmt slot give up.  */
      if (new_stmt)
	return NULL;

      tree *oprnd = NULL;
      gimple def_stmt = NULL;

      if (TYPE_PRECISION (half_type0) < TYPE_PRECISION (half_type1))
	{
	  def_stmt = def_stmt0;
	  half_type0 = half_type1;
	  oprnd = &oprnd0;
	}
      else
	{
	  def_stmt = def_stmt1;
	  half_type1 = half_type0;
	  oprnd = &oprnd1;
	}

      tree old_oprnd = gimple_assign_rhs1 (def_stmt);
      tree new_oprnd = make_ssa_name (half_type0);
      new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, old_oprnd);
      *oprnd = new_oprnd;
    }

  /* Handle unsigned case.  Look for
     S6  u_prod_T = (unsigned TYPE) prod_T;
     Use unsigned TYPE as the type for WIDEN_MULT_EXPR.  */
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
    {
      gimple use_stmt;
      tree use_lhs;
      tree use_type;

      if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
	return NULL;

      use_stmt = vect_single_imm_use (last_stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt)
	  || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
	return NULL;

      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      if (!INTEGRAL_TYPE_P (use_type)
	  || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
	  || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
	return NULL;

      type = use_type;
      last_stmt = use_stmt;
    }

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;

  /* If TYPE is more than twice larger than HALF_TYPE, we use WIDEN_MULT
     to get an intermediate result of type ITYPE.  In this case we need
     to build a statement to convert this intermediate result to type TYPE.  */
  tree itype = type;
  if (TYPE_PRECISION (type) > TYPE_PRECISION (half_type0) * 2)
    itype = build_nonstandard_integer_type
              (GET_MODE_BITSIZE (TYPE_MODE (half_type0)) * 2,
               TYPE_UNSIGNED (type));

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_recog_widen_mult_pattern: detected:\n");

  /* Check target support  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vecitype = get_vectype_for_scalar_type (itype);
  if (!vectype
      || !vecitype
      || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
					  vecitype, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = get_vectype_for_scalar_type (type);

  /* Pattern supported.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (itype, NULL);
  pattern_stmt = gimple_build_assign (var, WIDEN_MULT_EXPR, oprnd0, oprnd1);

  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;

  /* If the original two operands have different sizes, we may need to convert
     the smaller one into the larger type.  If this is the case, at this point
     the new stmt is already built.  */
  if (new_stmt)
    {
      append_pattern_def_seq (stmt_vinfo, new_stmt);
      stmt_vec_info new_stmt_info
        = new_stmt_vec_info (new_stmt, loop_vinfo, bb_vinfo);
      set_vinfo_for_stmt (new_stmt, new_stmt_info);
      STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
    }

  /* If ITYPE is not TYPE, we need to build a type conversion stmt to convert
     the result of the widen-mult operation into type TYPE.  */
  if (itype != type)
    {
      append_pattern_def_seq (stmt_vinfo, pattern_stmt);
      stmt_vec_info pattern_stmt_info
        = new_stmt_vec_info (pattern_stmt, loop_vinfo, bb_vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
      STMT_VINFO_VECTYPE (pattern_stmt_info) = vecitype;
      pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
					  NOP_EXPR,
					  gimple_assign_lhs (pattern_stmt));
    }

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}
/* Function vect_recog_pow_pattern

   Try to find the following pattern:

     x = POW (y, N);

   with POW being one of pow, powf, powi, powif and N being
   either 2 or 0.5.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        x = x * x
   or
        x = sqrt (x)
*/
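
/* For illustration (an assumed example, not part of the original comment):

     y = pow (x, 2.0);   is rewritten as   y = x * x;
     y = pow (x, 0.5);   is rewritten as   y = sqrt (x);

   both of which stand a much better chance of being vectorized than a
   call to the generic pow function.  */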
static gimple
vect_recog_pow_pattern (vec<gimple> *stmts, tree *type_in,
			tree *type_out)
{
  gimple last_stmt = (*stmts)[0];
  tree fn, base, exp = NULL;
  gimple stmt;
  tree var;

  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  fn = gimple_call_fndecl (last_stmt);
  if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
    return NULL;

  switch (DECL_FUNCTION_CODE (fn))
    {
    case BUILT_IN_POWIF:
    case BUILT_IN_POWI:
    case BUILT_IN_POWF:
    case BUILT_IN_POW:
      base = gimple_call_arg (last_stmt, 0);
      exp = gimple_call_arg (last_stmt, 1);
      if (TREE_CODE (exp) != REAL_CST
	  && TREE_CODE (exp) != INTEGER_CST)
        return NULL;
      break;

    default:
      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  *type_out = NULL_TREE;

  /* Catch squaring.  */
  if ((tree_fits_shwi_p (exp)
       && tree_to_shwi (exp) == 2)
      || (TREE_CODE (exp) == REAL_CST
          && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
    {
      *type_in = TREE_TYPE (base);

      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign (var, MULT_EXPR, base, base);
      return stmt;
    }

  /* Catch square root.  */
  if (TREE_CODE (exp) == REAL_CST
      && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
    {
      tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
      *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
      if (*type_in)
	{
	  gcall *stmt = gimple_build_call (newfn, 1, base);
	  if (vectorizable_function (stmt, *type_in, *type_in)
	      != NULL_TREE)
	    {
	      var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
	      gimple_call_set_lhs (stmt, var);
	      return stmt;
	    }
	}
    }

  return NULL;
}
/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

     type x_t;
     TYPE x_T, sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e. we're
   summing elements of type 'type' into an accumulator of type 'TYPE'.  This
   is a special case of a reduction computation.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.  In the example,
   when this function is called with S3, the pattern {S2,S3} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
	 vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
	 intermediate results) rather than all N results.  Therefore, we
	 cannot allow this pattern when we want to get all the results and in
	 the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */
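
/* For illustration (an assumed example, not part of the original comment):
   a matching source loop is

     unsigned short x[N];
     unsigned int sum = 0;
     for (i = 0; i < N; i++)
       sum += (unsigned int) x[i];

   with type == unsigned short and TYPE == unsigned int.  */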
static gimple
vect_recog_widen_sum_pattern (vec<gimple> *stmts, tree *type_in,
			      tree *type_out)
{
  gimple stmt, last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  if (loop && nested_in_vect_loop_p (loop, last_stmt))
    return NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE) X;
          sum_1 = DX + sum_0;
     In which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a cast from type 'type' to type
     'TYPE'.  */

  if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
                          &promotion)
      || !promotion)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (stmt);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, oprnd0, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_widen_sum_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  return pattern_stmt;
}
/* Return TRUE if the operation in STMT can be performed on a smaller type.

   Input:
   STMT - a statement to check.
   DEF - we support operations with two operands, one of which is constant.
         The other operand can be defined by a demotion operation, or by a
         previous statement in a sequence of over-promoted operations.  In the
         latter case DEF is used to replace that operand.  (It is defined by a
         pattern statement we created for the previous statement in the
         sequence).

   Input/output:
   NEW_TYPE - Output: a smaller type that we are trying to use.  Input: if not
         NULL, it's the type of DEF.
   STMTS - additional pattern statements.  If a pattern statement (type
         conversion) is created in this function, its original statement is
         added to STMTS.

   Output:
   OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
         operands to use in the new pattern statement for STMT (will be created
         in vect_recog_over_widening_pattern ()).
   NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
         statements for STMT: the first one is a type promotion and the second
         one is the operation itself.  We return the type promotion statement
	 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
         the second pattern statement.  */
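
/* For illustration (an assumed example, not part of the original comment):
   in

     unsigned char a;
     int b = ((int) a >> 2) & 3;

   both the shift and the bitwise AND can be done in (at most) 16 bits
   rather than in the over-promoted 32-bit type; this helper validates one
   such statement at a time.  */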
static bool
vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
				  tree *op0, tree *op1, gimple *new_def_stmt,
				  vec<gimple> *stmts)
{
  enum tree_code code;
  tree const_oprnd, oprnd;
  tree interm_type = NULL_TREE, half_type, new_oprnd, type;
  gimple def_stmt, new_stmt;
  bool first = false;
  bool promotion;

  *op0 = NULL_TREE;
  *op1 = NULL_TREE;
  *new_def_stmt = NULL;

  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
      && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
    return false;

  oprnd = gimple_assign_rhs1 (stmt);
  const_oprnd = gimple_assign_rhs2 (stmt);
  type = gimple_expr_type (stmt);

  if (TREE_CODE (oprnd) != SSA_NAME
      || TREE_CODE (const_oprnd) != INTEGER_CST)
    return false;

  /* If oprnd has other uses besides that in stmt we cannot mark it
     as being part of a pattern only.  */
  if (!has_single_use (oprnd))
    return false;

  /* If we are in the middle of a sequence, we use DEF from a previous
     statement.  Otherwise, OPRND has to be a result of type promotion.  */
  if (*new_type)
    {
      half_type = *new_type;
      oprnd = def;
    }
  else
    {
      first = true;
      if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
			      &promotion)
	  || !promotion
	  || !vect_same_loop_or_bb_p (stmt, def_stmt))
        return false;
    }

  /* Can we perform the operation on a smaller type?  */
  switch (code)
    {
      case BIT_IOR_EXPR:
      case BIT_XOR_EXPR:
      case BIT_AND_EXPR:
        if (!int_fits_type_p (const_oprnd, half_type))
          {
            /* HALF_TYPE is not enough.  Try a bigger type if possible.  */
            if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
              return false;

            interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
            if (!int_fits_type_p (const_oprnd, interm_type))
              return false;
          }

        break;

      case LSHIFT_EXPR:
        /* Try intermediate type - HALF_TYPE is not enough for sure.  */
        if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
          return false;

        /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
          (e.g., if the original value was char, the shift amount is at most 8
           if we want to use short).  */
        if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
          return false;

        interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

        if (!vect_supportable_shift (code, interm_type))
          return false;

        break;

      case RSHIFT_EXPR:
        if (vect_supportable_shift (code, half_type))
          break;

        /* Try intermediate type - HALF_TYPE is not supported.  */
        if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
          return false;

        interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

        if (!vect_supportable_shift (code, interm_type))
          return false;

        break;

      default:
        gcc_unreachable ();
    }

  /* There are four possible cases:
     1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
        the first statement in the sequence)
        a. The original, HALF_TYPE, is not enough - we replace the promotion
           from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
        b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
           promotion.
     2. OPRND is defined by a pattern statement we created.
        a. Its type is not sufficient for the operation, we create a new stmt:
           a type conversion for OPRND from HALF_TYPE to INTERM_TYPE.  We store
           this statement in NEW_DEF_STMT, and it is later put in
	   STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
        b. OPRND is good to use in the new statement.  */
  if (first)
    {
      if (interm_type)
        {
          /* Replace the original type conversion HALF_TYPE->TYPE with
             HALF_TYPE->INTERM_TYPE.  */
          if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
            {
              new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
              /* Check if the already created pattern stmt is what we need.  */
              if (!is_gimple_assign (new_stmt)
                  || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt))
                  || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
                return false;

	      stmts->safe_push (def_stmt);
              oprnd = gimple_assign_lhs (new_stmt);
            }
          else
            {
              /* Create NEW_OPRND = (INTERM_TYPE) OPRND.  */
              oprnd = gimple_assign_rhs1 (def_stmt);
	      new_oprnd = make_ssa_name (interm_type);
	      new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
              STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
              stmts->safe_push (def_stmt);
              oprnd = new_oprnd;
            }
        }
      else
        {
          /* Retrieve the operand before the type promotion.  */
          oprnd = gimple_assign_rhs1 (def_stmt);
        }
    }
  else
    {
      if (interm_type)
        {
          /* Create a type conversion HALF_TYPE->INTERM_TYPE.  */
	  new_oprnd = make_ssa_name (interm_type);
	  new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
          oprnd = new_oprnd;
          *new_def_stmt = new_stmt;
        }

      /* Otherwise, OPRND is already set.  */
    }

  if (interm_type)
    *new_type = interm_type;
  else
    *new_type = half_type;

  *op0 = oprnd;
  *op1 = fold_convert (*new_type, const_oprnd);

  return true;
}
/* Try to find a statement or a sequence of statements that can be performed
   on a smaller type:

     type x_t;
     TYPE x_T, res0_T, res1_T;
   loop:
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  res0_T = op (x_T, C0);
     S4  res1_T = op (res0_T, C1);
     S5  ... = () res1_T;  - type demotion

   where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
   constants.
   Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
   be 'type' or some intermediate type.  For now, we expect S5 to be a type
   demotion operation.  We also check that S3 and S4 have only one use.  */
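
/* For illustration (an assumed example, not part of the original comment):
   the classic over-widening case is

     unsigned char a[N], b[N];
     for (i = 0; i < N; i++)
       b[i] = ((int) a[i] >> 2) & 7;

   C promotes a[i] to int, but the whole computation fits in a narrower
   type, allowing twice (or four times) as many elements per vector.  */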
static gimple
vect_recog_over_widening_pattern (vec<gimple> *stmts,
                                  tree *type_in, tree *type_out)
{
  gimple stmt = stmts->pop ();
  gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL,
	 use_stmt = NULL;
  tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
  tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
  bool first;
  tree type = NULL;

  first = true;
  while (1)
    {
      if (!vinfo_for_stmt (stmt)
          || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
        return NULL;

      new_def_stmt = NULL;
      if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
                                             &op0, &op1, &new_def_stmt,
                                             stmts))
        {
          if (first)
            return NULL;
          else
            break;
        }

      /* STMT can be performed on a smaller type.  Check its uses.  */
      use_stmt = vect_single_imm_use (stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt))
        return NULL;

      /* Create pattern statement for STMT.  */
      vectype = get_vectype_for_scalar_type (new_type);
      if (!vectype)
        return NULL;

      /* We want to collect all the statements for which we create pattern
         statements, except for the case when the last statement in the
         sequence doesn't have a corresponding pattern statement.  In such
         case we associate the last pattern statement with the last statement
         in the sequence.  Therefore, we only add the original statement to
         the list if we know that it is not the last.  */
      if (prev_stmt)
        stmts->safe_push (prev_stmt);

      var = vect_recog_temp_ssa_var (new_type, NULL);
      pattern_stmt
	= gimple_build_assign (var, gimple_assign_rhs_code (stmt), op0, op1);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
      new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "created pattern stmt: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
          dump_printf (MSG_NOTE, "\n");
        }

      type = gimple_expr_type (stmt);
      prev_stmt = stmt;
      stmt = use_stmt;

      first = false;
    }

  /* We got a sequence.  We expect it to end with a type demotion operation.
     Otherwise, we quit (for now).  There are three possible cases: the
     conversion is to NEW_TYPE (we don't do anything), the conversion is to
     a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
     NEW_TYPE differs (we create a new conversion statement).  */
  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
    {
      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      /* Support only type demotion or signedness change.  */
      if (!INTEGRAL_TYPE_P (use_type)
	  || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
        return NULL;

      /* Check that NEW_TYPE is not bigger than the conversion result.  */
      if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
	return NULL;

      if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
          || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
        {
          /* Create NEW_TYPE->USE_TYPE conversion.  */
	  new_oprnd = make_ssa_name (use_type);
	  pattern_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, var);
          STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;

          *type_in = get_vectype_for_scalar_type (new_type);
          *type_out = get_vectype_for_scalar_type (use_type);

          /* We created a pattern statement for the last statement in the
             sequence, so we don't need to associate it with the pattern
             statement created for PREV_STMT.  Therefore, we add PREV_STMT
             to the list in order to mark it later in vect_pattern_recog_1.  */
          if (prev_stmt)
            stmts->safe_push (prev_stmt);
        }
      else
        {
          if (prev_stmt)
	    STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
	       = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));

          *type_in = vectype;
          *type_out = NULL_TREE;
        }

      stmts->safe_push (use_stmt);
    }
  else
    /* TODO: support general case, create a conversion to the correct type.  */
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_over_widening_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  return pattern_stmt;
}
/* Detect widening shift pattern:

   type a_t;
   TYPE a_T, res_T;

   S1 a_t = ;
   S2 a_T = (TYPE) a_t;
   S3 res_T = a_T << CONST;

  where type 'TYPE' is at least double the size of type 'type'.

  Also detect cases where the shift result is immediately converted
  to another type 'result_type' that is no larger in size than 'TYPE'.
  In those cases we perform a widen-shift that directly results in
  'result_type', to avoid a possible over-widening situation:

  type a_t;
  TYPE a_T, res_T;
  result_type res_result;

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
  S3 res_T = a_T << CONST;
  S4 res_result = (result_type) res_T;
      '--> res_result' = a_t w<< CONST;

  And a case when 'TYPE' is 4 times bigger than 'type'.  In that case we
  create an additional pattern stmt for S2 to create a variable of an
  intermediate type, and perform widen-shift on the intermediate type:

  type a_t;
  interm_type a_it;
  TYPE a_T, res_T, res_T';

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
      '--> a_it = (interm_type) a_t;
  S3 res_T = a_T << CONST;
      '--> res_T' = a_it <<* CONST;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins.
    In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
    in STMTS.  When an intermediate type is used and a pattern statement is
    created for S2, we also put S2 here (before S3).

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the sequence of
    stmts that constitute the pattern.  In this case it will be:
    WIDEN_LSHIFT_EXPR <a_t, CONST>.  */
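
/* For illustration (an assumed example, not part of the original comment):
   a matching source loop is

     unsigned char a[N];
     unsigned short r[N];
     for (i = 0; i < N; i++)
       r[i] = (unsigned short) ((int) a[i] << 4);

   which becomes WIDEN_LSHIFT_EXPR <a[i], 4> on targets that provide a
   widening shift (e.g. VSHLL on ARM NEON).  */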
static gimple
vect_recog_widen_shift_pattern (vec<gimple> *stmts,
				tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  gimple def_stmt0;
  tree oprnd0, oprnd1;
  tree type, half_type0;
  gimple pattern_stmt;
  tree vectype, vectype_out = NULL_TREE;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  gimple use_stmt;
  bool promotion;

  if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
    return NULL;

  if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
    return NULL;

  /* Check operand 0: it has to be defined by a type promotion.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
			  &promotion)
      || !promotion)
    return NULL;

  /* Check operand 1: has to be positive.  We check that it fits the type
     in vect_handle_widen_op_by_const ().  */
  if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (def_stmt0);
  type = gimple_expr_type (last_stmt);

  /* Check for subsequent conversion to another type.  */
  use_stmt = vect_single_imm_use (last_stmt);
  if (use_stmt && is_gimple_assign (use_stmt)
      && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
      && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
    {
      tree use_lhs = gimple_assign_lhs (use_stmt);
      tree use_type = TREE_TYPE (use_lhs);

      if (INTEGRAL_TYPE_P (use_type)
	  && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
	{
	  last_stmt = use_stmt;
	  type = use_type;
	}
    }

  /* Check if this a widening operation.  */
  gimple wstmt = NULL;
  if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
       				      &oprnd0, &wstmt,
	                              type, &half_type0, def_stmt0))
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_recog_widen_shift_pattern: detected:\n");

  /* Check target support.  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);

  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
					  vectype_out, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt =
    gimple_build_assign (var, WIDEN_LSHIFT_EXPR, oprnd0, oprnd1);
  if (wstmt)
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
      bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
      new_pattern_def_seq (stmt_vinfo, wstmt);
      stmt_vec_info new_stmt_info
	= new_stmt_vec_info (wstmt, loop_vinfo, bb_vinfo);
      set_vinfo_for_stmt (wstmt, new_stmt_info);
      STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
    }

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}
/* Detect a rotate pattern that wouldn't be otherwise vectorized:

   type a_t, b_t, c_t;

   S0 a_t = b_t r<< c_t;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins,
    i.e. the shift/rotate stmt.  The original stmt (S0) is replaced
    with a sequence:

    S1 d_t = -c_t;
    S2 e_t = d_t & (B - 1);
    S3 f_t = b_t << c_t;
    S4 g_t = b_t >> e_t;
    S0 a_t = f_t | g_t;

    where B is element bitsize of type.

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the rotate
    S0 stmt.  */
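
/* For illustration (an assumed example, not part of the original comment):
   for a 32-bit unsigned element, a rotate-left by a variable amount is
   expanded here as

     a = (b << c) | (b >> ((-c) & 31));

   the mask with B - 1 keeps the right-shift amount in range even when
   c == 0, avoiding an undefined shift by the full bit width.  */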
static gimple
vect_recog_rotate_pattern (vec<gimple> *stmts, tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
  gimple pattern_stmt, def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  enum vect_def_type dt;
  optab optab1, optab2;
  edge ext_def = NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  lhs = gimple_assign_lhs (last_stmt);
  oprnd0 = gimple_assign_rhs1 (last_stmt);
  type = TREE_TYPE (oprnd0);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
      || !INTEGRAL_TYPE_P (type)
      || !TYPE_UNSIGNED (type))
    return NULL;

  if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
			   &def, &dt))
    return NULL;

  if (dt != vect_internal_def
      && dt != vect_constant_def
      && dt != vect_external_def)
    return NULL;

  vectype = get_vectype_for_scalar_type (type);
  if (vectype == NULL_TREE)
    return NULL;

  /* If vector/vector or vector/scalar rotate is supported by the target,
     don't do anything here.  */
  optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
  if (optab1
      && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
    return NULL;

  if (bb_vinfo != NULL || dt != vect_internal_def)
    {
      optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
      if (optab2
	  && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
	return NULL;
    }

  /* If vector/vector or vector/scalar shifts aren't supported by the target,
     don't do anything here either.  */
  optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
  optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
  if (!optab1
      || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
      || !optab2
      || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      if (bb_vinfo == NULL && dt == vect_internal_def)
	return NULL;
      optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
      optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
      if (!optab1
	  || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
	  || !optab2
	  || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
	return NULL;
    }

  *type_in = vectype;
  *type_out = vectype;
  if (*type_in == NULL_TREE)
    return NULL;

  if (dt == vect_external_def
      && TREE_CODE (oprnd1) == SSA_NAME
      && loop_vinfo)
    {
      struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      ext_def = loop_preheader_edge (loop);
      if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
	{
	  basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
	  if (bb == NULL
	      || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
	    ext_def = NULL;
	}
    }

  def = NULL_TREE;
  if (TREE_CODE (oprnd1) == INTEGER_CST
      || TYPE_MODE (TREE_TYPE (oprnd1)) == TYPE_MODE (type))
    def = oprnd1;
  else if (def_stmt && gimple_assign_cast_p (def_stmt))
    {
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (type)
	  && TYPE_PRECISION (TREE_TYPE (rhs1))
	     == TYPE_PRECISION (type))
	def = rhs1;
    }

  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
  if (def == NULL_TREE)
    {
      def = vect_recog_temp_ssa_var (type, NULL);
      def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
      if (ext_def)
	{
	  basic_block new_bb
	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
	  gcc_assert (!new_bb);
	}
      else
	append_pattern_def_seq (stmt_vinfo, def_stmt);
    }
  stype = TREE_TYPE (def);

  if (TREE_CODE (def) == INTEGER_CST)
    {
      if (!tree_fits_uhwi_p (def)
	  || tree_to_uhwi (def) >= GET_MODE_PRECISION (TYPE_MODE (type))
	  || integer_zerop (def))
	return NULL;
      def2 = build_int_cst (stype,
			    GET_MODE_PRECISION (TYPE_MODE (type))
			    - tree_to_uhwi (def));
    }
  else
    {
      tree vecstype = get_vectype_for_scalar_type (stype);
      stmt_vec_info def_stmt_vinfo;

      if (vecstype == NULL_TREE)
	return NULL;
      def2 = vect_recog_temp_ssa_var (stype, NULL);
      def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
      if (ext_def)
	{
	  basic_block new_bb
	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
	  gcc_assert (!new_bb);
	}
      else
	{
	  def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
	  set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
	  STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
	  append_pattern_def_seq (stmt_vinfo, def_stmt);
	}

      def2 = vect_recog_temp_ssa_var (stype, NULL);
      tree mask
	= build_int_cst (stype, GET_MODE_PRECISION (TYPE_MODE (stype)) - 1);
      def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
				      gimple_assign_lhs (def_stmt), mask);
      if (ext_def)
	{
	  basic_block new_bb
	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
	  gcc_assert (!new_bb);
	}
      else
	{
	  def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
	  set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
	  STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
	  append_pattern_def_seq (stmt_vinfo, def_stmt);
	}
    }

  var1 = vect_recog_temp_ssa_var (type, NULL);
  def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
					? LSHIFT_EXPR : RSHIFT_EXPR,
				  oprnd0, def);
  append_pattern_def_seq (stmt_vinfo, def_stmt);

  var2 = vect_recog_temp_ssa_var (type, NULL);
  def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
					? RSHIFT_EXPR : LSHIFT_EXPR,
				  oprnd0, def2);
  append_pattern_def_seq (stmt_vinfo, def_stmt);

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_recog_rotate_pattern: detected:\n");

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}
/* Detect a vector by vector shift pattern that wouldn't be otherwise
   vectorized:

   type a_t;
   TYPE b_T, res_T;

   S1 a_t = ;
   S2 b_T = ;
   S3 res_T = b_T op a_t;

  where type 'TYPE' is a type with different size than 'type',
  and op is <<, >> or rotate.

  Also detect cases:

   type a_t;
   TYPE b_T, c_T, res_T;

   S0 c_T = ;
   S1 a_t = (type) c_T;
   S2 b_T = ;
   S3 res_T = b_T op a_t;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins,
    i.e. the shift/rotate stmt.  The original stmt (S3) is replaced
    with a shift/rotate which has same type on both operands, in the
    second case just b_T op c_T, in the first case with added cast
    from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the shift/rotate
    S3 stmt.  */
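
/* For illustration (an assumed example, not part of the original comment):
   in

     int x[N];
     char s[N];
     for (i = 0; i < N; i++)
       x[i] = x[i] << s[i];

   the shift amount has a narrower type than the shifted value; the pattern
   replaces it with a shift whose operands both have type int, so a
   vector-by-vector shift instruction can be used.  */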
static gimple
vect_recog_vector_vector_shift_pattern (vec<gimple> *stmts,
					tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  tree oprnd0, oprnd1, lhs, var;
  gimple pattern_stmt, def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  enum vect_def_type dt;
  tree def;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  lhs = gimple_assign_lhs (last_stmt);
  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TREE_CODE (oprnd1) != SSA_NAME
      || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
      || TYPE_PRECISION (TREE_TYPE (oprnd1))
	 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
      || TYPE_PRECISION (TREE_TYPE (lhs))
	 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
    return NULL;

  if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
			   &def, &dt))
    return NULL;

  if (dt != vect_internal_def)
    return NULL;

  *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
  *type_out = *type_in;
  if (*type_in == NULL_TREE)
    return NULL;

  def = NULL_TREE;
  if (gimple_assign_cast_p (def_stmt))
    {
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
	  && TYPE_PRECISION (TREE_TYPE (rhs1))
	     == TYPE_PRECISION (TREE_TYPE (oprnd0)))
	def = rhs1;
    }

  if (def == NULL_TREE)
    {
      def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
      def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
      new_pattern_def_seq (stmt_vinfo, def_stmt);
    }

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_recog_vector_vector_shift_pattern: detected:\n");

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
  pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}
/* Detect multiplication by constants that are positive or negative powers
   of 2, and convert them to shift patterns.

   Mult with constants that are a positive power of two:
   type a_t, b_t;
   S1: b_t = a_t * n

   or

   Mult with constants that are a negative power of two:
   S2: b_t = a_t * -n

   Input/Output:

   STMTS: Contains a stmt from which the pattern search begins,
   i.e. the mult stmt.  Convert the mult operation to LSHIFT if the
   constant operand is a power of 2.
   S1': b_t = a_t << log2 (n)

   Convert the mult operation to LSHIFT followed by a NEGATE
   if the constant operand is a negative power of 2.
   type a_t, b_t, res_T;
   S2': b_t = a_t << log2 (n)
   S3': res_T = - (b_t)

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the multiplication
     S1 or S2 stmt.  */
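
/* For illustration only (hypothetical example, not from the original
   sources): with n == 8 the rewrite performed here is

     b[i] = a[i] * 8;    becomes   b[i] = a[i] << 3;
     b[i] = a[i] * -8;   becomes   t = a[i] << 3;  b[i] = -t;

   and it only fires when the target lacks a native vector multiply but
   supports vector shifts (and vector negate for the second form).  */
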
static gimple
vect_recog_mult_pattern (vec<gimple> *stmts,
                         tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  tree oprnd0, oprnd1, vectype, itype;
  gimple pattern_stmt, def_stmt;
  optab optab;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  int power2_val, power2_neg_val;
  tree shift;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  itype = TREE_TYPE (oprnd0);

  if (TREE_CODE (oprnd0) != SSA_NAME
      || TREE_CODE (oprnd1) != INTEGER_CST
      || !INTEGRAL_TYPE_P (itype)
      || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
    return NULL;

  vectype = get_vectype_for_scalar_type (itype);
  if (vectype == NULL_TREE)
    return NULL;

  /* If the target can handle vectorized multiplication natively,
     don't attempt to optimize this.  */
  optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
  if (optab != unknown_optab)
    {
      machine_mode vec_mode = TYPE_MODE (vectype);
      int icode = (int) optab_handler (optab, vec_mode);
      if (icode != CODE_FOR_nothing)
        return NULL;
    }

  /* If the target cannot handle vector left shift then we cannot
     optimize and bail out.  */
  optab = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
  if (!optab
      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    return NULL;

  power2_val = wi::exact_log2 (oprnd1);
  power2_neg_val = wi::exact_log2 (wi::neg (oprnd1));

  /* Handle constant operands that are positive or negative powers of 2.  */
  if (power2_val != -1)
    {
      shift = build_int_cst (itype, power2_val);
      pattern_stmt
        = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
                               LSHIFT_EXPR, oprnd0, shift);
    }
  else if (power2_neg_val != -1)
    {
      /* If the target cannot handle vector NEGATE then we cannot
         do the optimization.  */
      optab = optab_for_tree_code (NEGATE_EXPR, vectype, optab_vector);
      if (!optab
          || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
        return NULL;

      shift = build_int_cst (itype, power2_neg_val);
      def_stmt
        = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
                               LSHIFT_EXPR, oprnd0, shift);
      new_pattern_def_seq (stmt_vinfo, def_stmt);
      pattern_stmt
        = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
                               NEGATE_EXPR, gimple_assign_lhs (def_stmt));
    }
  else
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_recog_mult_pattern: detected:\n");

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM,
                          pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  *type_in = vectype;
  *type_out = vectype;

  return pattern_stmt;
}

/* Detect a signed division by a constant that wouldn't be
   otherwise vectorized:

   type a_t, b_t;

   S1 a_t = b_t / N;

  where type 'type' is an integral type and N is a constant.

  Similarly handle modulo by a constant:

   S4 a_t = b_t % N;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins,
    i.e. the division stmt.  S1 is replaced, if N is a power of two
    constant and type is signed, by:
  S3  y_t = b_t < 0 ? N - 1 : 0;
  S2  x_t = b_t + y_t;
  S1' a_t = x_t >> log2 (N);

    S4 is replaced, if N is a power of two constant and type is
    signed, by (where *_T temporaries have unsigned type):
  S9  y_T = b_t < 0 ? -1U : 0U;
  S8  z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
  S7  z_t = (type) z_T;
  S6  w_t = b_t + z_t;
  S5  x_t = w_t & (N - 1);
  S4' a_t = x_t - z_t;

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the division
    S1 or modulo S4 stmt.  */
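
/* For illustration only (hypothetical worked example, not from the
   original sources): for signed a = b / 4 the sequence above computes

     y = b < 0 ? 3 : 0;
     x = b + y;
     a = x >> 2;

   e.g. b == -7 gives x == -4 and a == -1, matching C's truncating
   division, whereas the plain arithmetic shift -7 >> 2 would give -2.  */
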
static gimple
vect_recog_divmod_pattern (vec<gimple> *stmts,
                           tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  tree oprnd0, oprnd1, vectype, itype, cond;
  gimple pattern_stmt, def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  optab optab;
  tree q;
  int dummy_int, prec;
  stmt_vec_info def_stmt_vinfo;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case TRUNC_DIV_EXPR:
    case TRUNC_MOD_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  itype = TREE_TYPE (oprnd0);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TREE_CODE (oprnd1) != INTEGER_CST
      || TREE_CODE (itype) != INTEGER_TYPE
      || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
    return NULL;

  vectype = get_vectype_for_scalar_type (itype);
  if (vectype == NULL_TREE)
    return NULL;

  /* If the target can handle vectorized division or modulo natively,
     don't attempt to optimize this.  */
  optab = optab_for_tree_code (rhs_code, vectype, optab_default);
  if (optab != unknown_optab)
    {
      machine_mode vec_mode = TYPE_MODE (vectype);
      int icode = (int) optab_handler (optab, vec_mode);
      if (icode != CODE_FOR_nothing)
        return NULL;
    }

  prec = TYPE_PRECISION (itype);
  if (integer_pow2p (oprnd1))
    {
      if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
        return NULL;

      /* Pattern detected.  */
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_recog_divmod_pattern: detected:\n");

      cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
                     build_int_cst (itype, 0));
      if (rhs_code == TRUNC_DIV_EXPR)
        {
          tree var = vect_recog_temp_ssa_var (itype, NULL);
          tree shift;
          def_stmt
            = gimple_build_assign (var, COND_EXPR, cond,
                                   fold_build2 (MINUS_EXPR, itype, oprnd1,
                                                build_int_cst (itype, 1)),
                                   build_int_cst (itype, 0));
          new_pattern_def_seq (stmt_vinfo, def_stmt);
          var = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt
            = gimple_build_assign (var, PLUS_EXPR, oprnd0,
                                   gimple_assign_lhs (def_stmt));
          append_pattern_def_seq (stmt_vinfo, def_stmt);

          shift = build_int_cst (itype, tree_log2 (oprnd1));
          pattern_stmt
            = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
                                   RSHIFT_EXPR, var, shift);
        }
      else
        {
          tree signmask;
          STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
          if (compare_tree_int (oprnd1, 2) == 0)
            {
              signmask = vect_recog_temp_ssa_var (itype, NULL);
              def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
                                              build_int_cst (itype, 1),
                                              build_int_cst (itype, 0));
              append_pattern_def_seq (stmt_vinfo, def_stmt);
            }
          else
            {
              tree utype
                = build_nonstandard_integer_type (prec, 1);
              tree vecutype = get_vectype_for_scalar_type (utype);
              tree shift
                = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
                                        - tree_log2 (oprnd1));
              tree var = vect_recog_temp_ssa_var (utype, NULL);

              def_stmt = gimple_build_assign (var, COND_EXPR, cond,
                                              build_int_cst (utype, -1),
                                              build_int_cst (utype, 0));
              def_stmt_vinfo
                = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
              set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
              STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
              append_pattern_def_seq (stmt_vinfo, def_stmt);
              var = vect_recog_temp_ssa_var (utype, NULL);
              def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
                                              gimple_assign_lhs (def_stmt),
                                              shift);
              def_stmt_vinfo
                = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
              set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
              STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
              append_pattern_def_seq (stmt_vinfo, def_stmt);
              signmask = vect_recog_temp_ssa_var (itype, NULL);
              def_stmt
                = gimple_build_assign (signmask, NOP_EXPR, var);
              append_pattern_def_seq (stmt_vinfo, def_stmt);
            }
          def_stmt
            = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
                                   PLUS_EXPR, oprnd0, signmask);
          append_pattern_def_seq (stmt_vinfo, def_stmt);
          def_stmt
            = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
                                   BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
                                   fold_build2 (MINUS_EXPR, itype, oprnd1,
                                                build_int_cst (itype, 1)));
          append_pattern_def_seq (stmt_vinfo, def_stmt);

          pattern_stmt
            = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
                                   MINUS_EXPR, gimple_assign_lhs (def_stmt),
                                   signmask);
        }

      if (dump_enabled_p ())
        dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
                              0);

      stmts->safe_push (last_stmt);

      *type_in = vectype;
      *type_out = vectype;
      return pattern_stmt;
    }

  if (prec > HOST_BITS_PER_WIDE_INT
      || integer_zerop (oprnd1))
    return NULL;

  if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
    return NULL;

  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;

  if (TYPE_UNSIGNED (itype))
    {
      unsigned HOST_WIDE_INT mh, ml;
      int pre_shift, post_shift;
      unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
                                  & GET_MODE_MASK (TYPE_MODE (itype)));
      tree t1, t2, t3, t4;

      if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
        /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0.  */
        return NULL;

      /* Find a suitable multiplier and right shift count
         instead of multiplying with D.  */
      mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);

      /* If the suggested multiplier is more than SIZE bits, we can do better
         for even divisors, using an initial right shift.  */
      if (mh != 0 && (d & 1) == 0)
        {
          pre_shift = floor_log2 (d & -d);
          mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
                                  &ml, &post_shift, &dummy_int);
          gcc_assert (!mh);
        }
      else
        pre_shift = 0;

      if (mh != 0)
        {
          if (post_shift - 1 >= prec)
            return NULL;

          /* t1 = oprnd0 h* ml;
             t2 = oprnd0 - t1;
             t3 = t2 >> 1;
             t4 = t1 + t3;
             q = t4 >> (post_shift - 1);  */
          t1 = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
                                          build_int_cst (itype, ml));
          append_pattern_def_seq (stmt_vinfo, def_stmt);

          t2 = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt
            = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
          append_pattern_def_seq (stmt_vinfo, def_stmt);

          t3 = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt
            = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
          append_pattern_def_seq (stmt_vinfo, def_stmt);

          t4 = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt
            = gimple_build_assign (t4, PLUS_EXPR, t1, t3);

          if (post_shift != 1)
            {
              append_pattern_def_seq (stmt_vinfo, def_stmt);

              q = vect_recog_temp_ssa_var (itype, NULL);
              pattern_stmt
                = gimple_build_assign (q, RSHIFT_EXPR, t4,
                                       build_int_cst (itype, post_shift - 1));
            }
          else
            {
              q = t4;
              pattern_stmt = def_stmt;
            }
        }
      else
        {
          if (pre_shift >= prec || post_shift >= prec)
            return NULL;

          /* t1 = oprnd0 >> pre_shift;
             t2 = t1 h* ml;
             q = t2 >> post_shift;  */
          if (pre_shift)
            {
              t1 = vect_recog_temp_ssa_var (itype, NULL);
              def_stmt
                = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
                                       build_int_cst (NULL, pre_shift));
              append_pattern_def_seq (stmt_vinfo, def_stmt);
            }
          else
            t1 = oprnd0;

          t2 = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
                                          build_int_cst (itype, ml));

          if (post_shift)
            {
              append_pattern_def_seq (stmt_vinfo, def_stmt);

              q = vect_recog_temp_ssa_var (itype, NULL);
              pattern_stmt
                = gimple_build_assign (q, RSHIFT_EXPR, t2,
                                       build_int_cst (itype, post_shift));
            }
          else
            {
              q = t2;
              pattern_stmt = def_stmt;
            }
        }
    }
  else
    {
      unsigned HOST_WIDE_INT ml;
      int post_shift;
      HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
      unsigned HOST_WIDE_INT abs_d;
      bool add = false;
      tree t1, t2, t3, t4;

      /* Give up for -1.  */
      if (d == -1)
        return NULL;

      /* Since d might be INT_MIN, we have to cast to
         unsigned HOST_WIDE_INT before negating to avoid
         undefined signed overflow.  */
      abs_d = (d >= 0
               ? (unsigned HOST_WIDE_INT) d
               : - (unsigned HOST_WIDE_INT) d);

      /* n rem d = n rem -d */
      if (rhs_code == TRUNC_MOD_EXPR && d < 0)
        {
          d = abs_d;
          oprnd1 = build_int_cst (itype, abs_d);
        }
      else if (HOST_BITS_PER_WIDE_INT >= prec
               && abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
        /* This case is not handled correctly below.  */
        return NULL;

      choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
      if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
        {
          add = true;
          ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
        }
      if (post_shift >= prec)
        return NULL;

      /* t1 = oprnd0 h* ml;  */
      t1 = vect_recog_temp_ssa_var (itype, NULL);
      def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
                                      build_int_cst (itype, ml));

      if (add)
        {
          /* t2 = t1 + oprnd0;  */
          append_pattern_def_seq (stmt_vinfo, def_stmt);
          t2 = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
        }
      else
        t2 = t1;

      if (post_shift)
        {
          /* t3 = t2 >> post_shift;  */
          append_pattern_def_seq (stmt_vinfo, def_stmt);
          t3 = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
                                          build_int_cst (itype, post_shift));
        }
      else
        t3 = t2;

      wide_int oprnd0_min, oprnd0_max;
      int msb = 1;
      if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
        {
          if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
            msb = 0;
          else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
            msb = -1;
        }

      if (msb == 0 && d >= 0)
        {
          /* q = t3;  */
          q = t3;
          pattern_stmt = def_stmt;
        }
      else
        {
          /* t4 = oprnd0 >> (prec - 1);
             or if we know from VRP that oprnd0 >= 0
             t4 = 0;
             or if we know from VRP that oprnd0 < 0
             t4 = -1;  */
          append_pattern_def_seq (stmt_vinfo, def_stmt);
          t4 = vect_recog_temp_ssa_var (itype, NULL);
          if (msb != 1)
            def_stmt = gimple_build_assign (t4, INTEGER_CST,
                                            build_int_cst (itype, msb));
          else
            def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
                                            build_int_cst (itype, prec - 1));
          append_pattern_def_seq (stmt_vinfo, def_stmt);

          /* q = t3 - t4;  or q = t4 - t3;  */
          q = vect_recog_temp_ssa_var (itype, NULL);
          pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
                                              d < 0 ? t3 : t4);
        }
    }

  if (rhs_code == TRUNC_MOD_EXPR)
    {
      tree r, t1;

      /* We divided.  Now finish by:
         t1 = q * oprnd1;
         r = oprnd0 - t1;  */
      append_pattern_def_seq (stmt_vinfo, pattern_stmt);

      t1 = vect_recog_temp_ssa_var (itype, NULL);
      def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
      append_pattern_def_seq (stmt_vinfo, def_stmt);

      r = vect_recog_temp_ssa_var (itype, NULL);
      pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
    }

  /* Pattern detected.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_divmod_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  stmts->safe_push (last_stmt);

  *type_in = vectype;
  *type_out = vectype;
  return pattern_stmt;
}

/* Function vect_recog_mixed_size_cond_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE a_T, b_T, c_T;
   loop:
     S1  a_T = x_t CMP y_t ? b_T : c_T;

   where type 'TYPE' is an integral type which has different size
   from 'type'.  b_T and c_T are either constants (and if 'TYPE' is wider
   than 'type', the constants need to fit into an integer type
   with the same width as 'type') or results of conversion from 'type'.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the pattern.
     Additionally a def_stmt is added.

     a_it = x_t CMP y_t ? b_it : c_it;
     a_T = (TYPE) a_it;  */
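
/* For illustration only (hypothetical example, not from the original
   sources): with a char comparison selecting int values,

     a[i] = x[i] < y[i] ? 3 : 5;    (x, y are char arrays, a is int)

   the COND_EXPR is first evaluated in a char-sized integer type and the
   result then widened to int, so the comparison and the selection
   vectorize with matching numbers of vector elements.  */
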
static gimple
vect_recog_mixed_size_cond_pattern (vec<gimple> *stmts, tree *type_in,
                                    tree *type_out)
{
  gimple last_stmt = (*stmts)[0];
  tree cond_expr, then_clause, else_clause;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
  tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
  machine_mode cmpmode;
  gimple pattern_stmt, def_stmt;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
  gimple def_stmt0 = NULL, def_stmt1 = NULL;
  bool promotion;
  tree comp_scalar_type;

  if (!is_gimple_assign (last_stmt)
      || gimple_assign_rhs_code (last_stmt) != COND_EXPR
      || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;

  cond_expr = gimple_assign_rhs1 (last_stmt);
  then_clause = gimple_assign_rhs2 (last_stmt);
  else_clause = gimple_assign_rhs3 (last_stmt);

  if (!COMPARISON_CLASS_P (cond_expr))
    return NULL;

  comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
  comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
  if (comp_vectype == NULL_TREE)
    return NULL;

  type = gimple_expr_type (last_stmt);
  if (types_compatible_p (type, comp_scalar_type)
      || ((TREE_CODE (then_clause) != INTEGER_CST
           || TREE_CODE (else_clause) != INTEGER_CST)
          && !INTEGRAL_TYPE_P (comp_scalar_type))
      || !INTEGRAL_TYPE_P (type))
    return NULL;

  if ((TREE_CODE (then_clause) != INTEGER_CST
       && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
                              &def_stmt0, &promotion))
      || (TREE_CODE (else_clause) != INTEGER_CST
          && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
                                 &def_stmt1, &promotion)))
    return NULL;

  if (orig_type0 && orig_type1
      && !types_compatible_p (orig_type0, orig_type1))
    return NULL;

  if (orig_type0)
    {
      if (!types_compatible_p (orig_type0, comp_scalar_type))
        return NULL;
      then_clause = gimple_assign_rhs1 (def_stmt0);
      itype = orig_type0;
    }

  if (orig_type1)
    {
      if (!types_compatible_p (orig_type1, comp_scalar_type))
        return NULL;
      else_clause = gimple_assign_rhs1 (def_stmt1);
      itype = orig_type1;
    }

  cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));

  if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
    return NULL;

  vectype = get_vectype_for_scalar_type (type);
  if (vectype == NULL_TREE)
    return NULL;

  if (expand_vec_cond_expr_p (vectype, comp_vectype))
    return NULL;

  if (itype == NULL_TREE)
    itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
                                            TYPE_UNSIGNED (type));

  if (itype == NULL_TREE
      || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
    return NULL;

  vecitype = get_vectype_for_scalar_type (itype);
  if (vecitype == NULL_TREE)
    return NULL;

  if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
    return NULL;

  if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
    {
      if ((TREE_CODE (then_clause) == INTEGER_CST
           && !int_fits_type_p (then_clause, itype))
          || (TREE_CODE (else_clause) == INTEGER_CST
              && !int_fits_type_p (else_clause, itype)))
        return NULL;
    }

  def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
                                  COND_EXPR, unshare_expr (cond_expr),
                                  fold_convert (itype, then_clause),
                                  fold_convert (itype, else_clause));
  pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
                                      NOP_EXPR, gimple_assign_lhs (def_stmt));

  new_pattern_def_seq (stmt_vinfo, def_stmt);
  def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
  set_vinfo_for_stmt (def_stmt, def_stmt_info);
  STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
  *type_in = vecitype;
  *type_out = vectype;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_recog_mixed_size_cond_pattern: detected:\n");

  return pattern_stmt;
}

/* Helper function of vect_recog_bool_pattern.  Called recursively, return
   true if bool VAR can be optimized that way.  */
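
/* For illustration only (hypothetical example, not from the original
   sources): check_bool_pattern accepts chains such as

     bool a_b = x < y;
     bool b_b = m > n;
     bool c_b = a_b & b_b;

   i.e. comparisons combined with &, |, ^, ~ and casts, provided each
   comparison can be expressed as a vectorizable VEC_COND_EXPR.  */
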
static bool
check_bool_pattern (tree var, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  gimple def_stmt;
  enum vect_def_type dt;
  tree def, rhs1;
  enum tree_code rhs_code;

  if (!vect_is_simple_use (var, NULL, loop_vinfo, bb_vinfo, &def_stmt, &def,
                           &dt))
    return false;

  if (dt != vect_internal_def)
    return false;

  if (!is_gimple_assign (def_stmt))
    return false;

  if (!has_single_use (def))
    return false;

  rhs1 = gimple_assign_rhs1 (def_stmt);
  rhs_code = gimple_assign_rhs_code (def_stmt);
  switch (rhs_code)
    {
    case SSA_NAME:
      return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);

    CASE_CONVERT:
      if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
           || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
          && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
        return false;
      return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);

    case BIT_NOT_EXPR:
      return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);

    case BIT_AND_EXPR:
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
      if (!check_bool_pattern (rhs1, loop_vinfo, bb_vinfo))
        return false;
      return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo,
                                 bb_vinfo);

    default:
      if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
        {
          tree vecitype, comp_vectype;

          /* If the comparison can throw, then is_gimple_condexpr will be
             false and we can't make a COND_EXPR/VEC_COND_EXPR out of it.  */
          if (stmt_could_throw_p (def_stmt))
            return false;

          comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
          if (comp_vectype == NULL_TREE)
            return false;

          if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
            {
              machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
              tree itype
                = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
              vecitype = get_vectype_for_scalar_type (itype);
              if (vecitype == NULL_TREE)
                return false;
            }
          else
            vecitype = comp_vectype;
          return expand_vec_cond_expr_p (vecitype, comp_vectype);
        }
      return false;
    }
}

/* Helper function of adjust_bool_pattern.  Add a cast to TYPE to a previous
   stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
   to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT.  */
static tree
adjust_bool_pattern_cast (tree type, tree var)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
  gimple cast_stmt, pattern_stmt;

  gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
  pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
  new_pattern_def_seq (stmt_vinfo, pattern_stmt);
  cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
                                   NOP_EXPR, gimple_assign_lhs (pattern_stmt));
  STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
  return gimple_assign_lhs (cast_stmt);
}

/* Helper function of vect_recog_bool_pattern.  Do the actual transformations,
   recursively.  VAR is an SSA_NAME that should be transformed from bool
   to a wider integer type, and OUT_TYPE is the desired final integer type of
   the whole pattern.  TRUEVAL should be NULL unless optimizing
   BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
   in the then_clause.  STMTS is where statements with added pattern stmts
   should be pushed to.  */
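
/* For illustration only (hypothetical example, not from the original
   sources): given

     bool a_b = x < y;
     int  d   = (int) a_b;

   adjust_bool_pattern rewrites the bool definition as

     int a_T = x < y ? 1 : 0;

   and for a_b & b_b it reuses one COND_EXPR result as the then_clause
   of the other (see the BIT_AND_EXPR case below) to save a stmt.  */
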
static tree
adjust_bool_pattern (tree var, tree out_type, tree trueval,
                     vec<gimple> *stmts)
{
  gimple stmt = SSA_NAME_DEF_STMT (var);
  enum tree_code rhs_code, def_rhs_code;
  tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
  location_t loc;
  gimple pattern_stmt, def_stmt;

  rhs1 = gimple_assign_rhs1 (stmt);
  rhs2 = gimple_assign_rhs2 (stmt);
  rhs_code = gimple_assign_rhs_code (stmt);
  loc = gimple_location (stmt);
  switch (rhs_code)
    {
    case SSA_NAME:
    CASE_CONVERT:
      irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
      itype = TREE_TYPE (irhs1);
      pattern_stmt
        = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
                               SSA_NAME, irhs1);
      break;

    case BIT_NOT_EXPR:
      irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
      itype = TREE_TYPE (irhs1);
      pattern_stmt
        = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
                               BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
      break;

    case BIT_AND_EXPR:
      /* Try to optimize x = y & (a < b ? 1 : 0); into
         x = (a < b ? y : 0);

         E.g. for:
           bool a_b, b_b;
           TYPE d_T;

           S1  a_b = x1 CMP1 y1;
           S2  b_b = x2 CMP2 y2;
           S3  c_b = a_b & b_b;
           S4  d_T = (TYPE) c_b;

         we would normally emit:

           S1'  a_T = x1 CMP1 y1 ? 1 : 0;
           S2'  b_T = x2 CMP2 y2 ? 1 : 0;
           S3'  c_T = a_T & b_T;
           S4'  d_T = c_T;

         but we can save one stmt by using the
         result of one of the COND_EXPRs in the other COND_EXPR and leave
         BIT_AND_EXPR stmt out:

           S1'  a_T = x1 CMP1 y1 ? 1 : 0;
           S3'  c_T = x2 CMP2 y2 ? a_T : 0;
           S4'  d_T = c_T;

         At least when VEC_COND_EXPR is implemented using masks
         cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
         computes the comparison masks and ands it, in one case with
         all ones vector, in the other case with a vector register.
         Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
         often more expensive.  */
      def_stmt = SSA_NAME_DEF_STMT (rhs2);
      def_rhs_code = gimple_assign_rhs_code (def_stmt);
      if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
        {
          tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
          irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
          if (TYPE_PRECISION (TREE_TYPE (irhs1))
              == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
            {
              gimple tstmt;
              stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
              irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
              tstmt = stmts->pop ();
              gcc_assert (tstmt == def_stmt);
              stmts->quick_push (stmt);
              STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
                = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
              gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
              STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
              return irhs2;
            }
          else
            irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
          goto and_ior_xor;
        }
      def_stmt = SSA_NAME_DEF_STMT (rhs1);
      def_rhs_code = gimple_assign_rhs_code (def_stmt);
      if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
        {
          tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
          irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
          if (TYPE_PRECISION (TREE_TYPE (irhs2))
              == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
            {
              gimple tstmt;
              stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
              irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
              tstmt = stmts->pop ();
              gcc_assert (tstmt == def_stmt);
              stmts->quick_push (stmt);
              STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
                = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
              gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
              STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
              return irhs1;
            }
          else
            irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
          goto and_ior_xor;
        }
      /* FALLTHRU */
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
      irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
      irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
    and_ior_xor:
      if (TYPE_PRECISION (TREE_TYPE (irhs1))
          != TYPE_PRECISION (TREE_TYPE (irhs2)))
        {
          int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
          int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
          int out_prec = TYPE_PRECISION (out_type);
          if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
            irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
          else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
            irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
          else
            {
              irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
              irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
            }
        }
      itype = TREE_TYPE (irhs1);
      pattern_stmt
        = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
                               rhs_code, irhs1, irhs2);
      break;

    default:
      gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
      if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
          || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
          || (TYPE_PRECISION (TREE_TYPE (rhs1))
              != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
        {
          machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
          itype
            = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
        }
      else
        itype = TREE_TYPE (rhs1);
      cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
      if (trueval == NULL_TREE)
        trueval = build_int_cst (itype, 1);
      else
        gcc_checking_assert (useless_type_conversion_p (itype,
                                                        TREE_TYPE (trueval)));
      pattern_stmt
        = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
                               COND_EXPR, cond_expr, trueval,
                               build_int_cst (itype, 0));
      break;
    }

  stmts->safe_push (stmt);
  gimple_set_location (pattern_stmt, loc);
  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
  return gimple_assign_lhs (pattern_stmt);
}

/* Function vect_recog_bool_pattern

   Try to find pattern like following:

     bool a_b, b_b, c_b, d_b, e_b;
     TYPE f_T;
   loop:
     S1  a_b = x1 CMP1 y1;
     S2  b_b = x2 CMP2 y2;
     S3  c_b = a_b & b_b;
     S4  d_b = x3 CMP3 y3;
     S5  e_b = c_b | d_b;
     S6  f_T = (TYPE) e_b;

   where type 'TYPE' is an integral type.  Or a similar pattern
   ending in

     S6  f_Y = e_b ? r_Y : s_Y;

   as results from if-conversion of a complex condition.

   Input:

   * LAST_STMT: A stmt at the end from which the pattern
                search begins, i.e. cast of a bool to
                an integer type.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the pattern.

        Assuming size of TYPE is the same as size of all comparisons
        (otherwise some casts would be added where needed), for the above
        sequence we create related pattern stmts:
        S1'  a_T = x1 CMP1 y1 ? 1 : 0;
        S3'  c_T = x2 CMP2 y2 ? a_T : 0;
        S4'  d_T = x3 CMP3 y3 ? 1 : 0;
        S5'  e_T = c_T | d_T;
        S6'  f_T = e_T;

        Instead of the above S3' we could emit:
        S2'  b_T = x2 CMP2 y2 ? 1 : 0;
        S3'  c_T = a_T & b_T;
        but the above is more efficient.  */
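
/* For illustration only (hypothetical example, not from the original
   sources): source code such as

     r[i] = (x[i] < y[i] && m[i] > n[i]) ? 7 : 0;

   if-converts into bool operations of the shape shown above; this
   pattern re-expresses them as integer COND_EXPRs so that the loop can
   be vectorized using VEC_COND_EXPRs.  */
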
static gimple
vect_recog_bool_pattern (vec<gimple> *stmts, tree *type_in,
                         tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  enum tree_code rhs_code;
  tree var, lhs, rhs, vectype;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  gimple pattern_stmt;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  var = gimple_assign_rhs1 (last_stmt);
  lhs = gimple_assign_lhs (last_stmt);

  if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
       || !TYPE_UNSIGNED (TREE_TYPE (var)))
      && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  if (CONVERT_EXPR_CODE_P (rhs_code))
    {
      if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
          || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
        return NULL;
      vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
      if (vectype == NULL_TREE)
        return NULL;

      if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
        return NULL;

      rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
      if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
        pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
      else
        pattern_stmt
          = gimple_build_assign (lhs, NOP_EXPR, rhs);
      *type_out = vectype;
      *type_in = vectype;
      stmts->safe_push (last_stmt);
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_recog_bool_pattern: detected:\n");

      return pattern_stmt;
    }
  else if (rhs_code == COND_EXPR
           && TREE_CODE (var) == SSA_NAME)
    {
      vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
      if (vectype == NULL_TREE)
        return NULL;

      /* Build a scalar type for the boolean result that when
         vectorized matches the vector type of the result in
         size and number of elements.  */
      unsigned prec
        = wi::udiv_trunc (TYPE_SIZE (vectype),
                          TYPE_VECTOR_SUBPARTS (vectype)).to_uhwi ();
      tree type
        = build_nonstandard_integer_type (prec,
                                          TYPE_UNSIGNED (TREE_TYPE (var)));
      if (get_vectype_for_scalar_type (type) == NULL_TREE)
        return NULL;

      if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
        return NULL;

      rhs = adjust_bool_pattern (var, type, NULL_TREE, stmts);
      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
      pattern_stmt
        = gimple_build_assign (lhs, COND_EXPR,
                               build2 (NE_EXPR, boolean_type_node,
                                       rhs, build_int_cst (type, 0)),
                               gimple_assign_rhs2 (last_stmt),
                               gimple_assign_rhs3 (last_stmt));
      *type_out = vectype;
      *type_in = vectype;
      stmts->safe_push (last_stmt);
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_recog_bool_pattern: detected:\n");

      return pattern_stmt;
    }
  else if (rhs_code == SSA_NAME
           && STMT_VINFO_DATA_REF (stmt_vinfo))
    {
      stmt_vec_info pattern_stmt_info;
      vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      gcc_assert (vectype != NULL_TREE);
      if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
        return NULL;
      if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
        return NULL;

      rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
      lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
      if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
        {
          tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
          gimple cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
          new_pattern_def_seq (stmt_vinfo, cast_stmt);
          rhs = rhs2;
        }
      pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
      pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
                                             bb_vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
      STMT_VINFO_DATA_REF (pattern_stmt_info)
        = STMT_VINFO_DATA_REF (stmt_vinfo);
      STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
        = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
      STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
      STMT_VINFO_DR_OFFSET (pattern_stmt_info)
        = STMT_VINFO_DR_OFFSET (stmt_vinfo);
      STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
      STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
        = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
      DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
      *type_out = vectype;
      *type_in = vectype;
      stmts->safe_push (last_stmt);
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_recog_bool_pattern: detected:\n");
      return pattern_stmt;
    }
  else
    return NULL;
}

/* Mark statements that are involved in a pattern.  */

static inline void
vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
                         tree pattern_vectype)
{
  stmt_vec_info pattern_stmt_info, def_stmt_info;
  stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (orig_stmt_info);
  gimple def_stmt;

  pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
  if (pattern_stmt_info == NULL)
    {
      pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
                                             bb_vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
    }
  gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));

  STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
  STMT_VINFO_DEF_TYPE (pattern_stmt_info)
    = STMT_VINFO_DEF_TYPE (orig_stmt_info);
  STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
  STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
  STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
  STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
    = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
  if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
    {
      gimple_stmt_iterator si;
      for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
           !gsi_end_p (si); gsi_next (&si))
        {
          def_stmt = gsi_stmt (si);
          def_stmt_info = vinfo_for_stmt (def_stmt);
          if (def_stmt_info == NULL)
            {
              def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo,
                                                 bb_vinfo);
              set_vinfo_for_stmt (def_stmt, def_stmt_info);
            }
          gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
          STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
          STMT_VINFO_DEF_TYPE (def_stmt_info) = vect_internal_def;
          if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
            STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
        }
    }
}

/* Function vect_pattern_recog_1

   Input:
   PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
        computation pattern.
   STMT: A stmt from which the pattern search should start.

   If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
   expression that computes the same functionality and can be used to
   replace the sequence of stmts that are involved in the pattern.

   Output:
   This function checks if the expression returned by PATTERN_RECOG_FUNC is
   supported in vector form by the target.  We use 'TYPE_IN' to obtain the
   relevant vector type.  If 'TYPE_IN' is already a vector type, then this
   indicates that target support had already been checked by
   PATTERN_RECOG_FUNC.  If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC,
   we check that it fits to the available target pattern.

   This function also does some bookkeeping, as explained in the
   documentation for vect_pattern_recog.  */
static void
vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
                      gimple_stmt_iterator si,
                      vec<gimple> *stmts_to_replace)
{
  gimple stmt = gsi_stmt (si), pattern_stmt;
  stmt_vec_info stmt_info;
  loop_vec_info loop_vinfo;
  tree pattern_vectype;
  tree type_in, type_out;
  enum tree_code code;
  int i;
  gimple next;

  stmts_to_replace->truncate (0);
  stmts_to_replace->quick_push (stmt);
  pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
  if (!pattern_stmt)
    return;

  stmt = stmts_to_replace->last ();
  stmt_info = vinfo_for_stmt (stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);

  if (VECTOR_MODE_P (TYPE_MODE (type_in)))
    {
      /* No need to check target support (already checked by the pattern
         recognition function).  */
      pattern_vectype = type_out ? type_out : type_in;
    }
  else
    {
      machine_mode vec_mode;
      enum insn_code icode;
      optab optab;

      /* Check target support  */
      type_in = get_vectype_for_scalar_type (type_in);
      if (!type_in)
        return;
      if (type_out)
        type_out = get_vectype_for_scalar_type (type_out);
      else
        type_out = type_in;
      if (!type_out)
        return;
      pattern_vectype = type_out;

      if (is_gimple_assign (pattern_stmt))
        code = gimple_assign_rhs_code (pattern_stmt);
      else
        {
          gcc_assert (is_gimple_call (pattern_stmt));
          code = CALL_EXPR;
        }

      optab = optab_for_tree_code (code, type_in, optab_default);
      vec_mode = TYPE_MODE (type_in);
      if (!optab
          || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
          || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
        return;
    }

  /* Found a vectorizable pattern.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "pattern recognized: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
    }

  /* Mark the stmts that are involved in the pattern. */
  vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);

  /* Patterns cannot be vectorized using SLP, because they change the order of
     computation.  */
  if (loop_vinfo)
    FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
      if (next == stmt)
        LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);

  /* It is possible that additional pattern stmts are created and inserted in
     STMTS_TO_REPLACE.  We create a stmt_info for each of them, and mark the
     relevant statements.  */
  for (i = 0; stmts_to_replace->iterate (i, &stmt)
              && (unsigned) i < (stmts_to_replace->length () - 1);
       i++)
    {
      stmt_info = vinfo_for_stmt (stmt);
      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "additional pattern stmt: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
        }

      vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
    }
}

/* Function vect_pattern_recog

   Input:
   LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
        computation idioms.

   Output - for each computation idiom that is detected we create a new stmt
        that provides the same functionality and that can be vectorized.  We
        also record some information in the struct_stmt_info of the relevant
        stmts, as explained below:

   At the entry to this function we have the following stmts, with the
   following initial value in the STMT_VINFO fields:

         stmt                     in_pattern_p  related_stmt    vec_stmt
         S1: a_i = ....                 -       -               -
         S2: a_2 = ..use(a_i)..         -       -               -
         S3: a_1 = ..use(a_2)..         -       -               -
         S4: a_0 = ..use(a_1)..         -       -               -
         S5: ... = ..use(a_0)..         -       -               -

   Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
   represented by a single stmt.  We then:
   - create a new stmt S6 equivalent to the pattern (the stmt is not
     inserted into the code)
   - fill in the STMT_VINFO fields as follows:

                                  in_pattern_p  related_stmt    vec_stmt
         S1: a_i = ....                 -       -               -
         S2: a_2 = ..use(a_i)..         -       -               -
         S3: a_1 = ..use(a_2)..         -       -               -
         S4: a_0 = ..use(a_1)..         true    S6              -
               '---> S6: a_new = ....   -       S4              -
         S5: ... = ..use(a_0)..         -       -               -

   (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
   to each other through the RELATED_STMT field).

   S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
   of S4 because it will replace all its uses.  Stmts {S1,S2,S3} will
   remain irrelevant unless used by stmts other than S4.

   If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
   (because they are marked as irrelevant).  It will vectorize S6, and record
   a pointer to the new vector stmt VS6 from S6 (as usual).
   S4 will be skipped, and S5 will be vectorized as usual:

                                  in_pattern_p  related_stmt    vec_stmt
         S1: a_i = ....                 -       -               -
         S2: a_2 = ..use(a_i)..         -       -               -
         S3: a_1 = ..use(a_2)..         -       -               -
         > VS6: va_new = ....           -       -               -
         S4: a_0 = ..use(a_1)..         true    S6              VS6
               '---> S6: a_new = ....   -       S4              VS6
         > VS5: ... = ..vuse(va_new)..  -       -               -
         S5: ... = ..use(a_0)..         -       -               -

   DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
   elsewhere), and we'll end up with:

         VS6: va_new = ....
         VS5: ... = ..vuse(va_new)..

   In case of more than one pattern statements, e.g., widen-mult with
   intermediate type:

     S1  a_t = ;
     S2  a_T = (TYPE) a_t;
           '--> S3: a_it = (interm_type) a_t;
     S4  prod_T = a_T * CONST;
           '--> S5: prod_T' = a_it w* CONST;

   there may be other users of a_T outside the pattern.  In that case S2 will
   be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
   and vectorized.  The vector stmt VS2 will be recorded in S2, and VS3 will
   be recorded in S3.  */
void
vect_pattern_recog (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  struct loop *loop;
  basic_block *bbs;
  unsigned int nbbs;
  gimple_stmt_iterator si;
  unsigned int i, j;
  vect_recog_func_ptr vect_recog_func;
  auto_vec<gimple, 1> stmts_to_replace;
  gimple stmt;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_pattern_recog ===\n");

  if (loop_vinfo)
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      bbs = LOOP_VINFO_BBS (loop_vinfo);
      nbbs = loop->num_nodes;
    }
  else
    {
      bbs = &BB_VINFO_BB (bb_vinfo);
      nbbs = 1;
    }

  /* Scan through the loop stmts, applying the pattern recognition
     functions starting at each stmt visited:  */
  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          if (bb_vinfo && (stmt = gsi_stmt (si))
              && vinfo_for_stmt (stmt)
              && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
            continue;

          /* Scan over all generic vect_recog_xxx_pattern functions.  */
          for (j = 0; j < NUM_PATTERNS; j++)
            {
              vect_recog_func = vect_vect_recog_func_ptrs[j];
              vect_pattern_recog_1 (vect_recog_func, si,