/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006-2014 Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "stor-layout.h"
#include "hard-reg-set.h"
#include "dominance.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-expr.h"
#include "gimple-iterator.h"
#include "gimple-ssa.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "insn-codes.h"
#include "tree-data-ref.h"
#include "tree-vectorizer.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "diagnostic-core.h"
/* Pattern recognition functions  */
static gimple vect_recog_widen_sum_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_widen_mult_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_dot_prod_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_sad_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_pow_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_over_widening_pattern (vec<gimple> *, tree *,
						tree *);
static gimple vect_recog_widen_shift_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_rotate_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_vector_vector_shift_pattern (vec<gimple> *, tree *,
						      tree *);
static gimple vect_recog_divmod_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_mixed_size_cond_pattern (vec<gimple> *, tree *,
						  tree *);
static gimple vect_recog_bool_pattern (vec<gimple> *, tree *, tree *);

static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
	vect_recog_widen_mult_pattern,
	vect_recog_widen_sum_pattern,
	vect_recog_dot_prod_pattern,
	vect_recog_sad_pattern,
	vect_recog_pow_pattern,
	vect_recog_widen_shift_pattern,
	vect_recog_over_widening_pattern,
	vect_recog_rotate_pattern,
	vect_recog_vector_vector_shift_pattern,
	vect_recog_divmod_pattern,
	vect_recog_mixed_size_cond_pattern,
	vect_recog_bool_pattern};
static inline void
append_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
				      stmt);
}

static inline void
new_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
  append_pattern_def_seq (stmt_info, stmt);
}
/* Check whether STMT2 is in the same loop or basic block as STMT1.
   Which of the two applies depends on whether we're currently doing
   loop-based or basic-block-based vectorization, as determined by
   the vinfo_for_stmt for STMT1 (which must be defined).

   If this returns true, vinfo_for_stmt for STMT2 is guaranteed
   to be defined as well.  */

static bool
vect_same_loop_or_bb_p (gimple stmt1, gimple stmt2)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);

  if (!gimple_bb (stmt2))
    return false;

  if (loop_vinfo)
    {
      struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt2)))
	return false;
    }
  else
    {
      if (gimple_bb (stmt2) != BB_VINFO_BB (bb_vinfo)
	  || gimple_code (stmt2) == GIMPLE_PHI)
	return false;
    }

  gcc_assert (vinfo_for_stmt (stmt2));
  return true;
}
/* If the LHS of DEF_STMT has a single use, and that statement is
   in the same loop or basic block, return it.  */

static gimple
vect_single_imm_use (gimple def_stmt)
{
  tree lhs = gimple_assign_lhs (def_stmt);
  use_operand_p use_p;
  gimple use_stmt;

  if (!single_imm_use (lhs, &use_p, &use_stmt))
    return NULL;

  if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
    return NULL;

  return use_stmt;
}
/* Check whether NAME, an ssa-name used in USE_STMT,
   is a result of a type promotion, such that:
     DEF_STMT: NAME = NOP (name0)
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.  */

static bool
type_conversion_p (tree name, gimple use_stmt, bool check_sign,
		   tree *orig_type, gimple *def_stmt, bool *promotion)
{
  tree dummy;
  gimple dummy_gimple;
  loop_vec_info loop_vinfo;
  stmt_vec_info stmt_vinfo;
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;
  tree def;
  bb_vec_info bb_vinfo;

  stmt_vinfo = vinfo_for_stmt (use_stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  if (!vect_is_simple_use (name, use_stmt, loop_vinfo, bb_vinfo, def_stmt,
			   &def, &dt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (!*def_stmt)
    return false;

  if (!is_gimple_assign (*def_stmt))
    return false;

  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *orig_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
    return false;

  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
    *promotion = true;
  else
    *promotion = false;

  if (!vect_is_simple_use (oprnd0, *def_stmt, loop_vinfo,
			   bb_vinfo, &dummy_gimple, &dummy, &dt))
    return false;

  return true;
}
/* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
   is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var.  */

static tree
vect_recog_temp_ssa_var (tree type, gimple stmt)
{
  return make_temp_ssa_name (type, stmt, "patt");
}
/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE1 prod;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
   same size of 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */
static gimple
vect_recog_dot_prod_pattern (vec<gimple> *stmts, tree *type_in,
			     tree *type_out)
{
  gimple stmt, last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  tree oprnd00, oprnd01;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  tree prod_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DPROD = DX * DY;
          DDPROD = (TYPE2) DPROD;
          sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type
     - sum is the same size of DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (oprnd0);
    }
  else
    {
      gimple def_stmt;

      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (last_stmt);
      oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), type)
	  || !types_compatible_p (TREE_TYPE (oprnd1), type))
        return NULL;
      stmt = last_stmt;

      if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
			     &promotion)
	  && promotion)
        {
          stmt = def_stmt;
          oprnd0 = gimple_assign_rhs1 (stmt);
        }
      else
        half_type = type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr  */
  if (TREE_CODE (oprnd0) != SSA_NAME)
    return NULL;

  prod_type = half_type;
  stmt = SSA_NAME_DEF_STMT (oprnd0);

  /* It could not be the dot_prod pattern if the stmt is outside the loop.  */
  if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is a phi
     inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (stmt))
    return NULL;
  stmt_vinfo = vinfo_for_stmt (stmt);
  gcc_assert (stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
    return NULL;
  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as a widening multiplication?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
        return NULL;
      stmt_vinfo = vinfo_for_stmt (stmt);
      gcc_assert (stmt_vinfo);
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
      oprnd00 = gimple_assign_rhs1 (stmt);
      oprnd01 = gimple_assign_rhs2 (stmt);
      STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (last_stmt))
	  = STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
    }
  else
    {
      tree half_type0, half_type1;
      gimple def_stmt;
      tree oprnd0, oprnd1;

      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
          || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
        return NULL;
      if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
			      &promotion)
	  || !promotion)
        return NULL;
      oprnd00 = gimple_assign_rhs1 (def_stmt);
      if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
			      &promotion)
	  || !promotion)
        return NULL;
      oprnd01 = gimple_assign_rhs1 (def_stmt);
      if (!types_compatible_p (half_type0, half_type1))
        return NULL;
      if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
	return NULL;
    }

  half_type = TREE_TYPE (oprnd00);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (DOT_PROD_EXPR, var,
					       oprnd00, oprnd01, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_dot_prod_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}
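
/* For illustration only (not part of the vectorizer itself): a C source
   loop of the shape the dot-product recognizer above is meant to match,
   assuming narrow inputs widened into an int accumulator.  The names are
   hypothetical and the block is guarded out of compilation.  */
#if 0
int
dot_prod_example (signed char *x, signed char *y, int n)
{
  int sum = 0, i;
  for (i = 0; i < n; i++)
    sum += x[i] * y[i];   /* S3-S7: widen, multiply, accumulate.  */
  return sum;
}
#endif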
/* Function vect_recog_sad_pattern

   Try to find the following Sum of Absolute Difference (SAD) pattern:

     unsigned type x_t, y_t;
     signed TYPE1 diff, abs_diff;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  diff = x_T - y_T;
     S6  abs_diff = ABS_EXPR <diff>;
     [S7  abs_diff = (TYPE2) abs_diff;  #optional]
     S8  sum_1 = abs_diff + sum_0;

   where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
   same size of 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S8, the pattern
   {S3,S4,S5,S6,S7,S8} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        SAD_EXPR <x_t, y_t, sum_0>
  */
static gimple
vect_recog_sad_pattern (vec<gimple> *stmts, tree *type_in,
			tree *type_out)
{
  gimple last_stmt = (*stmts)[0];
  tree sad_oprnd0, sad_oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree half_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  bool promotion;
  gimple def_stmt;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  tree sum_type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DDIFF = DX - DY;
          DAD = ABS_EXPR <DDIFF>;
          DAD = (TYPE2) DAD;
          sum_1 = DAD + sum_0;
     In which
     - DX is at least double the size of X
     - DY is at least double the size of Y
     - DX, DY, DDIFF, DAD all have the same type
     - sum is the same size of DAD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD w+ sum_0;    #widen summation
     or
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD + sum_0;     #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  tree plus_oprnd0, plus_oprnd1;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      gimple stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      sum_type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      plus_oprnd0 = gimple_assign_rhs1 (stmt);
      plus_oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (plus_oprnd0);
    }
  else
    {
      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
        return NULL;
      plus_oprnd0 = gimple_assign_rhs1 (last_stmt);
      plus_oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (plus_oprnd0), sum_type)
	  || !types_compatible_p (TREE_TYPE (plus_oprnd1), sum_type))
        return NULL;

      /* The type conversion could be promotion, demotion,
         or just signed -> unsigned.  */
      if (type_conversion_p (plus_oprnd0, last_stmt, false,
                             &half_type, &def_stmt, &promotion))
        plus_oprnd0 = gimple_assign_rhs1 (def_stmt);
      else
        half_type = sum_type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that plus_oprnd1 is the reduction variable (defined by a
     loop-header phi), and plus_oprnd0 is an ssa-name defined by a stmt in the
     loop body.  Then check that plus_oprnd0 is defined by an abs_expr.  */

  if (TREE_CODE (plus_oprnd0) != SSA_NAME)
    return NULL;

  tree abs_type = half_type;
  gimple abs_stmt = SSA_NAME_DEF_STMT (plus_oprnd0);

  /* It could not be the sad pattern if the abs_stmt is outside the loop.  */
  if (!gimple_bb (abs_stmt)
      || !flow_bb_inside_loop_p (loop, gimple_bb (abs_stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is a phi
     inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (abs_stmt))
    return NULL;

  stmt_vec_info abs_stmt_vinfo = vinfo_for_stmt (abs_stmt);
  gcc_assert (abs_stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (abs_stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR)
    return NULL;

  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
  if (!types_compatible_p (TREE_TYPE (abs_oprnd), abs_type))
    return NULL;
  if (TYPE_UNSIGNED (abs_type))
    return NULL;

  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */

  if (TREE_CODE (abs_oprnd) != SSA_NAME)
    return NULL;

  gimple diff_stmt = SSA_NAME_DEF_STMT (abs_oprnd);

  /* It could not be the sad pattern if the diff_stmt is outside the loop.  */
  if (!gimple_bb (diff_stmt)
      || !flow_bb_inside_loop_p (loop, gimple_bb (diff_stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is a phi
     inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (diff_stmt))
    return NULL;

  stmt_vec_info diff_stmt_vinfo = vinfo_for_stmt (diff_stmt);
  gcc_assert (diff_stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (diff_stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (diff_stmt) != MINUS_EXPR)
    return NULL;

  tree half_type0, half_type1;

  tree minus_oprnd0 = gimple_assign_rhs1 (diff_stmt);
  tree minus_oprnd1 = gimple_assign_rhs2 (diff_stmt);

  if (!types_compatible_p (TREE_TYPE (minus_oprnd0), abs_type)
      || !types_compatible_p (TREE_TYPE (minus_oprnd1), abs_type))
    return NULL;
  if (!type_conversion_p (minus_oprnd0, diff_stmt, false,
                          &half_type0, &def_stmt, &promotion)
      || !promotion)
    return NULL;
  sad_oprnd0 = gimple_assign_rhs1 (def_stmt);

  if (!type_conversion_p (minus_oprnd1, diff_stmt, false,
                          &half_type1, &def_stmt, &promotion)
      || !promotion)
    return NULL;
  sad_oprnd1 = gimple_assign_rhs1 (def_stmt);

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;
  if (TYPE_PRECISION (abs_type) < TYPE_PRECISION (half_type0) * 2
      || TYPE_PRECISION (sum_type) < TYPE_PRECISION (half_type0) * 2)
    return NULL;

  *type_in = TREE_TYPE (sad_oprnd0);
  *type_out = sum_type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  tree var = vect_recog_temp_ssa_var (sum_type, NULL);
  gimple pattern_stmt = gimple_build_assign_with_ops
                          (SAD_EXPR, var, sad_oprnd0, sad_oprnd1, plus_oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_sad_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}
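
/* For illustration only: a C source loop of the shape the SAD recognizer
   above targets, e.g. the inner loop of a motion-estimation kernel.  The
   names are hypothetical and the block is guarded out of compilation.  */
#if 0
int
sad_example (unsigned char *x, unsigned char *y, int n)
{
  int sum = 0, i;
  for (i = 0; i < n; i++)
    sum += __builtin_abs (x[i] - y[i]);  /* S3-S8: widen, sub, abs, sum.  */
  return sum;
}
#endif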
/* Handle widening operation by a constant.  At the moment we support
   MULT_EXPR and LSHIFT_EXPR.

   For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
   we check that CONST_OPRND is less than or equal to the size of HALF_TYPE.

   Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
   HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
   that satisfies the above restrictions, we can perform a widening operation
   from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
   with a_it = (interm_type) a_t;  */
static bool
vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
			       tree const_oprnd, tree *oprnd,
			       vec<gimple> *stmts, tree type,
			       tree *half_type, gimple def_stmt)
{
  tree new_type, new_oprnd;
  gimple new_stmt;

  if (code != MULT_EXPR && code != LSHIFT_EXPR)
    return false;

  if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
       || (code == LSHIFT_EXPR
	   && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
	      <= 0))
      && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
    {
      /* CONST_OPRND is a constant of HALF_TYPE.  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      return true;
    }

  if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
    return false;

  if (!vect_same_loop_or_bb_p (stmt, def_stmt))
    return false;

  /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
     a type 2 times bigger than HALF_TYPE.  */
  new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
					     TYPE_UNSIGNED (type));
  if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
      || (code == LSHIFT_EXPR
	  && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
    return false;

  /* Use NEW_TYPE for widening operation.  */
  if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
    {
      new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
      /* Check if the already created pattern stmt is what we need.  */
      if (!is_gimple_assign (new_stmt)
	  || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt))
	  || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
	return false;

      stmts->safe_push (def_stmt);
      *oprnd = gimple_assign_lhs (new_stmt);
    }
  else
    {
      /* Create a_T = (NEW_TYPE) a_t;  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      new_oprnd = make_ssa_name (new_type, NULL);
      new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd,
					       NULL_TREE);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
      stmts->safe_push (def_stmt);
      *oprnd = new_oprnd;
    }

  *half_type = new_type;
  return true;
}
/* Function vect_recog_widen_mult_pattern

   Try to find the following pattern:

     type1 a_t;
     type2 b_t;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;

   where type 'TYPE' is at least double the size of type 'type1' and 'type2'.

   Also detect unsigned cases:

     unsigned type1 a_t;
     unsigned type2 b_t;
     unsigned TYPE u_prod_T;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;
     S6  u_prod_T = (unsigned TYPE) prod_T;

   and multiplication by constants:

     type a_t;
     TYPE a_T, prod_T;

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
     S5  prod_T = a_T * CONST;

   A special case of multiplication by constants is when 'TYPE' is 4 times
   bigger than 'type', but CONST fits an intermediate type 2 times smaller
   than 'TYPE'.  In that case we create an additional pattern stmt for S3
   to create a variable of the intermediate type, and perform widen-mult
   on the intermediate type as well:

     type a_t;
     interm_type a_it;
     TYPE a_T, prod_T, prod_T';

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
           '--> a_it = (interm_type) a_t;
     S5  prod_T = a_T * CONST;
           '--> prod_T' = a_it w* CONST;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
   is detected.  In case of unsigned widen-mult, the original stmt (S5) is
   replaced with S6 in STMTS.  In case of multiplication by a constant
   of an intermediate type (the last case above), STMTS also contains S3
   (inserted before S5).

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_MULT <a_t, b_t>
   If the result of WIDEN_MULT needs to be converted to a larger type, the
   returned stmt will be this type conversion stmt.  */
static gimple
vect_recog_widen_mult_pattern (vec<gimple> *stmts,
			       tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  gimple def_stmt0, def_stmt1;
  tree oprnd0, oprnd1;
  tree type, half_type0, half_type1;
  gimple new_stmt = NULL, pattern_stmt = NULL;
  tree vectype, vecitype;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  bool op1_ok;
  bool promotion;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* Check argument 0.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
			  &promotion)
      || !promotion)
    return NULL;
  /* Check argument 1.  */
  op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
			      &def_stmt1, &promotion);

  if (op1_ok && promotion)
    {
      oprnd0 = gimple_assign_rhs1 (def_stmt0);
      oprnd1 = gimple_assign_rhs1 (def_stmt1);
    }
  else
    {
      if (TREE_CODE (oprnd1) == INTEGER_CST
	  && TREE_CODE (half_type0) == INTEGER_TYPE
	  && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
					    &oprnd0, stmts, type,
					    &half_type0, def_stmt0))
	{
	  half_type1 = half_type0;
	  oprnd1 = fold_convert (half_type1, oprnd1);
	}
      else
	return NULL;
    }

  /* If the two arguments have different sizes, convert the one with
     the smaller type into the larger type.  */
  if (TYPE_PRECISION (half_type0) != TYPE_PRECISION (half_type1))
    {
      tree *oprnd = NULL;
      gimple def_stmt = NULL;

      if (TYPE_PRECISION (half_type0) < TYPE_PRECISION (half_type1))
	{
	  def_stmt = def_stmt0;
	  half_type0 = half_type1;
	  oprnd = &oprnd0;
	}
      else
	{
	  def_stmt = def_stmt1;
	  half_type1 = half_type0;
	  oprnd = &oprnd1;
	}

      tree old_oprnd = gimple_assign_rhs1 (def_stmt);
      tree new_oprnd = make_ssa_name (half_type0, NULL);
      new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
					       old_oprnd, NULL_TREE);
      *oprnd = new_oprnd;
    }

  /* Handle unsigned case.  Look for
     S6  u_prod_T = (unsigned TYPE) prod_T;
     Use unsigned TYPE as the type for WIDEN_MULT_EXPR.  */
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
    {
      gimple use_stmt;
      tree use_lhs;
      tree use_type;

      if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
	return NULL;

      use_stmt = vect_single_imm_use (last_stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt)
	  || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
	return NULL;

      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      if (!INTEGRAL_TYPE_P (use_type)
	  || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
	  || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
	return NULL;

      type = use_type;
      last_stmt = use_stmt;
    }

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;

  /* If TYPE is more than twice larger than HALF_TYPE, we use WIDEN_MULT
     to get an intermediate result of type ITYPE.  In this case we need
     to build a statement to convert this intermediate result to type TYPE.  */
  tree itype = type;
  if (TYPE_PRECISION (type) > TYPE_PRECISION (half_type0) * 2)
    itype = build_nonstandard_integer_type
	      (GET_MODE_BITSIZE (TYPE_MODE (half_type0)) * 2,
	       TYPE_UNSIGNED (type));

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_recog_widen_mult_pattern: detected:\n");

  /* Check target support  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vecitype = get_vectype_for_scalar_type (itype);
  if (!vectype
      || !vecitype
      || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
					  vecitype, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = get_vectype_for_scalar_type (type);

  /* Pattern supported.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (itype, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, var, oprnd0,
					       oprnd1);

  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;

  /* If the original two operands have different sizes, we may need to convert
     the smaller one into the larger type.  If this is the case, at this point
     the new stmt is already built.  */
  if (new_stmt)
    {
      append_pattern_def_seq (stmt_vinfo, new_stmt);
      stmt_vec_info new_stmt_info
	= new_stmt_vec_info (new_stmt, loop_vinfo, bb_vinfo);
      set_vinfo_for_stmt (new_stmt, new_stmt_info);
      STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
    }

  /* If ITYPE is not TYPE, we need to build a type conversion stmt to convert
     the result of the widen-mult operation into type TYPE.  */
  if (itype != type)
    {
      append_pattern_def_seq (stmt_vinfo, pattern_stmt);
      stmt_vec_info pattern_stmt_info
	= new_stmt_vec_info (pattern_stmt, loop_vinfo, bb_vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
      STMT_VINFO_VECTYPE (pattern_stmt_info) = vecitype;
      pattern_stmt
	= gimple_build_assign_with_ops (NOP_EXPR,
					vect_recog_temp_ssa_var (type, NULL),
					gimple_assign_lhs (pattern_stmt),
					NULL_TREE);
    }

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}
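
/* For illustration only: source loops of the shape the widen-mult
   recognizer handles, assuming short operands promoted to int.  The first
   loop shows two promoted operands, the second multiplication by a
   constant; names are hypothetical and the block is guarded out.  */
#if 0
void
widen_mult_example (short *a, short *b, int *prod, int n)
{
  int i;
  for (i = 0; i < n; i++)
    prod[i] = a[i] * b[i];  /* S3-S5 -> WIDEN_MULT <a_t, b_t>.  */
  for (i = 0; i < n; i++)
    prod[i] = a[i] * 37;    /* S5: multiplication by a constant.  */
}
#endif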
/* Function vect_recog_pow_pattern

   Try to find the following pattern:

     x = POW (y, N);

   with POW being one of pow, powf, powi, powif and N being
   either 2 or 0.5.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        x = x * x
   or
	x = sqrt (x)
*/
static gimple
vect_recog_pow_pattern (vec<gimple> *stmts, tree *type_in,
			tree *type_out)
{
  gimple last_stmt = (*stmts)[0];
  tree fn, base, exp = NULL;
  gimple stmt;
  tree var;

  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  fn = gimple_call_fndecl (last_stmt);
  if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
    return NULL;

  switch (DECL_FUNCTION_CODE (fn))
    {
    case BUILT_IN_POWIF:
    case BUILT_IN_POWI:
    case BUILT_IN_POWF:
    case BUILT_IN_POW:
      base = gimple_call_arg (last_stmt, 0);
      exp = gimple_call_arg (last_stmt, 1);
      if (TREE_CODE (exp) != REAL_CST
	  && TREE_CODE (exp) != INTEGER_CST)
        return NULL;
      break;

    default:
      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  *type_out = NULL_TREE;

  /* Catch squaring.  */
  if ((tree_fits_shwi_p (exp)
       && tree_to_shwi (exp) == 2)
      || (TREE_CODE (exp) == REAL_CST
	  && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
    {
      *type_in = TREE_TYPE (base);

      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign_with_ops (MULT_EXPR, var, base, base);
      return stmt;
    }

  /* Catch square root.  */
  if (TREE_CODE (exp) == REAL_CST
      && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
    {
      tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
      *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
      if (*type_in)
	{
	  gimple stmt = gimple_build_call (newfn, 1, base);
	  if (vectorizable_function (stmt, *type_in, *type_in)
	      != NULL_TREE)
	    {
	      var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
	      gimple_call_set_lhs (stmt, var);
	      return stmt;
	    }
	}
    }

  return NULL;
}
/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

     type x_t;
     TYPE x_T, sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e. we're
   summing elements of type 'type' into an accumulator of type 'TYPE'.  This
   is a special case of a reduction computation.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.  In the example,
   when this function is called with S3, the pattern {S2,S3} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
	 vectorized without preserving all the intermediate results.  It
	 produces only N/2 (widened) results (by summing up pairs of
	 intermediate results) rather than all N results.  Therefore, we
	 cannot allow this pattern when we want to get all the results and in
	 the correct order (as is the case when this computation is in an
	 inner-loop nested in an outer-loop that is being vectorized).  */
static gimple
vect_recog_widen_sum_pattern (vec<gimple> *stmts, tree *type_in,
			      tree *type_out)
{
  gimple stmt, last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE) X;
          sum_1 = DX + sum_0;
     In which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a cast from type 'type' to type
     'TYPE'.  */

  if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
			  &promotion)
      || !promotion)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (stmt);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_SUM_EXPR, var,
					       oprnd0, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_widen_sum_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}
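
/* For illustration only: a C source loop of the shape the widen-sum
   recognizer above matches; names are hypothetical and the block is
   guarded out of compilation.  */
#if 0
int
widen_sum_example (unsigned short *x, int n)
{
  int sum = 0, i;
  for (i = 0; i < n; i++)
    sum += x[i];   /* S2-S3: widen x_t and accumulate -> WIDEN_SUM.  */
  return sum;
}
#endif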
/* Return TRUE if the operation in STMT can be performed on a smaller type.

   Input:
   STMT - a statement to check.
   DEF - we support operations with two operands, one of which is constant.
         The other operand can be defined by a demotion operation, or by a
         previous statement in a sequence of over-promoted operations.  In the
         latter case DEF is used to replace that operand.  (It is defined by a
         pattern statement we created for the previous statement in the
         sequence).

   Input/output:
   NEW_TYPE - Output: a smaller type that we are trying to use.  Input: if not
         NULL, it's the type of DEF.
   STMTS - additional pattern statements.  If a pattern statement (type
         conversion) is created in this function, its original statement is
         added to STMTS.

   Output:
   OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
         operands to use in the new pattern statement for STMT (will be created
         in vect_recog_over_widening_pattern ()).
   NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
         statements for STMT: the first one is a type promotion and the second
         one is the operation itself.  We return the type promotion statement
	 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
         the second pattern statement.  */
static bool
vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
				  tree *op0, tree *op1, gimple *new_def_stmt,
				  vec<gimple> *stmts)
{
  enum tree_code code;
  tree const_oprnd, oprnd;
  tree interm_type = NULL_TREE, half_type, new_oprnd, type;
  gimple def_stmt, new_stmt;
  bool first = false;
  bool promotion;

  *op0 = NULL_TREE;
  *op1 = NULL_TREE;
  *new_def_stmt = NULL;

  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
      && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
    return false;

  oprnd = gimple_assign_rhs1 (stmt);
  const_oprnd = gimple_assign_rhs2 (stmt);
  type = gimple_expr_type (stmt);

  if (TREE_CODE (oprnd) != SSA_NAME
      || TREE_CODE (const_oprnd) != INTEGER_CST)
    return false;

  /* If oprnd has other uses besides that in stmt we cannot mark it
     as being part of a pattern only.  */
  if (!has_single_use (oprnd))
    return false;

  /* If we are in the middle of a sequence, we use DEF from a previous
     statement.  Otherwise, OPRND has to be a result of type promotion.  */
  if (*new_type)
    {
      half_type = *new_type;
      oprnd = def;
    }
  else
    {
      first = true;
      if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
			      &promotion)
	  || !promotion
	  || !vect_same_loop_or_bb_p (stmt, def_stmt))
        return false;
    }

  /* Can we perform the operation on a smaller type?  */
  switch (code)
    {
      case BIT_IOR_EXPR:
      case BIT_XOR_EXPR:
      case BIT_AND_EXPR:
        if (!int_fits_type_p (const_oprnd, half_type))
          {
            /* HALF_TYPE is not enough.  Try a bigger type if possible.  */
            if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
              return false;

            interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
            if (!int_fits_type_p (const_oprnd, interm_type))
              return false;
          }

        break;

      case LSHIFT_EXPR:
        /* Try intermediate type - HALF_TYPE is not enough for sure.  */
        if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
          return false;

        /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
          (e.g., if the original value was char, the shift amount is at most 8
           if we want to use short).  */
        if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
          return false;

        interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

        if (!vect_supportable_shift (code, interm_type))
          return false;

        break;

      case RSHIFT_EXPR:
        if (vect_supportable_shift (code, half_type))
          break;

        /* Try intermediate type - HALF_TYPE is not supported.  */
        if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
          return false;

        interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

        if (!vect_supportable_shift (code, interm_type))
          return false;

        break;

      default:
        gcc_unreachable ();
    }

  /* There are four possible cases:
     1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
        the first statement in the sequence)
        a. The original, HALF_TYPE, is not enough - we replace the promotion
           from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
        b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
           promotion.
     2. OPRND is defined by a pattern statement we created.
        a. Its type is not sufficient for the operation, we create a new stmt:
           a type conversion for OPRND from HALF_TYPE to INTERM_TYPE.  We store
           this statement in NEW_DEF_STMT, and it is later put in
	   STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
        b. OPRND is good to use in the new statement.  */
  if (first)
    {
      if (interm_type)
        {
          /* Replace the original type conversion HALF_TYPE->TYPE with
             HALF_TYPE->INTERM_TYPE.  */
          if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
            {
              new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
              /* Check if the already created pattern stmt is what we need.  */
              if (!is_gimple_assign (new_stmt)
                  || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt))
                  || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
                return false;

	      stmts->safe_push (def_stmt);
              oprnd = gimple_assign_lhs (new_stmt);
            }
          else
            {
              /* Create NEW_OPRND = (INTERM_TYPE) OPRND.  */
              oprnd = gimple_assign_rhs1 (def_stmt);
              new_oprnd = make_ssa_name (interm_type, NULL);
              new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                       oprnd, NULL_TREE);
              STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
              stmts->safe_push (def_stmt);
              oprnd = new_oprnd;
            }
        }
      else
        {
          /* Retrieve the operand before the type promotion.  */
          oprnd = gimple_assign_rhs1 (def_stmt);
        }
    }
  else
    {
      if (interm_type)
        {
          /* Create a type conversion HALF_TYPE->INTERM_TYPE.  */
          new_oprnd = make_ssa_name (interm_type, NULL);
          new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                   oprnd, NULL_TREE);
          oprnd = new_oprnd;
          *new_def_stmt = new_stmt;
        }

      /* Otherwise, OPRND is already set.  */
    }

  if (interm_type)
    *new_type = interm_type;
  else
    *new_type = half_type;

  *op0 = oprnd;
  *op1 = fold_convert (*new_type, const_oprnd);

  return true;
}
/* Try to find a statement or a sequence of statements that can be performed
   on a smaller type:

     type x_t;
     TYPE x_T, res0_T, res1_T;
   loop:
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  res0_T = op (x_T, C0);
     S4  res1_T = op (res0_T, C1);
     S5  ... = () res1_T;  - type demotion

   where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
   constants.
   Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
   be 'type' or some intermediate type.  For now, we expect S5 to be a type
   demotion operation.  We also check that S3 and S4 have only one use.  */
static gimple
vect_recog_over_widening_pattern (vec<gimple> *stmts,
                                  tree *type_in, tree *type_out)
{
  gimple stmt = stmts->pop ();
  gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL,
	 use_stmt = NULL;
  tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
  tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
  bool first;
  tree type = NULL;

  first = true;
  while (1)
    {
      if (!vinfo_for_stmt (stmt)
          || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
        return NULL;

      new_def_stmt = NULL;
      if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
                                             &op0, &op1, &new_def_stmt,
                                             stmts))
        {
          if (first)
            return NULL;
          else
            break;
        }

      /* STMT can be performed on a smaller type.  Check its uses.  */
      use_stmt = vect_single_imm_use (stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt))
        return NULL;

      /* Create pattern statement for STMT.  */
      vectype = get_vectype_for_scalar_type (new_type);
      if (!vectype)
        return NULL;

      /* We want to collect all the statements for which we create pattern
         statements, except for the case when the last statement in the
         sequence doesn't have a corresponding pattern statement.  In such
         case we associate the last pattern statement with the last statement
         in the sequence.  Therefore, we only add the original statement to
         the list if we know that it is not the last.  */
      if (prev_stmt)
        stmts->safe_push (prev_stmt);

      var = vect_recog_temp_ssa_var (new_type, NULL);
      pattern_stmt
	= gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt), var,
					op0, op1);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
      new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "created pattern stmt: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
          dump_printf (MSG_NOTE, "\n");
        }

      type = gimple_expr_type (stmt);
      prev_stmt = stmt;
      stmt = use_stmt;

      first = false;
    }

  /* We got a sequence.  We expect it to end with a type demotion operation.
     Otherwise, we quit (for now).  There are three possible cases: the
     conversion is to NEW_TYPE (we don't do anything), the conversion is to
     a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
     NEW_TYPE differs (we create a new conversion statement).  */
  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
    {
      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      /* Support only type demotion or signedness change.  */
      if (!INTEGRAL_TYPE_P (use_type)
	  || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
        return NULL;

      /* Check that NEW_TYPE is not bigger than the conversion result.  */
      if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
	return NULL;

      if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
          || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
        {
          /* Create NEW_TYPE->USE_TYPE conversion.  */
          new_oprnd = make_ssa_name (use_type, NULL);
          pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                       var, NULL_TREE);
          STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;

          *type_in = get_vectype_for_scalar_type (new_type);
          *type_out = get_vectype_for_scalar_type (use_type);

          /* We created a pattern statement for the last statement in the
             sequence, so we don't need to associate it with the pattern
             statement created for PREV_STMT.  Therefore, we add PREV_STMT
             to the list in order to mark it later in vect_pattern_recog_1.  */
          if (prev_stmt)
            stmts->safe_push (prev_stmt);
        }
      else
        {
          if (prev_stmt)
	    STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
	       = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));

          *type_in = vectype;
          *type_out = NULL_TREE;
        }

      stmts->safe_push (use_stmt);
    }
  else
    /* TODO: support general case, create a conversion to the correct type.  */
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_over_widening_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  return pattern_stmt;
}
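
/* For illustration only: a source sequence of the over-widening shape
   handled above -- the operations are done in int because of C integer
   promotion, although char precision would suffice, and the result is
   demoted on the store.  Names are hypothetical; the block is guarded
   out of compilation.  */
#if 0
void
over_widening_example (unsigned char *in, unsigned char *out, int n)
{
  int i;
  for (i = 0; i < n; i++)
    out[i] = (in[i] >> 1) & 0x7f;  /* S2-S5: promote, op, op, demote.  */
}
#endif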
/* Detect widening shift pattern:

   type a_t;
   TYPE a_T, res_T;

   S1 a_t = ;
   S2 a_T = (TYPE) a_t;
   S3 res_T = a_T << CONST;

  where type 'TYPE' is at least double the size of type 'type'.

  Also detect cases where the shift result is immediately converted
  to another type 'result_type' that is no larger in size than 'TYPE'.
  In those cases we perform a widen-shift that directly results in
  'result_type', to avoid a possible over-widening situation:

  type a_t;
  TYPE a_T, res_T;
  result_type res_result;

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
  S3 res_T = a_T << CONST;
  S4 res_result = (result_type) res_T;
      '--> res_result' = a_t w<< CONST;

  And a case when 'TYPE' is 4 times bigger than 'type'.  In that case we
  create an additional pattern stmt for S2 to create a variable of an
  intermediate type, and perform widen-shift on the intermediate type:

  type a_t;
  interm_type a_it;
  TYPE a_T, res_T, res_T';

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
      '--> a_it = (interm_type) a_t;
  S3 res_T = a_T << CONST;
      '--> res_T' = a_it <<* CONST;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins.
    In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
    in STMTS.  When an intermediate type is used and a pattern statement is
    created for S2, we also put S2 here (before S3).

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the sequence of
    stmts that constitute the pattern.  In this case it will be:
    WIDEN_LSHIFT_EXPR <a_t, CONST>.  */
static gimple
vect_recog_widen_shift_pattern (vec<gimple> *stmts,
				tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  gimple def_stmt0;
  tree oprnd0, oprnd1;
  tree type, half_type0;
  gimple pattern_stmt;
  tree vectype, vectype_out = NULL_TREE;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  gimple use_stmt;
  bool promotion;

  if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
    return NULL;

  if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
    return NULL;

  /* Check operand 0: it has to be defined by a type promotion.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
			  &promotion)
      || !promotion)
    return NULL;

  /* Check operand 1: has to be positive.  We check that it fits the type
     in vect_handle_widen_op_by_const ().  */
  if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (def_stmt0);
  type = gimple_expr_type (last_stmt);

  /* Check for subsequent conversion to another type.  */
  use_stmt = vect_single_imm_use (last_stmt);
  if (use_stmt && is_gimple_assign (use_stmt)
      && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
      && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
    {
      tree use_lhs = gimple_assign_lhs (use_stmt);
      tree use_type = TREE_TYPE (use_lhs);

      if (INTEGRAL_TYPE_P (use_type)
	  && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
	{
	  last_stmt = use_stmt;
	  type = use_type;
	}
    }

  /* Check if this is a widening operation.  */
  if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
				      &oprnd0, stmts,
				      type, &half_type0, def_stmt0))
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_recog_widen_shift_pattern: detected:\n");

  /* Check target support.  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);

  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
					  vectype_out, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt =
    gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR, var, oprnd0, oprnd1);

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}
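
/* For illustration only: a C source loop of the widening-shift shape
   handled above; names are hypothetical and the block is guarded out of
   compilation.  */
#if 0
void
widen_shift_example (short *a, int *res, int n)
{
  int i;
  for (i = 0; i < n; i++)
    res[i] = a[i] << 4;   /* S2-S3: (int) a[i] << 4 -> a_t w<< 4.  */
}
#endif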
/* Detect a rotate pattern that wouldn't be otherwise vectorized:

   type a_t, b_t, c_t;

   S0 a_t = b_t r<< c_t;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins,
    i.e. the shift/rotate stmt.  The original stmt (S0) is replaced
    with a sequence:

    S1 d_t = -c_t;
    S2 e_t = d_t & (B - 1);
    S3 f_t = b_t << c_t;
    S4 g_t = b_t >> e_t;
    S0 a_t = f_t | g_t;

    where B is element bitsize of type.

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the rotate
    S0 stmt.  */
static gimple
vect_recog_rotate_pattern (vec<gimple> *stmts, tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
  gimple pattern_stmt, def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  enum vect_def_type dt;
  optab optab1, optab2;
  edge ext_def = NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  lhs = gimple_assign_lhs (last_stmt);
  oprnd0 = gimple_assign_rhs1 (last_stmt);
  type = TREE_TYPE (oprnd0);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
      || !INTEGRAL_TYPE_P (type)
      || !TYPE_UNSIGNED (type))
    return NULL;

  if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
			   &def, &dt))
    return NULL;

  if (dt != vect_internal_def
      && dt != vect_constant_def
      && dt != vect_external_def)
    return NULL;

  vectype = get_vectype_for_scalar_type (type);
  if (vectype == NULL_TREE)
    return NULL;

  /* If vector/vector or vector/scalar rotate is supported by the target,
     don't do anything here.  */
  optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
  if (optab1
      && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
    return NULL;

  if (bb_vinfo != NULL || dt != vect_internal_def)
    {
      optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
      if (optab2
	  && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
	return NULL;
    }

  /* If vector/vector or vector/scalar shifts aren't supported by the target,
     don't do anything here either.  */
  optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
  optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
  if (!optab1
      || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
      || !optab2
      || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      if (bb_vinfo == NULL && dt == vect_internal_def)
	return NULL;
      optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
      optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
      if (!optab1
	  || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
	  || !optab2
	  || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
	return NULL;
    }

  *type_in = vectype;
  *type_out = vectype;
  if (*type_in == NULL_TREE)
    return NULL;

  if (dt == vect_external_def
      && TREE_CODE (oprnd1) == SSA_NAME
      && loop_vinfo)
    {
      struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      ext_def = loop_preheader_edge (loop);
      if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
	{
	  basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
	  if (bb == NULL
	      || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
	    ext_def = NULL;
	}
    }

  def = NULL_TREE;
  if (TREE_CODE (oprnd1) == INTEGER_CST
      || TYPE_MODE (TREE_TYPE (oprnd1)) == TYPE_MODE (type))
    def = oprnd1;
  else if (def_stmt && gimple_assign_cast_p (def_stmt))
    {
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (type)
	  && TYPE_PRECISION (TREE_TYPE (rhs1))
	     == TYPE_PRECISION (type))
	def = rhs1;
    }

  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
  if (def == NULL_TREE)
    {
      def = vect_recog_temp_ssa_var (type, NULL);
      def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
					       NULL_TREE);
      if (ext_def)
	{
	  basic_block new_bb
	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
	  gcc_assert (!new_bb);
	}
      else
	append_pattern_def_seq (stmt_vinfo, def_stmt);
    }
  stype = TREE_TYPE (def);

  if (TREE_CODE (def) == INTEGER_CST)
    {
      if (!tree_fits_uhwi_p (def)
	  || tree_to_uhwi (def) >= GET_MODE_PRECISION (TYPE_MODE (type))
	  || integer_zerop (def))
	return NULL;
      def2 = build_int_cst (stype,
			    GET_MODE_PRECISION (TYPE_MODE (type))
			    - tree_to_uhwi (def));
    }
  else
    {
      tree vecstype = get_vectype_for_scalar_type (stype);
      stmt_vec_info def_stmt_vinfo;

      if (vecstype == NULL_TREE)
	return NULL;
      def2 = vect_recog_temp_ssa_var (stype, NULL);
      def_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, def2, def,
					       NULL_TREE);
      if (ext_def)
	{
	  basic_block new_bb
	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
	  gcc_assert (!new_bb);
	}
      else
	{
	  def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
	  set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
	  STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
	  append_pattern_def_seq (stmt_vinfo, def_stmt);
	}

      def2 = vect_recog_temp_ssa_var (stype, NULL);
      tree mask
	= build_int_cst (stype, GET_MODE_PRECISION (TYPE_MODE (stype)) - 1);
      def_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, def2,
					       gimple_assign_lhs (def_stmt),
					       mask);
      if (ext_def)
	{
	  basic_block new_bb
	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
	  gcc_assert (!new_bb);
	}
      else
	{
	  def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
	  set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
	  STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
	  append_pattern_def_seq (stmt_vinfo, def_stmt);
	}
    }

  var1 = vect_recog_temp_ssa_var (type, NULL);
  def_stmt = gimple_build_assign_with_ops (rhs_code == LROTATE_EXPR
					   ? LSHIFT_EXPR : RSHIFT_EXPR,
					   var1, oprnd0, def);
  append_pattern_def_seq (stmt_vinfo, def_stmt);

  var2 = vect_recog_temp_ssa_var (type, NULL);
  def_stmt = gimple_build_assign_with_ops (rhs_code == LROTATE_EXPR
					   ? RSHIFT_EXPR : LSHIFT_EXPR,
					   var2, oprnd0, def2);
  append_pattern_def_seq (stmt_vinfo, def_stmt);

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_recog_rotate_pattern: detected:\n");

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (BIT_IOR_EXPR, var, var1, var2);

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}
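
/* For illustration only: a rotate idiom of the kind lowered above into
   two shifts and an ior when the target lacks a vector rotate but has
   vector shifts.  The masked shift count keeps the source well defined
   for c == 0.  Names are hypothetical; the block is guarded out.  */
#if 0
void
rotate_example (unsigned int *x, int n, unsigned int c)
{
  int i;
  for (i = 0; i < n; i++)
    x[i] = (x[i] << c) | (x[i] >> ((-c) & 31));  /* recognized as x r<< c */
}
#endif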
/* Detect a vector by vector shift pattern that wouldn't be otherwise
   vectorized:

   type a_t;
   TYPE b_T, res_T;

   S1 a_t = ;
   S2 b_T = ;
   S3 res_T = b_T op a_t;

  where type 'TYPE' is a type with different size than 'type',
  and op is <<, >> or rotate.

  Also detect cases:

   type a_t;
   TYPE b_T, c_T, res_T;

   S0 c_T = ;
   S1 a_t = (type) c_T;
   S2 b_T = ;
   S3 res_T = b_T op a_t;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins,
    i.e. the shift/rotate stmt.  The original stmt (S3) is replaced
    with a shift/rotate which has same type on both operands, in the
    second case just b_T op c_T, in the first case with added cast
    from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the shift/rotate
    S3 stmt.  */
static gimple
vect_recog_vector_vector_shift_pattern (vec<gimple> *stmts,
					tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  tree oprnd0, oprnd1, lhs, var;
  gimple pattern_stmt, def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  enum vect_def_type dt;
  tree def;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  lhs = gimple_assign_lhs (last_stmt);
  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TREE_CODE (oprnd1) != SSA_NAME
      || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
      || TYPE_PRECISION (TREE_TYPE (oprnd1))
	 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
      || TYPE_PRECISION (TREE_TYPE (lhs))
	 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
    return NULL;

  if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
			   &def, &dt))
    return NULL;

  if (dt != vect_internal_def)
    return NULL;

  *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
  *type_out = *type_in;
  if (*type_in == NULL_TREE)
    return NULL;

  def = NULL_TREE;
  if (gimple_assign_cast_p (def_stmt))
    {
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
	  && TYPE_PRECISION (TREE_TYPE (rhs1))
	     == TYPE_PRECISION (TREE_TYPE (oprnd0)))
	def = rhs1;
    }

  if (def == NULL_TREE)
    {
      def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
      def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
					       NULL_TREE);
      new_pattern_def_seq (stmt_vinfo, def_stmt);
    }

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_recog_vector_vector_shift_pattern: detected:\n");

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
  pattern_stmt = gimple_build_assign_with_ops (rhs_code, var, oprnd0, def);

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}

/* Detect a signed division by a constant that wouldn't be
   otherwise vectorized:

   type a_t, b_t;

   S1 a_t = b_t / N;

  where type 'type' is an integral type and N is a constant.

  Similarly handle modulo by a constant:

   S4 a_t = b_t % N;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins,
    i.e. the division stmt.  S1 is replaced by if N is a power
    of two constant and type is signed:
    S3  y_t = b_t < 0 ? N - 1 : 0;
    S2  x_t = b_t + y_t;
    S1' a_t = x_t >> log2 (N);

    S4 is replaced if N is a power of two constant and
    type is signed by (where *_T temporaries have unsigned type):
    S9  y_T = b_t < 0 ? -1U : 0U;
    S8  z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
    S7  z_t = (type) z_T;
    S6  w_t = b_t + z_t;
    S5  x_t = w_t & (N - 1);
    S4' a_t = x_t - z_t;

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the division
    S1 or modulo S4 stmt.  */
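
/* A worked instance (illustrative, not part of the original comment):
   for signed 'int' and N == 4, S1 becomes

     y = b < 0 ? 3 : 0;
     x = b + y;
     a = x >> 2;

   e.g. b = -5 gives y = 3, x = -2, a = -1, matching C's truncating
   division -5 / 4 == -1.  */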
static gimple
vect_recog_divmod_pattern (vec<gimple> *stmts,
			   tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  tree oprnd0, oprnd1, vectype, itype, cond;
  gimple pattern_stmt, def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  optab optab;
  tree q;
  int dummy_int, prec;
  stmt_vec_info def_stmt_vinfo;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case TRUNC_DIV_EXPR:
    case TRUNC_MOD_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  itype = TREE_TYPE (oprnd0);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TREE_CODE (oprnd1) != INTEGER_CST
      || TREE_CODE (itype) != INTEGER_TYPE
      || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
    return NULL;

  vectype = get_vectype_for_scalar_type (itype);
  if (vectype == NULL_TREE)
    return NULL;

  /* If the target can handle vectorized division or modulo natively,
     don't attempt to optimize this.  */
  optab = optab_for_tree_code (rhs_code, vectype, optab_default);
  if (optab != unknown_optab)
    {
      machine_mode vec_mode = TYPE_MODE (vectype);
      int icode = (int) optab_handler (optab, vec_mode);
      if (icode != CODE_FOR_nothing)
	return NULL;
    }

  prec = TYPE_PRECISION (itype);
  if (integer_pow2p (oprnd1))
    {
      if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
	return NULL;

      /* Pattern detected.  */
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_recog_divmod_pattern: detected:\n");

      cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
		     build_int_cst (itype, 0));
      if (rhs_code == TRUNC_DIV_EXPR)
	{
	  tree var = vect_recog_temp_ssa_var (itype, NULL);
	  tree shift;
	  def_stmt
	    = gimple_build_assign_with_ops (COND_EXPR, var, cond,
					    fold_build2 (MINUS_EXPR, itype,
							 oprnd1,
							 build_int_cst (itype,
									1)),
					    build_int_cst (itype, 0));
	  new_pattern_def_seq (stmt_vinfo, def_stmt);
	  var = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt
	    = gimple_build_assign_with_ops (PLUS_EXPR, var, oprnd0,
					    gimple_assign_lhs (def_stmt));
	  append_pattern_def_seq (stmt_vinfo, def_stmt);

	  shift = build_int_cst (itype, tree_log2 (oprnd1));
	  pattern_stmt
	    = gimple_build_assign_with_ops (RSHIFT_EXPR,
					    vect_recog_temp_ssa_var (itype,
								     NULL),
					    var, shift);
	}
      else
	{
	  tree signmask;
	  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
	  if (compare_tree_int (oprnd1, 2) == 0)
	    {
	      signmask = vect_recog_temp_ssa_var (itype, NULL);
	      def_stmt
		= gimple_build_assign_with_ops (COND_EXPR, signmask, cond,
						build_int_cst (itype, 1),
						build_int_cst (itype, 0));
	      append_pattern_def_seq (stmt_vinfo, def_stmt);
	    }
	  else
	    {
	      tree utype
		= build_nonstandard_integer_type (prec, 1);
	      tree vecutype = get_vectype_for_scalar_type (utype);
	      tree shift
		= build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
					- tree_log2 (oprnd1));
	      tree var = vect_recog_temp_ssa_var (utype, NULL);

	      def_stmt
		= gimple_build_assign_with_ops (COND_EXPR, var, cond,
						build_int_cst (utype, -1),
						build_int_cst (utype, 0));
	      def_stmt_vinfo
		= new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
	      set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
	      STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
	      append_pattern_def_seq (stmt_vinfo, def_stmt);
	      var = vect_recog_temp_ssa_var (utype, NULL);
	      def_stmt
		= gimple_build_assign_with_ops (RSHIFT_EXPR, var,
						gimple_assign_lhs (def_stmt),
						shift);
	      def_stmt_vinfo
		= new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
	      set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
	      STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
	      append_pattern_def_seq (stmt_vinfo, def_stmt);
	      signmask = vect_recog_temp_ssa_var (itype, NULL);
	      def_stmt
		= gimple_build_assign_with_ops (NOP_EXPR, signmask, var,
						NULL_TREE);
	      append_pattern_def_seq (stmt_vinfo, def_stmt);
	    }
	  def_stmt
	    = gimple_build_assign_with_ops (PLUS_EXPR,
					    vect_recog_temp_ssa_var (itype,
								     NULL),
					    oprnd0, signmask);
	  append_pattern_def_seq (stmt_vinfo, def_stmt);
	  def_stmt
	    = gimple_build_assign_with_ops (BIT_AND_EXPR,
					    vect_recog_temp_ssa_var (itype,
								     NULL),
					    gimple_assign_lhs (def_stmt),
					    fold_build2 (MINUS_EXPR, itype,
							 oprnd1,
							 build_int_cst (itype,
									1)));
	  append_pattern_def_seq (stmt_vinfo, def_stmt);

	  pattern_stmt
	    = gimple_build_assign_with_ops (MINUS_EXPR,
					    vect_recog_temp_ssa_var (itype,
								     NULL),
					    gimple_assign_lhs (def_stmt),
					    signmask);
	}

      if (dump_enabled_p ())
	dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
			      0);

      stmts->safe_push (last_stmt);

      *type_in = vectype;
      *type_out = vectype;
      return pattern_stmt;
    }

  if (prec > HOST_BITS_PER_WIDE_INT
      || integer_zerop (oprnd1))
    return NULL;

  if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
    return NULL;

  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;

  if (TYPE_UNSIGNED (itype))
    {
      unsigned HOST_WIDE_INT mh, ml;
      int pre_shift, post_shift;
      unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
				  & GET_MODE_MASK (TYPE_MODE (itype)));
      tree t1, t2, t3, t4;

      if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
	/* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0.  */
	return NULL;

      /* Find a suitable multiplier and right shift count
	 instead of multiplying with D.  */
      mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);

      /* If the suggested multiplier is more than SIZE bits, we can do better
	 for even divisors, using an initial right shift.  */
      if (mh != 0 && (d & 1) == 0)
	{
	  pre_shift = floor_log2 (d & -d);
	  mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
				  &ml, &post_shift, &dummy_int);
	}
      else
	pre_shift = 0;

      if (mh != 0)
	{
	  if (post_shift - 1 >= prec)
	    return NULL;

	  /* t1 = oprnd0 h* ml;
	     t2 = oprnd0 - t1;
	     t3 = t2 >> 1;
	     t4 = t1 + t3;
	     q = t4 >> (post_shift - 1);  */
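	  /* A concrete instance (illustrative, not from the original
	     sources): for 32-bit unsigned division by 7, choose_multiplier
	     yields ml = 0x24924925 with post_shift = 3, so the sequence
	     below computes

	       t1 = ((unsigned long long) n * 0x24924925) >> 32;
	       q  = (t1 + ((n - t1) >> 1)) >> 2;

	     e.g. n = 100 gives t1 = 14 and q = (14 + 43) >> 2 = 14.  */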
	  t1 = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt
	    = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
					    build_int_cst (itype, ml));
	  append_pattern_def_seq (stmt_vinfo, def_stmt);

	  t2 = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt
	    = gimple_build_assign_with_ops (MINUS_EXPR, t2, oprnd0, t1);
	  append_pattern_def_seq (stmt_vinfo, def_stmt);

	  t3 = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt
	    = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
					    integer_one_node);
	  append_pattern_def_seq (stmt_vinfo, def_stmt);

	  t4 = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt
	    = gimple_build_assign_with_ops (PLUS_EXPR, t4, t1, t3);

	  if (post_shift != 1)
	    {
	      append_pattern_def_seq (stmt_vinfo, def_stmt);

	      q = vect_recog_temp_ssa_var (itype, NULL);
	      pattern_stmt
		= gimple_build_assign_with_ops (RSHIFT_EXPR, q, t4,
						build_int_cst (itype,
							       post_shift
							       - 1));
	    }
	  else
	    {
	      q = t4;
	      pattern_stmt = def_stmt;
	    }
	}
      else
	{
	  if (pre_shift >= prec || post_shift >= prec)
	    return NULL;

	  /* t1 = oprnd0 >> pre_shift;
	     t2 = t1 h* ml;
	     q = t2 >> post_shift;  */
	  if (pre_shift)
	    {
	      t1 = vect_recog_temp_ssa_var (itype, NULL);
	      def_stmt
		= gimple_build_assign_with_ops (RSHIFT_EXPR, t1, oprnd0,
						build_int_cst (NULL,
							       pre_shift));
	      append_pattern_def_seq (stmt_vinfo, def_stmt);
	    }
	  else
	    t1 = oprnd0;

	  t2 = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt
	    = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t2, t1,
					    build_int_cst (itype, ml));

	  if (post_shift)
	    {
	      append_pattern_def_seq (stmt_vinfo, def_stmt);

	      q = vect_recog_temp_ssa_var (itype, NULL);
	      pattern_stmt
		= gimple_build_assign_with_ops (RSHIFT_EXPR, q, t2,
						build_int_cst (itype,
							       post_shift));
	    }
	  else
	    {
	      q = t2;
	      pattern_stmt = def_stmt;
	    }
	}
    }
  else
    {
      unsigned HOST_WIDE_INT ml;
      int post_shift;
      HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
      unsigned HOST_WIDE_INT abs_d;
      bool add = false;
      tree t1, t2, t3, t4;

      /* Give up for -1.  */
      if (d == -1)
	return NULL;

      /* Since d might be INT_MIN, we have to cast to
	 unsigned HOST_WIDE_INT before negating to avoid
	 undefined signed overflow.  */
      abs_d = (d >= 0
	       ? (unsigned HOST_WIDE_INT) d
	       : - (unsigned HOST_WIDE_INT) d);

      /* n rem d = n rem -d */
      if (rhs_code == TRUNC_MOD_EXPR && d < 0)
	{
	  d = abs_d;
	  oprnd1 = build_int_cst (itype, abs_d);
	}
      else if (HOST_BITS_PER_WIDE_INT >= prec
	       && abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
	/* This case is not handled correctly below.  */
	return NULL;

      choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
      if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
	{
	  add = true;
	  ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
	}
      if (post_shift >= prec)
	return NULL;
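
      /* A concrete signed instance (illustrative, not from the original
	 sources): for 32-bit 'int' division by 3, choose_multiplier yields
	 ml = 0x55555556 with post_shift = 0, so the code below computes

	   t1 = ((long long) n * 0x55555556) >> 32;
	   t4 = n >> 31;
	   q  = t1 - t4;

	 e.g. n = 7 gives t1 = 2, t4 = 0, q = 2; n = -7 gives t1 = -3,
	 t4 = -1, q = -2.  */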

      /* t1 = oprnd0 h* ml;  */
      t1 = vect_recog_temp_ssa_var (itype, NULL);
      def_stmt
	= gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
					build_int_cst (itype, ml));

      if (add)
	{
	  /* t2 = t1 + oprnd0;  */
	  append_pattern_def_seq (stmt_vinfo, def_stmt);
	  t2 = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt
	    = gimple_build_assign_with_ops (PLUS_EXPR, t2, t1, oprnd0);
	}
      else
	t2 = t1;

      if (post_shift)
	{
	  /* t3 = t2 >> post_shift;  */
	  append_pattern_def_seq (stmt_vinfo, def_stmt);
	  t3 = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt
	    = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
					    build_int_cst (itype, post_shift));
	}
      else
	t3 = t2;

      wide_int oprnd0_min, oprnd0_max;
      int msb = 1;
      if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
	{
	  if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
	    msb = 0;
	  else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
	    msb = -1;
	}

      if (msb == 0 && d >= 0)
	{
	  /* q = t3;  */
	  q = t3;
	  pattern_stmt = def_stmt;
	}
      else
	{
	  /* t4 = oprnd0 >> (prec - 1);
	     or if we know from VRP that oprnd0 >= 0
	     t4 = 0;
	     or if we know from VRP that oprnd0 < 0
	     t4 = -1;  */
	  append_pattern_def_seq (stmt_vinfo, def_stmt);
	  t4 = vect_recog_temp_ssa_var (itype, NULL);
	  if (msb != 1)
	    def_stmt
	      = gimple_build_assign_with_ops (INTEGER_CST,
					      t4, build_int_cst (itype, msb),
					      NULL_TREE);
	  else
	    def_stmt
	      = gimple_build_assign_with_ops (RSHIFT_EXPR, t4, oprnd0,
					      build_int_cst (itype, prec - 1));
	  append_pattern_def_seq (stmt_vinfo, def_stmt);

	  /* q = t3 - t4;  or q = t4 - t3;  */
	  q = vect_recog_temp_ssa_var (itype, NULL);
	  pattern_stmt
	    = gimple_build_assign_with_ops (MINUS_EXPR, q, d < 0 ? t4 : t3,
					    d < 0 ? t3 : t4);
	}
    }

  if (rhs_code == TRUNC_MOD_EXPR)
    {
      tree r, t1;

      /* We divided.  Now finish by:
	 t1 = q * oprnd1;
	 r = oprnd0 - t1;  */
      append_pattern_def_seq (stmt_vinfo, pattern_stmt);

      t1 = vect_recog_temp_ssa_var (itype, NULL);
      def_stmt
	= gimple_build_assign_with_ops (MULT_EXPR, t1, q, oprnd1);
      append_pattern_def_seq (stmt_vinfo, def_stmt);

      r = vect_recog_temp_ssa_var (itype, NULL);
      pattern_stmt
	= gimple_build_assign_with_ops (MINUS_EXPR, r, oprnd0, t1);
    }

  /* Pattern detected.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_recog_divmod_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  stmts->safe_push (last_stmt);

  *type_in = vectype;
  *type_out = vectype;
  return pattern_stmt;
}

/* Function vect_recog_mixed_size_cond_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE a_T, b_T, c_T;
   loop:
     S1  a_T = x_t CMP y_t ? b_T : c_T;

   where type 'TYPE' is an integral type which has different size
   from 'type'.  b_T and c_T are either constants (and if 'TYPE' is wider
   than 'type', the constants need to fit into an integer type
   with the same width as 'type') or results of conversion from 'type'.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the pattern.
	Additionally a def_stmt is added.

	a_it = x_t CMP y_t ? b_it : c_it;
	a_T = (TYPE) a_it;  */
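
/* For example (an illustrative sketch, not part of the original comment),
   with 'type' = char and 'TYPE' = int:

     char x[N], y[N];
     int a[N];
     for (int i = 0; i < N; i++)
       a[i] = x[i] < y[i] ? 1 : 2;

   the COND_EXPR is first computed in a char-sized temporary and then
   widened, so the comparison and the selection use the same vector
   element size.  */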
static gimple
vect_recog_mixed_size_cond_pattern (vec<gimple> *stmts, tree *type_in,
				    tree *type_out)
{
  gimple last_stmt = (*stmts)[0];
  tree cond_expr, then_clause, else_clause;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
  tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
  machine_mode cmpmode;
  gimple pattern_stmt, def_stmt;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
  gimple def_stmt0 = NULL, def_stmt1 = NULL;
  bool promotion;
  tree comp_scalar_type;

  if (!is_gimple_assign (last_stmt)
      || gimple_assign_rhs_code (last_stmt) != COND_EXPR
      || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;

  cond_expr = gimple_assign_rhs1 (last_stmt);
  then_clause = gimple_assign_rhs2 (last_stmt);
  else_clause = gimple_assign_rhs3 (last_stmt);

  if (!COMPARISON_CLASS_P (cond_expr))
    return NULL;

  comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
  comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
  if (comp_vectype == NULL_TREE)
    return NULL;

  type = gimple_expr_type (last_stmt);
  if (types_compatible_p (type, comp_scalar_type)
      || ((TREE_CODE (then_clause) != INTEGER_CST
	   || TREE_CODE (else_clause) != INTEGER_CST)
	  && !INTEGRAL_TYPE_P (comp_scalar_type))
      || !INTEGRAL_TYPE_P (type))
    return NULL;

  if ((TREE_CODE (then_clause) != INTEGER_CST
       && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
			      &def_stmt0, &promotion))
      || (TREE_CODE (else_clause) != INTEGER_CST
	  && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
				 &def_stmt1, &promotion)))
    return NULL;

  if (orig_type0 && orig_type1
      && !types_compatible_p (orig_type0, orig_type1))
    return NULL;

  if (orig_type0)
    {
      if (!types_compatible_p (orig_type0, comp_scalar_type))
	return NULL;
      then_clause = gimple_assign_rhs1 (def_stmt0);
      itype = orig_type0;
    }

  if (orig_type1)
    {
      if (!types_compatible_p (orig_type1, comp_scalar_type))
	return NULL;
      else_clause = gimple_assign_rhs1 (def_stmt1);
      itype = orig_type1;
    }

  cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));

  if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
    return NULL;

  vectype = get_vectype_for_scalar_type (type);
  if (vectype == NULL_TREE)
    return NULL;

  if (expand_vec_cond_expr_p (vectype, comp_vectype))
    return NULL;

  if (itype == NULL_TREE)
    itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
					    TYPE_UNSIGNED (type));

  if (itype == NULL_TREE
      || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
    return NULL;

  vecitype = get_vectype_for_scalar_type (itype);
  if (vecitype == NULL_TREE)
    return NULL;

  if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
    return NULL;

  if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
    {
      if ((TREE_CODE (then_clause) == INTEGER_CST
	   && !int_fits_type_p (then_clause, itype))
	  || (TREE_CODE (else_clause) == INTEGER_CST
	      && !int_fits_type_p (else_clause, itype)))
	return NULL;
    }

  def_stmt
    = gimple_build_assign_with_ops (COND_EXPR,
				    vect_recog_temp_ssa_var (itype, NULL),
				    unshare_expr (cond_expr),
				    fold_convert (itype, then_clause),
				    fold_convert (itype, else_clause));
  pattern_stmt
    = gimple_build_assign_with_ops (NOP_EXPR,
				    vect_recog_temp_ssa_var (type, NULL),
				    gimple_assign_lhs (def_stmt), NULL_TREE);

  new_pattern_def_seq (stmt_vinfo, def_stmt);
  def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
  set_vinfo_for_stmt (def_stmt, def_stmt_info);
  STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
  *type_in = vecitype;
  *type_out = vectype;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_recog_mixed_size_cond_pattern: detected:\n");

  return pattern_stmt;
}

/* Helper function of vect_recog_bool_pattern.  Called recursively, return
   true if bool VAR can be optimized that way.  */

static bool
check_bool_pattern (tree var, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  gimple def_stmt;
  enum vect_def_type dt;
  tree def, rhs1;
  enum tree_code rhs_code;

  if (!vect_is_simple_use (var, NULL, loop_vinfo, bb_vinfo, &def_stmt, &def,
			   &dt))
    return false;

  if (dt != vect_internal_def)
    return false;

  if (!is_gimple_assign (def_stmt))
    return false;

  if (!has_single_use (def))
    return false;

  rhs1 = gimple_assign_rhs1 (def_stmt);
  rhs_code = gimple_assign_rhs_code (def_stmt);
  switch (rhs_code)
    {
    case SSA_NAME:
      return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);

    CASE_CONVERT:
      if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
	   || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
	  && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
	return false;
      return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);

    case BIT_NOT_EXPR:
      return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);

    case BIT_AND_EXPR:
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
      if (!check_bool_pattern (rhs1, loop_vinfo, bb_vinfo))
	return false;
      return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo,
				 bb_vinfo);

    default:
      if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
	{
	  tree vecitype, comp_vectype;

	  /* If the comparison can throw, then is_gimple_condexpr will be
	     false and we can't make a COND_EXPR/VEC_COND_EXPR out of it.  */
	  if (stmt_could_throw_p (def_stmt))
	    return false;

	  comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
	  if (comp_vectype == NULL_TREE)
	    return false;

	  if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
	    {
	      machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
	      tree itype
		= build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
	      vecitype = get_vectype_for_scalar_type (itype);
	      if (vecitype == NULL_TREE)
		return false;
	    }
	  else
	    vecitype = comp_vectype;
	  return expand_vec_cond_expr_p (vecitype, comp_vectype);
	}
      return false;
    }
}
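
/* For example (illustrative, not part of the original comment), for

     bool c = (x < y) & (z != w);

   check_bool_pattern recurses through the BIT_AND_EXPR into both
   comparison definitions and answers true only if the target supports
   VEC_COND_EXPR on the corresponding comparison vector types.  */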

/* Helper function of adjust_bool_pattern.  Add a cast to TYPE to a previous
   stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
   to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT.  */

static tree
adjust_bool_pattern_cast (tree type, tree var)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
  gimple cast_stmt, pattern_stmt;

  gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
  pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
  new_pattern_def_seq (stmt_vinfo, pattern_stmt);
  cast_stmt
    = gimple_build_assign_with_ops (NOP_EXPR,
				    vect_recog_temp_ssa_var (type, NULL),
				    gimple_assign_lhs (pattern_stmt),
				    NULL_TREE);
  STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
  return gimple_assign_lhs (cast_stmt);
}

/* Helper function of vect_recog_bool_pattern.  Do the actual transformations,
   recursively.  VAR is an SSA_NAME that should be transformed from bool
   to a wider integer type, OUT_TYPE is the desired final integer type of
   the whole pattern, TRUEVAL should be NULL unless optimizing
   BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
   in the then_clause, STMTS is where statements with added pattern stmts
   should be pushed to.  */
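
/* For instance (a sketch, not part of the original comment), given

     bool b = x < y;
     int t = (int) b;

   the recursion rewrites the bool comparison as an integer COND_EXPR
   of the requested width:

     t' = x < y ? 1 : 0;  */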
static tree
adjust_bool_pattern (tree var, tree out_type, tree trueval,
		     vec<gimple> *stmts)
{
  gimple stmt = SSA_NAME_DEF_STMT (var);
  enum tree_code rhs_code, def_rhs_code;
  tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
  location_t loc;
  gimple pattern_stmt, def_stmt;

  rhs1 = gimple_assign_rhs1 (stmt);
  rhs2 = gimple_assign_rhs2 (stmt);
  rhs_code = gimple_assign_rhs_code (stmt);
  loc = gimple_location (stmt);
  switch (rhs_code)
    {
    case SSA_NAME:
    CASE_CONVERT:
      irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
      itype = TREE_TYPE (irhs1);
      pattern_stmt
	= gimple_build_assign_with_ops (SSA_NAME,
					vect_recog_temp_ssa_var (itype, NULL),
					irhs1, NULL_TREE);
      break;

    case BIT_NOT_EXPR:
      irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
      itype = TREE_TYPE (irhs1);
      pattern_stmt
	= gimple_build_assign_with_ops (BIT_XOR_EXPR,
					vect_recog_temp_ssa_var (itype, NULL),
					irhs1, build_int_cst (itype, 1));
      break;

    case BIT_AND_EXPR:
      /* Try to optimize x = y & (a < b ? 1 : 0); into
	 x = (a < b ? y : 0);

	 E.g. for:
	   bool a_b, b_b, c_b;
	   TYPE d_T;

	   S1  a_b = x1 CMP1 y1;
	   S2  b_b = x2 CMP2 y2;
	   S3  c_b = a_b & b_b;
	   S4  d_T = (TYPE) c_b;

	 we would normally emit:

	   S1'  a_T = x1 CMP1 y1 ? 1 : 0;
	   S2'  b_T = x2 CMP2 y2 ? 1 : 0;
	   S3'  c_T = a_T & b_T;
	   S4'  d_T = c_T;

	 but we can save one stmt by using the
	 result of one of the COND_EXPRs in the other COND_EXPR and leave
	 BIT_AND_EXPR stmt out:

	   S1'  a_T = x1 CMP1 y1 ? 1 : 0;
	   S3'  c_T = x2 CMP2 y2 ? a_T : 0;
	   S4'  d_T = c_T;

	 At least when VEC_COND_EXPR is implemented using masks
	 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
	 computes the comparison masks and ands it, in one case with
	 all ones vector, in the other case with a vector register.
	 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
	 often more expensive.  */
      def_stmt = SSA_NAME_DEF_STMT (rhs2);
      def_rhs_code = gimple_assign_rhs_code (def_stmt);
      if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
	{
	  tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
	  irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
	  if (TYPE_PRECISION (TREE_TYPE (irhs1))
	      == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
	    {
	      gimple tstmt;
	      stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
	      irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
	      tstmt = stmts->pop ();
	      gcc_assert (tstmt == def_stmt);
	      stmts->quick_push (stmt);
	      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
		= STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
	      gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
	      STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
	      return irhs2;
	    }
	  else
	    irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
	  goto and_ior_xor;
	}
      def_stmt = SSA_NAME_DEF_STMT (rhs1);
      def_rhs_code = gimple_assign_rhs_code (def_stmt);
      if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
	{
	  tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
	  irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
	  if (TYPE_PRECISION (TREE_TYPE (irhs2))
	      == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
	    {
	      gimple tstmt;
	      stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
	      irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
	      tstmt = stmts->pop ();
	      gcc_assert (tstmt == def_stmt);
	      stmts->quick_push (stmt);
	      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
		= STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
	      gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
	      STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
	      return irhs1;
	    }
	  else
	    irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
	  goto and_ior_xor;
	}
      /* FALLTHRU */
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
      irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
      irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
    and_ior_xor:
      if (TYPE_PRECISION (TREE_TYPE (irhs1))
	  != TYPE_PRECISION (TREE_TYPE (irhs2)))
	{
	  int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
	  int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
	  int out_prec = TYPE_PRECISION (out_type);
	  if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
	    irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
	  else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
	    irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
	  else
	    {
	      irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
	      irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
	    }
	}
      itype = TREE_TYPE (irhs1);
      pattern_stmt
	= gimple_build_assign_with_ops (rhs_code,
					vect_recog_temp_ssa_var (itype, NULL),
					irhs1, irhs2);
      break;

    default:
      gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
      if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
	  || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
	  || (TYPE_PRECISION (TREE_TYPE (rhs1))
	      != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
	{
	  machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
	  itype
	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
	}
      else
	itype = TREE_TYPE (rhs1);
      cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
      if (trueval == NULL_TREE)
	trueval = build_int_cst (itype, 1);
      else
	gcc_checking_assert (useless_type_conversion_p (itype,
							TREE_TYPE (trueval)));
      pattern_stmt
	= gimple_build_assign_with_ops (COND_EXPR,
					vect_recog_temp_ssa_var (itype, NULL),
					cond_expr, trueval,
					build_int_cst (itype, 0));
      break;
    }

  stmts->safe_push (stmt);
  gimple_set_location (pattern_stmt, loc);
  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
  return gimple_assign_lhs (pattern_stmt);
}

/* Function vect_recog_bool_pattern

   Try to find pattern like following:

     bool a_b, b_b, c_b, d_b, e_b;
     TYPE f_T;
   loop:
     S1  a_b = x1 CMP1 y1;
     S2  b_b = x2 CMP2 y2;
     S3  c_b = a_b & b_b;
     S4  d_b = x3 CMP3 y3;
     S5  e_b = c_b | d_b;
     S6  f_T = (TYPE) e_b;

   where type 'TYPE' is an integral type.  Or a similar pattern
   ending in

     S6  f_Y = e_b ? r_Y : s_Y;

   as results from if-conversion of a complex condition.

   Input:

   * LAST_STMT: A stmt at the end from which the pattern
		search begins, i.e. cast of a bool to
		an integer type.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the pattern.

	Assuming size of TYPE is the same as size of all comparisons
	(otherwise some casts would be added where needed), the above
	sequence we create related pattern stmts:
	S1'  a_T = x1 CMP1 y1 ? 1 : 0;
	S3'  c_T = x2 CMP2 y2 ? a_T : 0;
	S4'  d_T = x3 CMP3 y3 ? 1 : 0;
	S5'  e_T = c_T | d_T;
	S6'  f_T = e_T;

	Instead of the above S3' we could emit:
	S2'  b_T = x2 CMP2 y2 ? 1 : 0;
	S3'  c_T = a_T | b_T;
	but the above is more efficient.  */
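
/* A concrete source-level instance (illustrative, not part of the
   original comment):

     int out[N];
     for (int i = 0; i < N; i++)
       out[i] = a[i] < b[i] && c[i] < d[i];

   After if-conversion the condition is a chain of bool defs like S1-S6
   above, which this pattern re-expresses as integer COND_EXPRs so the
   loop body becomes vectorizable.  */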
static gimple
vect_recog_bool_pattern (vec<gimple> *stmts, tree *type_in,
			 tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  enum tree_code rhs_code;
  tree var, lhs, rhs, vectype;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  gimple pattern_stmt;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  var = gimple_assign_rhs1 (last_stmt);
  lhs = gimple_assign_lhs (last_stmt);

  if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
       || !TYPE_UNSIGNED (TREE_TYPE (var)))
      && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  if (CONVERT_EXPR_CODE_P (rhs_code))
    {
      if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
	  || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
	return NULL;
      vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
      if (vectype == NULL_TREE)
	return NULL;

      if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
	return NULL;

      rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
      if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
	pattern_stmt
	  = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
      else
	pattern_stmt
	  = gimple_build_assign_with_ops (NOP_EXPR, lhs, rhs, NULL_TREE);
      *type_out = vectype;
      *type_in = vectype;
      stmts->safe_push (last_stmt);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_recog_bool_pattern: detected:\n");

      return pattern_stmt;
    }
  else if (rhs_code == COND_EXPR
	   && TREE_CODE (var) == SSA_NAME)
    {
      vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
      if (vectype == NULL_TREE)
	return NULL;

      /* Build a scalar type for the boolean result that when
	 vectorized matches the vector type of the result in
	 size and number of elements.  */
      unsigned prec
	= wi::udiv_trunc (TYPE_SIZE (vectype),
			  TYPE_VECTOR_SUBPARTS (vectype)).to_uhwi ();
      tree type
	= build_nonstandard_integer_type (prec,
					  TYPE_UNSIGNED (TREE_TYPE (var)));
      if (get_vectype_for_scalar_type (type) == NULL_TREE)
	return NULL;

      if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
	return NULL;

      rhs = adjust_bool_pattern (var, type, NULL_TREE, stmts);
      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
      pattern_stmt
	= gimple_build_assign_with_ops (COND_EXPR, lhs,
					build2 (NE_EXPR, boolean_type_node,
						rhs, build_int_cst (type, 0)),
					gimple_assign_rhs2 (last_stmt),
					gimple_assign_rhs3 (last_stmt));
      *type_out = vectype;
      *type_in = vectype;
      stmts->safe_push (last_stmt);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_recog_bool_pattern: detected:\n");

      return pattern_stmt;
    }
  else if (rhs_code == SSA_NAME
	   && STMT_VINFO_DATA_REF (stmt_vinfo))
    {
      stmt_vec_info pattern_stmt_info;
      vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      gcc_assert (vectype != NULL_TREE);
      if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
	return NULL;
      if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
	return NULL;

      rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
      lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
      if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
	{
	  tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
	  gimple cast_stmt
	    = gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs, NULL_TREE);
	  new_pattern_def_seq (stmt_vinfo, cast_stmt);
	  rhs = rhs2;
	}
      pattern_stmt
	= gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
      pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
					     bb_vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
      STMT_VINFO_DATA_REF (pattern_stmt_info)
	= STMT_VINFO_DATA_REF (stmt_vinfo);
      STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
	= STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
      STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
      STMT_VINFO_DR_OFFSET (pattern_stmt_info)
	= STMT_VINFO_DR_OFFSET (stmt_vinfo);
      STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
      STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
	= STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
      DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
      *type_out = vectype;
      *type_in = vectype;
      stmts->safe_push (last_stmt);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_recog_bool_pattern: detected:\n");
      return pattern_stmt;
    }
  else
    return NULL;
}

/* Mark statements that are involved in a pattern.  */

static inline void
vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
			 tree pattern_vectype)
{
  stmt_vec_info pattern_stmt_info, def_stmt_info;
  stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (orig_stmt_info);
  gimple def_stmt;

  pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
  if (pattern_stmt_info == NULL)
    {
      pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
					     bb_vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
    }
  gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));

  STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
  STMT_VINFO_DEF_TYPE (pattern_stmt_info)
    = STMT_VINFO_DEF_TYPE (orig_stmt_info);
  STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
  STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
  STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
  STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
    = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
  if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
    {
      gimple_stmt_iterator si;
      for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
	   !gsi_end_p (si); gsi_next (&si))
	{
	  def_stmt = gsi_stmt (si);
	  def_stmt_info = vinfo_for_stmt (def_stmt);
	  if (def_stmt_info == NULL)
	    {
	      def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo,
						 bb_vinfo);
	      set_vinfo_for_stmt (def_stmt, def_stmt_info);
	    }
	  gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
	  STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
	  STMT_VINFO_DEF_TYPE (def_stmt_info) = vect_internal_def;
	  if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
	    STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
	}
    }
}

/* Function vect_pattern_recog_1

   Input:
   PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
	computation pattern.
   STMT: A stmt from which the pattern search should start.

   If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
   expression that computes the same functionality and can be used to
   replace the sequence of stmts that are involved in the pattern.

   Output:
   This function checks if the expression returned by PATTERN_RECOG_FUNC is
   supported in vector form by the target.  We use 'TYPE_IN' to obtain the
   relevant vector type.  If 'TYPE_IN' is already a vector type, then this
   indicates that target support had already been checked by
   PATTERN_RECOG_FUNC.  If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC,
   we check that it fits to the available target pattern.

   This function also does some bookkeeping, as explained in the documentation
   for vect_recog_pattern.  */
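
/* Illustration (a sketch, not from the original sources): when the
   recognizer returns scalar types, the support check below boils down to

     type_in = get_vectype_for_scalar_type (type_in);
     optab = optab_for_tree_code (code, type_in, optab_default);
     icode = optab_handler (optab, TYPE_MODE (type_in));

   and the pattern is accepted only if ICODE is a real insn whose result
   mode matches the mode of TYPE_OUT.  */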
static void
vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
		      gimple_stmt_iterator si,
		      vec<gimple> *stmts_to_replace)
{
  gimple stmt = gsi_stmt (si), pattern_stmt;
  stmt_vec_info stmt_info;
  loop_vec_info loop_vinfo;
  tree pattern_vectype;
  tree type_in, type_out;
  enum tree_code code;
  int i;
  gimple next;

  stmts_to_replace->truncate (0);
  stmts_to_replace->quick_push (stmt);
  pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
  if (!pattern_stmt)
    return;

  stmt = stmts_to_replace->last ();
  stmt_info = vinfo_for_stmt (stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);

  if (VECTOR_MODE_P (TYPE_MODE (type_in)))
    {
      /* No need to check target support (already checked by the pattern
	 recognition function).  */
      pattern_vectype = type_out ? type_out : type_in;
    }
  else
    {
      machine_mode vec_mode;
      enum insn_code icode;
      optab optab;

      /* Check target support  */
      type_in = get_vectype_for_scalar_type (type_in);
      if (!type_in)
	return;
      if (type_out)
	type_out = get_vectype_for_scalar_type (type_out);
      else
	type_out = type_in;
      if (!type_out)
	return;
      pattern_vectype = type_out;

      if (is_gimple_assign (pattern_stmt))
	code = gimple_assign_rhs_code (pattern_stmt);
      else
	{
	  gcc_assert (is_gimple_call (pattern_stmt));
	  code = CALL_EXPR;
	}

      optab = optab_for_tree_code (code, type_in, optab_default);
      vec_mode = TYPE_MODE (type_in);
      if (!optab
	  || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
	  || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
	return;
    }

  /* Found a vectorizable pattern.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "pattern recognized: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  /* Mark the stmts that are involved in the pattern. */
  vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);

  /* Patterns cannot be vectorized using SLP, because they change the order of
     computation.  */
  if (loop_vinfo)
    FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
      if (next == stmt)
	LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);

  /* It is possible that additional pattern stmts are created and inserted in
     STMTS_TO_REPLACE.  We create a stmt_info for each of them, and mark the
     relevant statements.  */
  for (i = 0; stmts_to_replace->iterate (i, &stmt)
	      && (unsigned) i < (stmts_to_replace->length () - 1);
       i++)
    {
      stmt_info = vinfo_for_stmt (stmt);
      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "additional pattern stmt: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
	  dump_printf (MSG_NOTE, "\n");
	}

      vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
    }
}

/* Function vect_pattern_recog

   Input:
   LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
	computation idioms.

   Output - for each computation idiom that is detected we create a new stmt
	that provides the same functionality and that can be vectorized.  We
	also record some information in the struct_stmt_info of the relevant
	stmts, as explained below:

   At the entry to this function we have the following stmts, with the
   following initial value in the STMT_VINFO fields:

	 stmt                     in_pattern_p  related_stmt    vec_stmt
	 S1: a_i = ....                 -       -               -
	 S2: a_2 = ..use(a_i)..         -       -               -
	 S3: a_1 = ..use(a_2)..         -       -               -
	 S4: a_0 = ..use(a_1)..         -       -               -
	 S5: ... = ..use(a_0)..         -       -               -

   Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
   represented by a single stmt.  We then:
   - create a new stmt S6 equivalent to the pattern (the stmt is not
     inserted into the code)
   - fill in the STMT_VINFO fields as follows:

				      in_pattern_p  related_stmt    vec_stmt
	 S1: a_i = ....                 -       -               -
	 S2: a_2 = ..use(a_i)..         -       -               -
	 S3: a_1 = ..use(a_2)..         -       -               -
	 S4: a_0 = ..use(a_1)..         true    S6              -
	   '---> S6: a_new = ....       -       S4              -
	 S5: ... = ..use(a_0)..         -       -               -

   (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
   to each other through the RELATED_STMT field).

   S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
   of S4 because it will replace all its uses.  Stmts {S1,S2,S3} will
   remain irrelevant unless used by stmts other than S4.

   If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
   (because they are marked as irrelevant).  It will vectorize S6, and record
   a pointer to the new vector stmt VS6 from S6 (as usual).
   S4 will be skipped, and S5 will be vectorized as usual:

				      in_pattern_p  related_stmt    vec_stmt
	 S1: a_i = ....                 -       -               -
	 S2: a_2 = ..use(a_i)..         -       -               -
	 S3: a_1 = ..use(a_2)..         -       -               -
       > VS6: va_new = ....             -       -               -
	 S4: a_0 = ..use(a_1)..         true    S6              VS6
	   '---> S6: a_new = ....       -       S4              VS6
       > VS5: ... = ..vuse(va_new)..    -       -               -
	 S5: ... = ..use(a_0)..         -       -               -

   DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
   elsewhere), and we'll end up with:

	 VS6: va_new = ....
	 VS5: ... = ..vuse(va_new)..

   In case of more than one pattern statements, e.g., widen-mult with
   intermediate type:

     S1  a_t = ;
     S2  a_T = (TYPE) a_t;
	   '--> S3: a_it = (interm_type) a_t;
     S4  prod_T = a_T * CONST;
	   '--> S5: prod_T' = a_it w* CONST;

   there may be other users of a_T outside the pattern.  In that case S2 will
   be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
   and vectorized.  The vector stmt VS2 will be recorded in S2, and VS3 will
   be recorded in S3.  */
void
vect_pattern_recog (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  struct loop *loop;
  basic_block *bbs;
  unsigned int nbbs;
  gimple_stmt_iterator si;
  unsigned int i, j;
  vect_recog_func_ptr vect_recog_func;
  auto_vec<gimple, 1> stmts_to_replace;
  gimple stmt;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "=== vect_pattern_recog ===\n");

  if (loop_vinfo)
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      bbs = LOOP_VINFO_BBS (loop_vinfo);
      nbbs = loop->num_nodes;
    }
  else
    {
      bbs = &BB_VINFO_BB (bb_vinfo);
      nbbs = 1;
    }

  /* Scan through the loop stmts, applying the pattern recognition
     functions starting at each stmt visited:  */
  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  if (bb_vinfo && (stmt = gsi_stmt (si))
	      && vinfo_for_stmt (stmt)
	      && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
	    continue;

	  /* Scan over all generic vect_recog_xxx_pattern functions.  */
	  for (j = 0; j < NUM_PATTERNS; j++)
	    {
	      vect_recog_func = vect_vect_recog_func_ptrs[j];
	      vect_pattern_recog_1 (vect_recog_func, si,
				    &stmts_to_replace);
	    }
	}
    }
}